In [None]:
from clickhouse_connect import get_client
import pandas as pd

# Module-level ClickHouse client; the query helpers in this file call
# ``client.query_df`` on it.
# NOTE(review): the "xx" strings and the bare ``xx`` passed as ``port`` look
# like redacted placeholders -- ``xx`` is not defined anywhere in this file, so
# real connection settings must be filled in before this can run.
client = get_client(
    host="xx",
    port=xx,
    username="xx",
    password="xx"
)

def get_canceled_order(code: str, date: str = '2025-03-03'):
    """Collect the order numbers referenced by ``trade_flag = '4'`` rows.

    Queries ``stock_base.zb`` for rows of ``code`` on ``date`` with
    ``exg = '1'`` and ``trade_flag = '4'`` (treated by the callers as cancel
    records) whose ``date_time`` lies between 09:15:00 and 09:20:00 inclusive,
    and gathers every ``bidno`` / ``askno`` value that converts to ``int``.

    Args:
        code: Security code to filter on.
        date: Trading date as a ``YYYY-MM-DD`` string.

    Returns:
        A list of ints in row order, ``bidno`` before ``askno`` within a row.
        Missing values, empty strings and values that ``int()`` rejects are
        skipped; duplicates are kept.
    """
    # Values are bound by the driver instead of being spliced into the SQL
    # text, so a quote inside ``code`` or ``date`` cannot alter the statement.
    cancel_sql = """
    SELECT bidno, askno
    FROM stock_base.zb
    WHERE code = %(code)s
        AND date = %(date)s
        AND date_time >= %(start)s
        AND date_time <= %(end)s
        AND trade_flag = '4'
        AND exg = '1'
    """
    params = {
        'code': code,
        'date': date,
        'start': f'{date} 09:15:00',
        'end': f'{date} 09:20:00',
    }
    cancel_df = client.query_df(cancel_sql, parameters=params)
    if cancel_df.empty:
        return []

    cancel_ids = []
    # Walk the two columns directly: iterrows() builds a Series per row, which
    # is slow and can upcast values when the two columns have different dtypes.
    for pair in zip(cancel_df['bidno'], cancel_df['askno']):
        for raw in pair:
            if pd.isna(raw) or raw == '':
                continue
            try:
                cancel_ids.append(int(raw))
            except (ValueError, TypeError):
                # Deliberate best-effort: a malformed order number is ignored.
                continue
    return cancel_ids

def get_effective_order(code: str, date: str = '2025-03-03'):
    """Return the orders of ``code`` that were not cancelled.

    Loads ``price, volume, side, ordno`` from ``stock_base.zb`` for ``code`` on
    ``date`` with ``exg = '1'``, ``date_time`` between 09:15:00 and 09:25:00
    inclusive and ``trade_flag`` not in ``('4', 'F')``, ordered by price. Rows
    whose ``ordno`` appears in ``get_canceled_order(code, date)`` are dropped.

    Args:
        code: Security code to filter on.
        date: Trading date as a ``YYYY-MM-DD`` string.

    Returns:
        A DataFrame with the selected columns; it may be empty.
    """
    # Values are bound by the driver instead of being spliced into the SQL
    # text, so a quote inside ``code`` or ``date`` cannot alter the statement.
    effective_order_sql = """
    SELECT price, volume, side, ordno
    FROM stock_base.zb
    WHERE code = %(code)s
        AND date = %(date)s
        AND date_time >= %(start)s
        AND date_time <= %(end)s
        AND trade_flag NOT IN ('4', 'F')
        AND exg = '1'
    ORDER BY price
    """
    params = {
        'code': code,
        'date': date,
        'start': f'{date} 09:15:00',
        'end': f'{date} 09:25:00',
    }
    all_order_df = client.query_df(effective_order_sql, parameters=params)
    if all_order_df.empty:
        # Nothing to filter; also avoids indexing 'ordno' on an empty result.
        return all_order_df

    cancel_ids = get_canceled_order(code, date)
    if not cancel_ids:
        return all_order_df

    still_live = ~all_order_df['ordno'].isin(cancel_ids)
    return all_order_df[still_live].copy()

def calculate_open_price(bid_list, ask_list, prev_close=None):
    """Choose the matching price and matched volume for two order tables.

    Every distinct price found in either table is evaluated once. For a price
    ``p`` the matched volume is ``min(sum of bid volume with price >= p,
    sum of ask volume with price <= p)``; prices with zero matched volume are
    discarded. Among the remaining prices the winner is picked by, in order:

    1. largest matched volume;
    2. smallest absolute difference between the two cumulative volumes;
    3. smallest distance to ``prev_close`` (the lower price wins a tie), or,
       when ``prev_close`` is None, the middle element of the remaining
       prices in ascending order (index ``len // 2``).

    Args:
        bid_list: DataFrame with ``price`` and ``volume`` columns (buy side).
        ask_list: DataFrame with ``price`` and ``volume`` columns (sell side).
        prev_close: Optional reference price used for the final tie-break.

    Returns:
        ``(price, volume)`` of the chosen price, or ``(prev_close, 0)`` when
        no price has a non-zero matched volume.
    """
    # De-duplicate across both sides: a price quoted by buyers and sellers
    # must count once, otherwise its extra entry skews the middle-element
    # fallback used when prev_close is None.
    all_prices = sorted(
        pd.concat([bid_list['price'], ask_list['price']], ignore_index=True).unique()
    )

    candidates = []  # (price, matched volume, cumulative imbalance), ascending price
    for price in all_prices:
        bid_volume = bid_list.loc[bid_list['price'] >= price, 'volume'].sum()
        ask_volume = ask_list.loc[ask_list['price'] <= price, 'volume'].sum()
        volume = min(bid_volume, ask_volume)
        if volume == 0:
            continue

        bid_at_price = bid_list.loc[bid_list['price'] == price, 'volume'].sum()
        ask_at_price = ask_list.loc[ask_list['price'] == price, 'volume'].sum()

        # When both sides have orders exactly at this price, at least one side
        # must be fully fillable within the matched volume. For non-negative
        # volumes this always holds; the check is kept for unusual inputs.
        if (bid_at_price > 0 and ask_at_price > 0
                and bid_at_price > volume and ask_at_price > volume):
            continue

        candidates.append((price, volume, abs(bid_volume - ask_volume)))

    if not candidates:
        return prev_close, 0

    max_volume = max(item[1] for item in candidates)
    finalists = [item for item in candidates if item[1] == max_volume]

    min_diff = min(item[2] for item in finalists)
    finalists = [item for item in finalists if item[2] == min_diff]

    if prev_close is not None:
        # min() keeps the first of equally distant prices, i.e. the lower one.
        best = min(finalists, key=lambda item: abs(item[0] - prev_close))
    else:
        # finalists is already in ascending price order.
        best = finalists[len(finalists) // 2]

    return best[0], best[1]

def get_real_open_close_price(code: str, date: str = '2025-03-03'):
    """Fetch the ``open`` and ``pclose`` values of ``code`` on ``date``.

    Reads at most one row from ``stock_base.daily`` matching ``code``,
    ``date`` and ``exg = '1'``.

    Args:
        code: Security code to filter on.
        date: Trading date as a ``YYYY-MM-DD`` string.

    Returns:
        ``(open, pclose)`` from the first returned row, or ``(None, None)``
        when the query returns no rows.
    """
    # Values are bound by the driver instead of being spliced into the SQL
    # text, so a quote inside ``code`` or ``date`` cannot alter the statement.
    real_open_price_sql = """
    SELECT open, pclose
    FROM stock_base.daily
    WHERE code = %(code)s
    AND date = %(date)s
    AND exg = '1'
    LIMIT 1
    """
    real_open_price_df = client.query_df(
        real_open_price_sql,
        parameters={'code': code, 'date': date},
    )
    if real_open_price_df.empty:
        return None, None

    # Read each column separately so neither value is upcast to a common dtype.
    real_open_price = real_open_price_df['open'].iloc[0]
    prev_close = real_open_price_df['pclose'].iloc[0]
    return real_open_price, prev_close

def get_codes(date: str = '2025-03-03', limit: int = 100):
    """List distinct codes present in ``stock_base.daily`` on ``date``.

    Only rows with ``exg = '1'`` are considered. Codes are ordered by their
    integer value (``toInt32(code)``) and truncated to ``limit`` entries.

    Args:
        date: Trading date as a ``YYYY-MM-DD`` string.
        limit: Maximum number of codes to return; defaults to 100, the value
            that used to be hard-coded in the query.

    Returns:
        A list of code values in query order; empty when nothing matches.
    """
    # Values are bound by the driver instead of being spliced into the SQL
    # text, so a quote inside ``date`` cannot alter the statement.
    codes_sql = """
    SELECT DISTINCT code
    FROM stock_base.daily
    WHERE date = %(date)s
    AND exg = '1'
    ORDER BY toInt32(code)
    LIMIT %(limit)s
    """
    df = client.query_df(codes_sql, parameters={'date': date, 'limit': int(limit)})
    if df.empty:
        return []
    # SELECT DISTINCT already guarantees uniqueness, so no extra de-duplication.
    return df['code'].tolist()

if __name__ == "__main__":
    # Compare the computed opening price with the recorded one for every code
    # returned by get_codes() and print a summary.
    date = '2025-03-03'
    codes = get_codes(date=date)

    amount, worked_amount, correct_amount = 0, 0, 0
    error_code1, error_code2 = [], []  # unusable order data / no recorded open

    # ``amount`` ends up as the number of codes visited (stays 0 if none).
    for amount, code in enumerate(codes, start=1):
        effective_order_df = get_effective_order(code, date)
        if effective_order_df.empty:
            error_code1.append(code)
            continue

        # ``side`` is compared against bytes literals, as in the stored data.
        is_bid = effective_order_df['side'] == b'B'
        is_ask = effective_order_df['side'] == b'S'
        bid_order = effective_order_df[is_bid].copy()
        ask_order = effective_order_df[is_ask].copy()
        if bid_order.empty or ask_order.empty:
            error_code1.append(code)
            continue

        # Bids: highest price first; asks: lowest price first; ties by ordno.
        sort_keys = ['price', 'ordno']
        bid_list = bid_order.sort_values(sort_keys, ascending=[False, True])
        ask_list = ask_order.sort_values(sort_keys, ascending=[True, True])

        real_open_price, prev_close = get_real_open_close_price(code, date)
        open_price, volume = calculate_open_price(bid_list, ask_list, prev_close)

        if open_price is None:
            error_code1.append(code)
            continue
        if real_open_price is None:
            error_code2.append(code)
            continue

        worked_amount += 1
        if open_price == real_open_price:
            correct_amount += 1

    error_amount = amount - worked_amount
    summary_lines = (
        f'股票总数: {amount}',
        f'计算正确: {correct_amount}',
        f'计算异常: {error_amount}',
        f'数据异常股票: {error_code1}',
        f'缺失开盘价股票: {error_code2}',
    )
    for line in summary_lines:
        print(line)

股票总数: 100
计算正确: 100
计算异常: 0
数据异常股票: []
缺失开盘价股票: []
