In [None]:
from clickhouse_connect import get_client
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.dates as mdates

client = get_client(
    host="xx",
    port=xx,
    username="xx",
    password="xx"
)

In [None]:
def get_all_data(code: str, date: str = '2025-03-03'):
    sql = f"""
    SELECT 
        time_int,
        price,
        volume,
        side,
        ordno
    FROM stock_base.zb 
    WHERE code = '{code}'
        AND date = '{date}'
        AND date_time >= '{date} 09:15:00'
        AND date_time <= '{date} 09:25:00'
        AND type IS NOT NULL  -- 挂单数据
        AND trade_flag != '4'  -- 撤单标识
        AND price IS NOT NULL
        AND volume IS NOT NULL
    ORDER BY price
    """
    df = client.query_df(sql)
    return df

In [15]:
def get_all_codes(date: str = '2025-03-03'):
    sql = f"""
    SELECT DISTINCT code
    FROM stock_base.zb 
    WHERE date = '{date}'
        AND date_time >= '{date} 09:15:00'
        AND date_time <= '{date} 09:25:00'
        AND type IS NOT NULL
        AND price IS NOT NULL
    LIMIT 100 --限制股票数
    """
    
    try:
        df = client.query_df(sql)
        return df['code'].tolist() if not df.empty else []
    except Exception as e:
        print(f"查询失败: {e}")
        return []

In [6]:
def calculate_opening_price(buy_orders, sell_orders):
    """
    计算集合竞价开盘价
    buy_orders: 买盘DataFrame，已按价格降序排列
    sell_orders: 卖盘DataFrame，已按价格升序排列
    """
    # 1. 获取所有可能的价格
    buy_prices = buy_orders['price'].unique()
    sell_prices = sell_orders['price'].unique()
    all_prices = sorted(set(list(buy_prices) + list(sell_prices)))
    
    max_volume = 0
    best_price = None
    
    # 2. 试算每个价格
    for price in all_prices:
        # 买方：愿意以>=当前价格买的累计量
        buy_volume = buy_orders[buy_orders['price'] >= price]['volume'].sum()
        
        # 卖方：愿意以<=当前价格卖的累计量
        sell_volume = sell_orders[sell_orders['price'] <= price]['volume'].sum()
        
        # 可成交量 = min(买方可买量, 卖方可卖量)
        volume = min(buy_volume, sell_volume)
        
        # 3. 找最大成交量
        if volume > max_volume:
            max_volume = volume
            best_price = price
    
    return best_price, max_volume

In [8]:
# 从日线表获取真实开盘价
def get_real_opening_price(code, date:str = '2025-03-03'):
    sql = f"""
    SELECT open 
    FROM stock_base.daily 
    WHERE code = '{code}' AND date = '{date}'
    LIMIT 1
    """
    
    try:
        df = client.query_df(sql)
        if not df.empty:
            return float(df.iloc[0]['open'])
    except Exception as e:
        print(f"查询失败: {e}")
    
    return None

In [14]:
codes = np.unique(get_all_codes(date='2025-03-03'))
all_codes = len(codes)
print(f"共有{all_codes}支股票")
count = 0

for code in codes:
    df = get_all_data(code)
    buy_orders = df[df['side'] == b'B'].copy()
    sell_orders = df[df['side'] == b'S'].copy()
    best_price, max_volume = calculate_opening_price(buy_orders,sell_orders)
    
    real_price = get_real_opening_price(code)
    if(real_price == best_price):
        count+=1

print(f"计算正确{count}支股票")

共有5117支股票
计算正确2259支股票
