In [2]:
import os
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from matplotlib.gridspec import GridSpec
from matplotlib.collections import LineCollection
import seaborn as sns
from PIL import Image


In [3]:
def pre_processing(df):
    df['MidPrice'] = (df['AskPrice1'] * df['AskVolume1'] + df['BidPrice1'] * df['BidVolume1']) / (df['AskVolume1'] + df['BidVolume1'])
    df['position_change'] = df.groupby('StockID')['share_holding'].diff()
    df['ticks_since_last_change'] = df.groupby('StockID').apply(lambda x: x['Tick'] - x['Tick'].where(x['position_change'] != 0).ffill()).reset_index(level=0, drop=True)
    return df

def calculate_vwap(df: pd.DataFrame):
    df['CumulativeVolume'] = df['share_holding'].cumsum()
    df['CumulativeValue'] = df['trade_value'].cumsum()
    df['VWAP'] = abs(df['CumulativeValue'] / df['CumulativeVolume'])
    return df

def plot_all_stocks_with_vwap(df, output_folder='stock_plots'):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    df = df.groupby('StockID').apply(calculate_vwap).reset_index(drop=True)

    for stock_id in df['StockID'].unique():
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10), sharex=True)
        fig.suptitle(f'Stock {stock_id}: Price, VWAP, and Trading Activity', fontsize=16)
        
        stock_data = df[df['StockID'] == stock_id]
        
        # Plot price
        ax1.plot(stock_data['Tick'], stock_data['MidPrice'], label='Mid Price', color='b', linewidth=1.5)
        ax1.fill_between(stock_data['Tick'], stock_data['BidPrice1'], stock_data['AskPrice1'], 
                         alpha=0.2, color='gray', label='Bid-Ask Spread')
        
        # Plot colored VWAP
        has_positive = (stock_data['position_change'] > 0).any()
        color = 'green' if has_positive else 'red'
        ax1.plot(stock_data['Tick'], stock_data['VWAP'], label='My_vwap', color=color, linestyle='--', linewidth=1.5)
        ax1.plot(stock_data['Tick'], stock_data['twap'], label='twao', color='grey', linestyle='--', linewidth=1.5)
        
        ax1.set_ylabel('Price', fontsize=12)
        ax1.tick_params(axis='y', labelsize=10)
        
        # Set y-axis limits for price plot
        price_range = stock_data['MidPrice'].max() - stock_data['MidPrice'].min()
        ax1.set_ylim(stock_data['MidPrice'].min() - price_range*0.1, 
                     stock_data['MidPrice'].max() + price_range*0.1)
        
        # Mark points where trading occurred
        buys = stock_data[stock_data['position_change'] > 0]
        sells = stock_data[stock_data['position_change'] < 0]
        ax1.scatter(buys['Tick'], buys['MidPrice'], color='g', s=50, label='Buy', marker='^', zorder=5)
        ax1.scatter(sells['Tick'], sells['MidPrice'], color='r', s=50, label='Sell', marker='v', zorder=5)
        
        ax1.legend(fontsize=10, loc='upper left')
        
        # Plot share holding and target volume
        ax2.plot(stock_data['Tick'], stock_data['share_holding'], color='b', alpha=0.7, label='Share Holding', linewidth=1.5)
        ax2.plot(stock_data['Tick'], stock_data['target_volume'], color='m', alpha=0.7, label='Target Volume', linewidth=1.5)
        ax2.set_ylabel('Volume', fontsize=12)
        ax2.tick_params(axis='y', labelsize=10)
        
        # Plot trade volume as bars
        trade_volume = stock_data['position_change'].abs()
        ax2.bar(stock_data['Tick'], trade_volume, alpha=0.3, color='g', label='Trade Volume')
        
        ax2.legend(fontsize=10, loc='upper left')
        
        ax2.set_xlabel('Tick', fontsize=12)
        ax2.tick_params(axis='x', labelsize=10)
        
        plt.tight_layout()
        
        filename = f'{stock_id}_plot.png'
        filepath = os.path.join(output_folder, filename)
        plt.savefig(filepath, dpi=300, bbox_inches='tight')
        plt.close(fig)
        
        print(f"Saved plot for {stock_id} to {filepath}")
        
def stitch_images(directory, output_prefix, columns=5, max_rows_per_image=5):
    # 获取目录下所有图片文件
    image_files = sorted([f for f in os.listdir(directory) if f.endswith(('.png', '.jpg', '.jpeg'))])
    
    # 打开第一个图片以获取尺寸
    with Image.open(os.path.join(directory, image_files[0])) as img:
        width, height = img.size
    
    # 计算每个批次的图片数量
    images_per_batch = columns * max_rows_per_image
    
    # 批量处理图片
    for batch, i in enumerate(range(0, len(image_files), images_per_batch)):
        batch_files = image_files[i:i+images_per_batch]
        
        # 计算当前批次的行数
        rows = (len(batch_files) + columns - 1) // columns
        
        # 创建新图片
        result = Image.new('RGB', (width * columns, height * rows))
        
        # 粘贴图片
        for idx, f in enumerate(batch_files):
            with Image.open(os.path.join(directory, f)) as img:
                x = (idx % columns) * width
                y = (idx // columns) * height
                result.paste(img, (x, y))
        
        # 保存结果
        output_file = f"{output_prefix}_batch_{batch+1}.png"
        result.save(os.path.join(directory, output_file))
        print(f"Saved {output_file}")

In [4]:
def calculate_scores(df):
    df = df.groupby('StockID').apply(calculate_vwap).reset_index(drop=True)
    
    # 计算未完成量
    df['unfinished_volume'] = df['target_volume'] - df['share_holding']
    
    # 模拟收盘时强制平仓
    df['final_trade_value'] = df['unfinished_volume'] * df['MidPrice']
    df['final_trade_volume'] = df['unfinished_volume'].abs()
    
    # 计算最终的VWAP
    df['final_VWAP'] = np.where(
        df['CumulativeVolume'] + df['final_trade_volume'] != 0,
        (df['CumulativeValue'] + df['final_trade_value']) / (df['CumulativeVolume'] + df['final_trade_volume']),
        df['MidPrice']  # 如果分母为0，使用当前中间价格
    )
    
    # 计算得分
    df['trade_direction'] = np.sign(df['target_volume'])
    df['score'] = (1 - df['final_VWAP'] / df['twap']) * df['trade_direction'] + 0.0004
    
    return df

def plot_score_histogram(df, output_folder='stock_plots'):
    scores = df.groupby('StockID')['score'].last()
    
    plt.figure(figsize=(12, 6))
    plt.hist(scores, bins=20, edgecolor='black')
    plt.title('Distribution of Stock Scores')
    plt.xlabel('Score')
    plt.ylabel('Frequency')
    
    # 计算当日总得分
    mean_score = scores.mean()
    score_std = scores.std()
    total_score = np.abs(mean_score) * (mean_score / score_std)
    
    plt.text(0.95, 0.95, 
             f'Mean Score: {mean_score:.6f}\n'
             f'Score Std: {score_std:.6f}\n'
             f'Total Score: {total_score:.6f}', 
             transform=plt.gca().transAxes, 
             horizontalalignment='right', 
             verticalalignment='top', 
             fontsize=12)
    
    filename = 'score_histogram.png'
    filepath = os.path.join(output_folder, filename)
    plt.savefig(filepath, dpi=300, bbox_inches='tight')
    plt.close()
    
    print(f"Saved score histogram to {filepath}")
    
    # 保存总得分到文本文件
    with open(os.path.join(output_folder, 'total_score.txt'), 'w') as f:
        f.write(f'Mean Score: {mean_score:.6f}\n')
        f.write(f'Score Std: {score_std:.6f}\n')
        f.write(f'Total Score: {total_score:.6f}\n')


In [13]:
game = '083110'
day = 2

csv_path = os.path.join(
    './snapshots/',
    f'{game}-day{day}_all_stocks.csv'
    )

df = pd.read_csv(csv_path)

output_folder = f'./analysis/{game}_day{day}'

In [14]:
df = df.drop_duplicates(subset='Tick')

Unnamed: 0,Tick,StockID,AskPrice1,AskPrice2,AskPrice3,AskPrice4,AskPrice5,AskPrice6,AskPrice7,AskPrice8,...,last_price,twap,share_holding,orders,error_orders,order_value,trade_value,target_volume,remain_volume,frozen_volume
0,-1,UBIQ000,60.45,60.46,60.47,60.48,60.49,60.50,60.51,60.52,...,60.44,0.000000,0,0,0,0,0,3400,3400,0
50,0,UBIQ000,60.45,60.46,60.47,60.48,60.49,60.50,60.51,60.52,...,60.43,60.440000,0,0,0,0,0,3400,3400,0
100,1,UBIQ000,60.45,60.46,60.47,60.48,60.49,60.50,60.51,60.52,...,60.43,60.435312,0,0,0,0,0,3400,3400,0
150,2,UBIQ000,60.45,60.46,60.47,60.48,60.49,60.50,60.51,60.52,...,60.44,60.437986,0,0,0,0,0,3400,3400,0
200,3,UBIQ000,60.45,60.46,60.47,60.48,60.49,60.50,60.51,60.52,...,60.44,60.437240,0,0,0,0,0,3400,3400,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149800,2995,UBIQ000,60.38,60.39,60.40,60.41,60.42,60.43,60.44,60.45,...,60.38,60.197148,3400,25,0,240816,204704,3400,0,0
149850,2996,UBIQ000,60.38,60.39,60.40,60.41,60.42,60.43,60.44,60.45,...,60.38,60.197216,3400,25,0,240816,204704,3400,0,0
149900,2997,UBIQ000,60.38,60.39,60.40,60.41,60.42,60.43,60.44,60.45,...,60.38,60.197281,3400,25,0,240816,204704,3400,0,0
149950,2998,UBIQ000,60.39,60.40,60.41,60.42,60.43,60.44,60.45,60.46,...,60.37,60.197348,3400,25,0,240816,204704,3400,0,0


In [6]:
df = pre_processing(df)
plot_all_stocks_with_vwap(df, output_folder=output_folder)
stitch_images(output_folder, output_prefix=f'output{day}', columns=5)

  df['ticks_since_last_change'] = df.groupby('StockID').apply(lambda x: x['Tick'] - x['Tick'].where(x['position_change'] != 0).ffill()).reset_index(level=0, drop=True)
  df = df.groupby('StockID').apply(calculate_vwap).reset_index(drop=True)


Saved plot for UBIQ000 to ./analysis/083110_day1/UBIQ000_plot.png
Saved plot for UBIQ001 to ./analysis/083110_day1/UBIQ001_plot.png
Saved plot for UBIQ002 to ./analysis/083110_day1/UBIQ002_plot.png
Saved plot for UBIQ003 to ./analysis/083110_day1/UBIQ003_plot.png
Saved plot for UBIQ004 to ./analysis/083110_day1/UBIQ004_plot.png
Saved plot for UBIQ005 to ./analysis/083110_day1/UBIQ005_plot.png
Saved plot for UBIQ006 to ./analysis/083110_day1/UBIQ006_plot.png
Saved plot for UBIQ007 to ./analysis/083110_day1/UBIQ007_plot.png
Saved plot for UBIQ008 to ./analysis/083110_day1/UBIQ008_plot.png
Saved plot for UBIQ009 to ./analysis/083110_day1/UBIQ009_plot.png
Saved plot for UBIQ010 to ./analysis/083110_day1/UBIQ010_plot.png
Saved plot for UBIQ011 to ./analysis/083110_day1/UBIQ011_plot.png
Saved plot for UBIQ012 to ./analysis/083110_day1/UBIQ012_plot.png
Saved plot for UBIQ013 to ./analysis/083110_day1/UBIQ013_plot.png
Saved plot for UBIQ014 to ./analysis/083110_day1/UBIQ014_plot.png
Saved plot

In [None]:
df_with_scores = calculate_scores(df)

plot_score_histogram(df_with_scores, output_folder=output_folder)

  df = df.groupby('StockID').apply(calculate_vwap).reset_index(drop=True)


Saved score histogram to ./analysis/083012_day63/score_histogram.png
