In [None]:
import pandas as pd
import numpy as np
import os

# Central settings for one backtest run: where inputs are read from, where
# outputs are written, and how many trading rows separate selection, buy and sell.
CONFIG = {
    # --- inputs ---
    "selection_result_path": r"./short_term_selection_optimized.csv",
    "raw_data_path": r"D:\workspace\xiaoyao\data\widetable.parquet",
    # --- outputs ---
    "backtest_result_path": r"./backtest_result_optimized.csv",
    "backtest_summary_path": r"./backtest_summary_optimized.txt",
    "log_path": r"./backtest_optimized_log.txt",
    # --- trade timing ---
    "trade_rule": {
        # Offsets added to the selection row's per-stock trade sequence number.
        "buy_delay": 1,
        "sell_delay": 5,
        # NOTE(review): not read anywhere in the visible code — confirm whether
        # it is still needed.
        "min_valid_days": 6,
    },
}

# --------------------------
# Utility functions (group statistics newly added)
# --------------------------
def init_environment():
    """Reset the log file and record that the backtest environment is ready.

    The log file at ``CONFIG["log_path"]`` is truncated and given a single
    timestamped start banner; a readiness message is then emitted through
    ``log_msg`` (which appends to the same file).
    """
    started_at = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
    banner = f"【优化版回测启动】{started_at}\n"
    # Mode 'w' deliberately discards the log of any previous run.
    with open(CONFIG["log_path"], 'w', encoding='utf-8') as log_file:
        log_file.write(banner)
    log_msg("✅ 回测环境初始化完成")

def log_msg(msg):
    """Print ``msg`` with a timestamp prefix and append the same line to the log.

    Args:
        msg: Text (or any object formattable in an f-string) to record.
    """
    now_text = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
    entry = f"[{now_text}] {msg}"
    print(entry)
    # Append mode: the file is created/truncated once by init_environment().
    with open(CONFIG["log_path"], 'a', encoding='utf-8') as log_file:
        log_file.write(f"{entry}\n")

def _split_into_quantile_groups(scores, labels):
    """Label each score with one of ``len(labels)`` equal-frequency groups.

    ``pd.qcut`` is used whenever it succeeds. It raises ``ValueError`` when
    tied scores make the quantile edges non-unique; in that case (and when
    there are no non-null scores at all) the split is done on first-occurrence
    ranks instead, so the call never fails. Null scores get a null label.
    """
    n_valid = int(scores.notna().sum())
    if n_valid:
        try:
            return pd.qcut(scores, len(labels), labels=labels)
        except ValueError:
            # Duplicate bin edges caused by ties: fall through to rank split.
            pass

    codes = np.full(len(scores), -1, dtype=np.int64)  # -1 == missing category
    if n_valid:
        ranks = scores.rank(method='first').to_numpy(dtype=float)
        valid = ~np.isnan(ranks)
        # Ranks run 1..n_valid, so this maps them onto 0..len(labels)-1.
        codes[valid] = ((ranks[valid] - 1) * len(labels) // n_valid).astype(np.int64)
    groups = pd.Categorical.from_codes(codes, categories=labels, ordered=True)
    return pd.Series(groups, index=scores.index)


def _return_stats_by_group(df, group_col, group_type):
    """Aggregate ``return_rate`` per category of ``group_col`` into report rows."""
    by = df[group_col]
    returns = df['return_rate'].groupby(by, observed=False)
    positive = (df['return_rate'] > 0).groupby(by, observed=False)
    stats = pd.DataFrame({
        '交易数': returns.count(),
        '平均收益(%)': returns.mean(),
        '中位数收益(%)': returns.median(),
        '正收益比例(%)': positive.mean() * 100,
    }).round(2)
    # Plain string labels (not a categorical) so both group types can be stacked.
    stats.insert(0, '分组', list(stats.index))
    stats.insert(0, '分组类型', group_type)
    return stats.reset_index(drop=True)


def calc_group_stats(df):
    """Bucket trades by auction score and price-volume score and summarize returns.

    Two columns are added to ``df`` in place: ``auction_group`` (terciles of
    ``auction_score``) and ``pv_group`` (terciles of ``price_volume_score``).
    For each tercile the trade count, mean return, median return and share of
    positive returns (all rounded to 2 decimals) are reported.

    Args:
        df: DataFrame with ``auction_score``, ``price_volume_score`` and
            ``return_rate`` columns. It is modified in place.

    Returns:
        A ``(group_stats, df)`` tuple: ``group_stats`` has one row per
        (group type, group) with columns 分组类型, 分组, 交易数, 平均收益(%),
        中位数收益(%), 正收益比例(%); ``df`` is the same object that was passed
        in, now carrying the two group columns. For an empty ``df`` the stats
        frame has those columns and no rows.
    """
    df['auction_group'] = _split_into_quantile_groups(
        df['auction_score'], ['低竞价得分', '中竞价得分', '高竞价得分']
    )
    df['pv_group'] = _split_into_quantile_groups(
        df['price_volume_score'], ['低量价得分', '中量价得分', '高量价得分']
    )

    if df.empty:
        # Nothing to aggregate; keep the report schema stable for callers.
        empty_stats = pd.DataFrame(columns=[
            '分组类型', '分组', '交易数', '平均收益(%)', '中位数收益(%)', '正收益比例(%)'
        ])
        return empty_stats, df

    group_stats = pd.concat(
        [
            _return_stats_by_group(df, 'auction_group', '竞价得分'),
            _return_stats_by_group(df, 'pv_group', '量价得分'),
        ],
        ignore_index=True,
    )
    return group_stats, df

def save_summary(summary_dict, group_stats_df):
    """Render the backtest report as text and write it to the summary file.

    Args:
        summary_dict: Mapping holding the headline figures referenced by the
            report template (``total_selection``, ``valid_trade``,
            ``invalid_trade``, ``valid_rate``, ``avg_return``,
            ``median_return``, ``positive_ratio``, ``positive_count``,
            ``max_return``, ``min_return``, ``std_return``, ``max_drawdown``).
        group_stats_df: Per-group statistics table; rendered with
            ``to_string(index=False)`` as section 4 of the report.

    The file at ``CONFIG["backtest_summary_path"]`` is overwritten, and a
    confirmation line is written through ``log_msg``.
    """
    report_template = """
【优化版回测统计结果】
==========================
回测规则：T日选股 → T+{buy_delay}买入 → T+{sell_delay}卖出
==========================
1. 基础统计
   - 选股总记录数：{total_selection} 条
   - 有效交易记录数：{valid_trade} 条
   - 无效记录数：{invalid_trade} 条
   - 有效交易率：{valid_rate:.2f}%

2. 收益统计
   - 平均收益率：{avg_return:.2f}%
   - 中位数收益率：{median_return:.2f}%
   - 正收益比例：{positive_ratio:.2f}%（{positive_count}/{valid_trade}）
   - 最大收益率：{max_return:.2f}%
   - 最小收益率：{min_return:.2f}%

3. 风险统计
   - 收益率标准差：{std_return:.2f}%
   - 最大回撤：{max_drawdown:.2f}%
{group_text}
=========================="""

    # Section 4: heading, a 50-character rule, then the table itself.
    group_section = "".join([
        "\n4. 分组统计（关键优化验证）\n",
        "=" * 50,
        "\n",
        group_stats_df.to_string(index=False),
        "\n",
    ])

    trade_rule = CONFIG['trade_rule']
    # Template-only fields are applied last so they always win over any
    # same-named key in summary_dict.
    template_fields = {
        **summary_dict,
        'buy_delay': trade_rule['buy_delay'],
        'sell_delay': trade_rule['sell_delay'],
        'group_text': group_section,
    }
    report_text = report_template.format_map(template_fields)

    with open(CONFIG["backtest_summary_path"], 'w', encoding='utf-8') as summary_file:
        summary_file.write(report_text)
    log_msg(f"✅ 优化版回测汇总保存：{CONFIG['backtest_summary_path']}")

# --------------------------
# Main backtest logic (unchanged, plus the new group-statistics call)
# --------------------------
def run_backtest():
    """Run the full backtest: load data, price each pick, summarize, save.

    Steps:
      1. Reset the log and load the selection CSV and the market parquet file.
      2. Number each stock's market rows in date order (``trade_seq``) and
         locate every selection on that sequence.
      3. Take the close at ``trade_seq + buy_delay`` as the buy price and at
         ``trade_seq + sell_delay`` as the sell price, then compute the
         percentage return.
      4. Compute group statistics and headline figures, write the result CSV
         and the text summary, and log the key numbers.

    Selections that cannot be priced (no market row on the selection date, or
    no row at the buy/sell offset) are counted as invalid trades; the totals
    are taken from the selection file as loaded, before any filtering.

    Raises:
        Exception: Any error is logged through ``log_msg`` and re-raised.
    """
    try:
        init_environment()
        trade_rule = CONFIG['trade_rule']

        # Load selections
        log_msg("加载优化版选股结果...")
        selection_df = pd.read_csv(CONFIG["selection_result_path"])
        selection_df['date'] = pd.to_datetime(selection_df['date']).dt.date
        # Captured before the merge below drops unmatched rows, so the summary
        # reports the true number of picks.
        total_selection = len(selection_df)
        log_msg(f"✅ 选股结果：{len(selection_df)}条记录，{selection_df['stock_code'].nunique()}只股票")

        # Load market data (only the columns used here)
        log_msg("加载原始行情数据...")
        raw_df = pd.read_parquet(
            CONFIG["raw_data_path"], columns=['stock_code', 'date', 'close']
        )
        # Parse dates BEFORE sorting so the order is chronological even when
        # the stored values are strings.
        raw_df['date'] = pd.to_datetime(raw_df['date'])
        raw_df = (
            raw_df.dropna(subset=['close'])
            .sort_values(by=['stock_code', 'date'])
            .reset_index(drop=True)
        )
        raw_df['date'] = raw_df['date'].dt.date

        # Match buy/sell prices via each stock's row sequence number
        log_msg("匹配买卖价格...")
        raw_df['trade_seq'] = raw_df.groupby('stock_code').cumcount()
        selection_df = selection_df.merge(
            raw_df[['stock_code', 'date', 'trade_seq']],
            on=['stock_code', 'date'],
            how='left'
        ).dropna(subset=['trade_seq'])
        selection_df['trade_seq'] = selection_df['trade_seq'].astype(int)

        # Build the (stock_code, trade_seq) -> close lookup once, reuse for both legs.
        close_by_seq = raw_df.set_index(['stock_code', 'trade_seq'])['close']
        for price_col, delay_key in (('buy_price', 'buy_delay'), ('sell_price', 'sell_delay')):
            target_index = pd.MultiIndex.from_arrays(
                [selection_df['stock_code'], selection_df['trade_seq'] + trade_rule[delay_key]],
                names=['stock_code', 'trade_seq']
            )
            # Offsets past the end of a stock's history come back as NaN.
            selection_df[price_col] = close_by_seq.reindex(target_index).values

        # Percentage return; a zero buy price is nudged to avoid division by zero.
        selection_df['return_rate'] = (selection_df['sell_price'] - selection_df['buy_price']) / \
                                    selection_df['buy_price'].replace(0, 0.0001) * 100
        valid_mask = selection_df['buy_price'].notna() & selection_df['sell_price'].notna()
        backtest_result = selection_df[valid_mask].copy()
        valid_count = len(backtest_result)
        invalid_count = total_selection - valid_count

        # Group statistics need at least one trade to bucket.
        if valid_count > 0:
            group_stats_df, backtest_result_with_group = calc_group_stats(backtest_result)
        else:
            log_msg("⚠️ 无有效交易记录，跳过分组统计")
            group_stats_df, backtest_result_with_group = pd.DataFrame(), backtest_result

        # Headline figures (all zero when there are no valid trades)
        returns = backtest_result['return_rate']
        has_trades = valid_count > 0
        summary_dict = {
            "total_selection": total_selection,
            "valid_trade": valid_count,
            "invalid_trade": invalid_count,
            "valid_rate": valid_count / total_selection * 100 if total_selection > 0 else 0,
            "avg_return": returns.mean() if has_trades else 0,
            "median_return": returns.median() if has_trades else 0,
            "positive_count": (returns > 0).sum() if has_trades else 0,
            "positive_ratio": (returns > 0).mean() * 100 if has_trades else 0,
            "max_return": returns.max() if has_trades else 0,
            "min_return": returns.min() if has_trades else 0,
            "std_return": returns.std() if has_trades else 0,
            "max_drawdown": 0  # placeholder: dynamic drawdown is not computed in this simplified version
        }

        # Persist results
        backtest_result_with_group.to_csv(CONFIG["backtest_result_path"], index=False, encoding='utf-8-sig')
        save_summary(summary_dict, group_stats_df)

        # Log the key results
        log_msg(f"\n" + "="*60)
        log_msg(f"✅ 优化版回测完成！核心结果：")
        log_msg(f"📊 有效交易：{summary_dict['valid_trade']}条 | 正收益比例：{summary_dict['positive_ratio']:.2f}%")
        log_msg(f"📈 平均收益：{summary_dict['avg_return']:.2f}% | 中位数收益：{summary_dict['median_return']:.2f}%")
        log_msg(f"📊 分组统计：查看{CONFIG['backtest_summary_path']}，重点关注高竞价/量价得分组表现")
        log_msg("="*60)
    except Exception as e:
        log_msg(f"❌ 回测失败：{str(e)}")
        raise

# Script entry point: run the backtest only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    run_backtest()

[2025-10-24 12:57:20] ✅ 回测环境初始化完成
[2025-10-24 12:57:20] 加载优化版选股结果...
[2025-10-24 12:57:20] ❌ 回测失败：[Errno 2] No such file or directory: './daily_short_term_optimized.csv'


FileNotFoundError: [Errno 2] No such file or directory: './daily_short_term_optimized.csv'