In [None]:
import pandas as pd
import numpy as np
import os
from datetime import datetime

# ======================== 配置（不变，仅适配振幅计算逻辑） ========================
# Thresholds read by filter_stocks for the three chart patterns.
_SHAPE_FILTERS = {
    # Pattern 1 (pullback then stabilise)
    "rise_ratio_30d_min": 0.15,           # minimum 30-day rise ratio
    "pullback_ratio_to_rise_min": 0.15,   # pullback / rise, lower bound
    "pullback_ratio_to_rise_max": 0.5,    # pullback / rise, upper bound
    "pullback_days_min": 7,
    "pullback_days_max": 20,
    # Pattern 2 (consolidation breakout)
    "consolidation_days": 15,             # rolling window for the mean amplitude
    "consolidation_amplitude_max": 0.05,  # mean computed amplitude must be <= 5%
    "breakthrough_ratio": 0.95,           # close must reach this share of high_30d
    # Pattern 3 (steady small gains)
    "consecutive_up_days_min": 5,
    "small_rise_ratio_30d_min": 0.1,
    "small_rise_ratio_30d_max": 0.4,
    # Shared by several patterns
    "volume_ratio_min": 1.0,              # base threshold; patterns 2/3 scale it up
    "rsi_safe_max": 60,
}

# Weights applied by score_stocks to its component scores (they sum to 1.0).
_SORT_WEIGHTS = {
    "volume_ratio_5d": 0.3,
    "price_rise_score": 0.25,
    "macd_score": 0.2,
    "rsi_score": 0.15,
    "bollinger_score": 0.1,
}

# Input/output paths, number of picks kept per day, and the two tables above.
CONFIG = {
    "factortable_path": r'D:\workspace\xiaoyao\data\factortable.parquet',
    "result_root_dir": r'.\daily_selection_results_aligned',
    "all_result_save_path": r'.\all_selection_results_aligned.csv',
    "log_path": r'.\batch_selection_log_aligned.txt',
    "top_n": 20,
    "shape_filters": _SHAPE_FILTERS,
    "sort_weights": _SORT_WEIGHTS,
}

# ======================== 工具函数（核心修复：移除amplitude字段验证） ========================
def init_environment():
    """Prepare the output directory and start a fresh log file.

    Creates ``CONFIG["result_root_dir"]`` if needed, overwrites the file at
    ``CONFIG["log_path"]`` with a timestamped start-up banner, then reports
    completion through ``log_msg``.
    """
    result_dir = CONFIG["result_root_dir"]
    os.makedirs(result_dir, exist_ok=True)

    # Mode 'w' discards whatever a previous run left in the log.
    with open(CONFIG["log_path"], 'w', encoding='utf-8') as log_file:
        started_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        log_file.write(f"【多形态组合选股启动（修复振幅依赖）】{started_at}\n")

    log_msg(f"✅ 初始化完成：结果目录={result_dir}")

def log_msg(msg):
    """Echo a timestamped message to stdout and append it to the log file.

    Args:
        msg: Text to record; it is prefixed with ``[YYYY-mm-dd HH:MM:SS]``.
    """
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    entry = f"[{stamp}] {msg}"
    print(entry)
    # Append mode: init_environment is the one that truncates the file.
    with open(CONFIG["log_path"], 'a', encoding='utf-8') as log_file:
        print(entry, file=log_file)

def validate_data(df):
    """Check that the factor table has every column the pipeline reads.

    No 'amplitude' column is required; amplitude is derived later from
    high / low / close.

    Args:
        df: Factor table as a DataFrame.

    Returns:
        The same ``df``, unchanged.

    Raises:
        ValueError: If one or more required columns are absent.
    """
    expected = (
        # identifiers and raw quotes
        'stock_code', 'date', 'close', 'open', 'volume', 'high', 'low',
        # precomputed factors
        'consecutive_up_days', 'is_high_open', 'ma5', 'ma20',
        'macd_line', 'signal_line', 'macd_hist', 'volume_ratio_5d',
        'high_30d', 'pullback_ratio', 'pullback_days', 'bollinger_lower', 'rsi14',
    )
    available = df.columns
    missing = [name for name in expected if name not in available]
    if len(missing) > 0:
        raise ValueError(f"字段缺失：{missing}，请检查factortable数据")

    log_msg("✅ 数据字段验证通过（动态计算振幅，不依赖原始amplitude）")
    return df

# ======================== 步骤1：计算衍生指标（核心修复：新增振幅计算） ========================
def pre_calculate_indicators(df):
    """Add the derived per-stock indicators used by the pattern filters.

    Three columns are added to a copy of ``df``:

    * ``rise_ratio_30d``: ``(high_30d - close 30 rows earlier) / close 30
      rows earlier``, clipped to [0, 1]; 0 where no value is available.
    * ``calc_amplitude``: ``(high - low) / previous close`` (the first row
      of each stock uses its own close), clipped to [0, 0.2]; 0 where no
      value is available.
    * ``consolidation_amplitude``: rolling mean of ``calc_amplitude`` over
      ``CONFIG["shape_filters"]["consolidation_days"]`` rows; NaN until the
      window is full.

    "Rows earlier" is evaluated per ``stock_code`` in ascending ``date``
    order. The rows are sorted internally for that purpose, and the returned
    frame keeps the caller's original row order and index.

    Args:
        df: Factor table with at least ``stock_code``, ``close``, ``high``,
            ``low`` and ``high_30d``. If a ``date`` column is present it
            defines the chronological order; otherwise the existing row
            order is used.

    Returns:
        A copy of ``df`` with the three columns above added.
    """
    df = df.copy()
    window = CONFIG["shape_filters"]["consolidation_days"]

    # Work on a positionally indexed frame: results are written back by
    # position, so a non-unique or unsorted index on `df` cannot misalign them.
    has_date = 'date' in df.columns
    needed = ['stock_code', 'close', 'high', 'low', 'high_30d']
    work = df[needed + (['date'] if has_date else [])].reset_index(drop=True)
    if has_date:
        # shift()/rolling() below only mean "N trading days back" when each
        # stock's rows are in date order.
        work = work.sort_values(['stock_code', 'date'])

    close_by_stock = work.groupby('stock_code', sort=False)['close']

    # 1. 30-day rise ratio. A zero base price is replaced in the denominator
    #    only, so the ratio becomes very large and is then clipped to 1.0.
    close_30_back = close_by_stock.shift(30)
    rise = (work['high_30d'] - close_30_back) / close_30_back.replace(0, 0.0001)
    work['rise_ratio_30d'] = rise.clip(0, 1.0).fillna(0)

    # 2. Daily amplitude relative to the previous close, computed from
    #    high/low/close because the table has no usable 'amplitude' column.
    prev_close = close_by_stock.shift(1).fillna(work['close'])
    amplitude = (work['high'] - work['low']) / prev_close
    # Values above 20% are treated as outliers and capped.
    work['calc_amplitude'] = amplitude.clip(0, 0.2).fillna(0)

    # 3. Mean amplitude over the consolidation window, per stock.
    work['consolidation_amplitude'] = (
        work.groupby('stock_code', sort=False)['calc_amplitude']
        .transform(lambda s: s.rolling(window).mean())
    )

    # Restore the caller's row order and copy the results over by position.
    work = work.sort_index()
    for col in ('rise_ratio_30d', 'calc_amplitude', 'consolidation_amplitude'):
        df[col] = work[col].to_numpy()

    log_msg("✅ 衍生指标计算完成（含动态振幅计算）")
    return df

# ======================== 步骤2：多形态组合筛选（替换为计算的振幅，逻辑不变） ========================
def filter_stocks(daily_df):
    """Keep the rows of one trading day that match any of three patterns.

    Patterns, all driven by ``CONFIG["shape_filters"]``:

    1. Pullback then stabilise: enough 30-day rise, pullback/rise ratio and
       pullback length within bounds, close >= ma5 >= ma20, volume ratio at
       or above the base threshold.
    2. Consolidation breakout: mean computed amplitude at or below the
       limit, close near ``high_30d``, volume ratio >= 1.3x the base
       threshold, and a high open.
    3. Steady small gains: enough consecutive up days, 30-day rise within a
       moderate band, ma5 >= ma20, volume ratio >= 1.1x the base threshold,
       RSI14 at or below the safe maximum.

    Per-pattern counts, the day's mean amplitude and the pool size are logged.

    Args:
        daily_df: One day's rows, including the columns added by
            ``pre_calculate_indicators``.

    Returns:
        A copy of the matching rows with the ``calc_amplitude`` and
        ``consolidation_amplitude`` helper columns removed.
    """
    limits = CONFIG["shape_filters"]
    frame = daily_df.copy()

    rise_30d = frame['rise_ratio_30d']
    vol_ratio = frame['volume_ratio_5d']
    base_volume = limits['volume_ratio_min']
    short_ma_on_top = frame['ma5'] >= frame['ma20']

    # ---- Pattern 1: pullback then stabilise ----
    enough_rise = rise_30d.ge(limits['rise_ratio_30d_min'])
    # Rows failing the rise threshold get 1e-8 as denominator; they are
    # rejected by `enough_rise` below regardless of the resulting ratio.
    denominator = rise_30d.where(enough_rise, 1e-8)
    retrace_share = frame['pullback_ratio'] / denominator
    shape1_mask = (
        enough_rise
        & retrace_share.between(limits['pullback_ratio_to_rise_min'],
                                limits['pullback_ratio_to_rise_max'])
        & frame['pullback_days'].between(limits['pullback_days_min'],
                                         limits['pullback_days_max'])
        & (frame['close'].ge(frame['ma5']) & short_ma_on_top)
        & vol_ratio.ge(base_volume)
    )

    # ---- Pattern 2: consolidation breakout (uses the computed amplitude) ----
    mean_amplitude = frame['consolidation_amplitude']
    shape2_mask = (
        mean_amplitude.le(limits['consolidation_amplitude_max'])
        & mean_amplitude.notna()
        & frame['close'].ge(frame['high_30d'] * limits['breakthrough_ratio'])
        & vol_ratio.ge(base_volume * 1.3)
        & frame['is_high_open'].eq(True)
    )

    # ---- Pattern 3: steady small gains ----
    shape3_mask = (
        frame['consecutive_up_days'].ge(limits['consecutive_up_days_min'])
        & rise_30d.between(limits['small_rise_ratio_30d_min'],
                           limits['small_rise_ratio_30d_max'])
        & short_ma_on_top
        & vol_ratio.ge(base_volume * 1.1)
        & frame['rsi14'].le(limits['rsi_safe_max'])
    )

    # ---- Union of the three patterns ----
    selected = frame[shape1_mask | shape2_mask | shape3_mask].copy()

    # Log statistics for the day.
    mean_amplitude_pct = frame['calc_amplitude'].mean() * 100
    log_msg(f"形态筛选统计：调整企稳{shape1_mask.sum()}只 | 盘整突破{shape2_mask.sum()}只 | 小步加速{shape3_mask.sum()}只")
    log_msg(f"当日平均振幅：{mean_amplitude_pct:.2f}%（动态计算）")
    log_msg(f"强制筛选后股票池大小：{len(selected)}/{len(frame)}")

    # Helper columns are not part of the result; names that are not present
    # as columns are ignored.
    helper_cols = ['valid_rise', 'pullback_to_rise', 'calc_amplitude', 'consolidation_amplitude']
    return selected.drop(columns=helper_cols, errors='ignore')

# ======================== 步骤3：技术指标排序（完全不变） ========================
def score_stocks(filtered_df):
    """Score the filtered stocks and return them sorted best-first.

    Five component scores are computed and combined with the weights in
    ``CONFIG["sort_weights"]`` into ``total_score``:

    * volume strength: min-max scaled ``volume_ratio_5d`` (0-100)
    * price rise: ``consecutive_up_days`` relative to the day's maximum
    * MACD momentum: scaled histogram (0-50) plus line/signal gap term,
      clipped to 0-100
    * RSI position: 100 at RSI 40, falling linearly, clipped to 0-100
    * Bollinger proximity: 100 when close equals the lower band, 0 from
      10% above it, clipped to 0-100

    Args:
        filtered_df: Output of ``filter_stocks`` for one day.

    Returns:
        ``filtered_df`` itself when it has no rows; otherwise a new frame
        with the score columns added, sorted by ``total_score`` descending
        with a fresh 0-based index.
    """
    if not len(filtered_df):
        return filtered_df

    def scaled(series, top):
        """Min-max scale `series` to [0, top]; the epsilon avoids 0/0."""
        low, high = series.min(), series.max()
        return ((series - low) / (high - low + 1e-8)) * top

    df = filtered_df.copy()
    weights = CONFIG["sort_weights"]

    # 1. Volume strength
    df['volume_score'] = scaled(df['volume_ratio_5d'], 100)

    # 2. Price rise (fall back to 1 when the maximum is not positive)
    longest_run = df['consecutive_up_days'].max()
    if not longest_run > 0:
        longest_run = 1
    df['price_rise_score'] = (df['consecutive_up_days'] / longest_run) * 100

    # 3. MACD momentum
    df['macd_hist_score'] = scaled(df['macd_hist'], 50)
    line_gap = df['macd_line'] - df['signal_line']
    df['macd_cross_score'] = ((line_gap + 0.1) / (0.1 + 0.1 + 1e-8)) * 50
    df['macd_score'] = (df['macd_hist_score'] + df['macd_cross_score']).clip(0, 100)

    # 4. RSI position
    rsi_distance = abs(df['rsi14'] - 40)
    df['rsi_score'] = (100 - (rsi_distance / 40) * 100).clip(0, 100)

    # 5. Closeness to the lower Bollinger band
    band_ratio = df['close'] / df['bollinger_lower']
    df['bollinger_score'] = ((1.1 - band_ratio) / (1.1 - 1.0 + 1e-8) * 100).clip(0, 100)

    # Weighted total
    df['total_score'] = (
        df['volume_score'] * weights['volume_ratio_5d']
        + df['price_rise_score'] * weights['price_rise_score']
        + df['macd_score'] * weights['macd_score']
        + df['rsi_score'] * weights['rsi_score']
        + df['bollinger_score'] * weights['bollinger_score']
    )

    ranked = df.sort_values(by='total_score', ascending=False).reset_index(drop=True)
    log_msg(f"柔性排序完成：有效标的{len(ranked)}只，最高得分{ranked['total_score'].max():.2f}")
    return ranked

# ======================== 批量处理单交易日（完全不变） ========================
def process_single_day(daily_df, trade_date):
    """Filter, score and save the top picks for one trading day.

    Args:
        daily_df: All rows of the factor table for ``trade_date``.
        trade_date: The day being processed; must support ``strftime``.

    Returns:
        The top ``CONFIG["top_n"]`` rows with the result columns plus a
        ``trade_date`` string column, or an empty DataFrame when nothing
        passes the filters. A non-empty result is also written to
        ``selection_<YYYYmmdd>.csv`` in ``CONFIG["result_root_dir"]``.
    """
    day_label = trade_date.strftime('%Y-%m-%d')
    log_msg(f"\n===== 处理 {day_label} 选股 =====")

    candidates = filter_stocks(daily_df)
    if not len(candidates):
        log_msg("⚠️ 当日无符合形态的股票，建议放宽：形态1回撤占比→10% 或 形态2振幅→6% 或 形态3连续上涨→4天")
        return pd.DataFrame()

    ranked = score_stocks(candidates)
    if not len(ranked):
        log_msg("⚠️ 排序后无标的，跳过当日")
        return pd.DataFrame()

    # Columns written to the daily CSV, in output order.
    result_cols = [
        'stock_code', 'close', 'rise_ratio_30d', 'pullback_ratio',
        'pullback_days', 'volume_ratio_5d', 'rsi14', 'total_score',
        'consecutive_up_days', 'macd_hist', 'bollinger_lower', 'ma20'
    ]
    top_df = (
        ranked.head(CONFIG["top_n"])
        .loc[:, result_cols]
        .reset_index(drop=True)
    )
    top_df['trade_date'] = day_label

    file_name = f"selection_{trade_date.strftime('%Y%m%d')}.csv"
    daily_save_path = os.path.join(CONFIG["result_root_dir"], file_name)
    # utf-8-sig writes a BOM at the start of the file.
    top_df.to_csv(daily_save_path, index=False, encoding='utf-8-sig')
    log_msg(f"✅ 当日结果保存：{daily_save_path}（Top{len(top_df)}）")

    return top_df

# ======================== 批量主流程（完全不变） ========================
def batch_selection():
    """Run the whole pipeline: load, validate, derive, select per day, save.

    Steps:
        1. Initialise the output directory and log file.
        2. Load the factor table from ``CONFIG["factortable_path"]``.
        3. Validate the required columns, then normalise ``date`` to
           ``datetime.date`` values.
        4. Compute the derived indicators over the full history.
        5. For each trading day with at least 1000 rows, run
           ``process_single_day``; days with fewer rows are logged and
           skipped.
        6. Concatenate all daily picks and write them to
           ``CONFIG["all_result_save_path"]``.

    Raises:
        Exception: Any error is logged through ``log_msg`` and re-raised.
    """
    try:
        init_environment()
        log_msg("="*60 + " 多形态组合选股（修复振幅依赖） " + "="*60)

        log_msg(f"加载数据：{CONFIG['factortable_path']}")
        df = pd.read_parquet(CONFIG["factortable_path"])
        # Validate before touching any column so that a missing 'date'
        # produces the descriptive ValueError instead of a bare KeyError.
        df = validate_data(df)
        df['date'] = pd.to_datetime(df['date']).dt.date

        df = pre_calculate_indicators(df)

        # Split once by date (ascending) instead of re-scanning the whole
        # frame with a boolean mask for every trading day.
        days = df.groupby('date', sort=True)
        day_count = days.ngroups
        log_msg(f"检测到 {day_count} 个交易日，开始批量处理...")

        all_results = []
        for trade_date, daily_df in days:
            # Days with fewer than 1000 rows are treated as incomplete data.
            if len(daily_df) < 1000:
                log_msg(f"⚠️ {trade_date} 数据异常（股票数={len(daily_df)}），跳过")
                continue

            daily_result = process_single_day(daily_df, trade_date)
            if not daily_result.empty:
                all_results.append(daily_result)

        if all_results:
            all_result_df = pd.concat(all_results, ignore_index=True)
            all_result_df.to_csv(CONFIG["all_result_save_path"], index=False, encoding='utf-8-sig')
            log_msg(f"\n" + "="*60)
            log_msg(f"✅ 批量选股完成！")
            log_msg(f"📊 统计：共处理 {day_count} 交易日，有效结果 {len(all_result_df)} 条")
            log_msg(f"📁 每日结果目录：{CONFIG['result_root_dir']}")
            log_msg(f"📁 汇总结果路径：{CONFIG['all_result_save_path']}")
        else:
            log_msg(f"\n" + "="*60)
            log_msg(f"⚠️ 无任何选股结果，建议按日志提示放宽对应形态条件")

    except Exception as e:
        # Top-level boundary: record the failure in the log, then propagate.
        log_msg(f"❌ 批量处理错误：{str(e)}")
        raise

# ======================== 执行入口 ========================
if __name__ == "__main__":
    # Start the batch run only when this file is executed as a script,
    # not when it is imported as a module.
    batch_selection()

[2025-10-24 11:25:49] ✅ 初始化完成：结果目录=.\daily_selection_results_aligned
[2025-10-24 11:25:49] 加载数据：D:\workspace\xiaoyao\data\factortable.parquet
[2025-10-24 11:25:51] ✅ 数据字段验证通过（支撑多形态筛选）
[2025-10-24 11:25:58] ❌ 批量处理错误：'amplitude'


KeyError: 'amplitude'