In [None]:
import pandas as pd
import numpy as np
import os

# --------------------------
# 配置参数（极致宽松：优先保证数量）
# --------------------------
# Runtime configuration for the relaxed short-term stock screen:
# input/output paths, filter thresholds, and ranking weights.
CONFIG = {
    "factortable_path": r'./factortable.parquet',
    "selection_result_path": r'./short_term_ultra_relax.csv',
    "daily_result_dir": r'./daily_ultra_relax',
    "log_path": r'./ultra_relax_log.txt',
    "top_n": 200,  # keep at most 200 ranked rows per trading day
    # Filter parameters: deliberately loose (avoid stacking many hard conditions)
    "short_term_shape": {
        "consecutive_up_days_min": 1,        # consecutive up days >= 1 (the key relaxation)
        "consecutive_up_days_max": 10,       # consecutive up days <= 10
        "rise_ratio_30d_min": 0.03,          # 30-day rise >= 3% (admit weak starts)
        "rise_ratio_30d_max": 0.5,           # 30-day rise <= 50% (avoid high risk)
        "daily_rise_min": 0.001,             # daily rise >= 0.1% (very mild gain)
        "daily_rise_max": 0.08,              # daily rise <= 8% (allow somewhat stronger moves)
        "volume_ratio_min": 0.7,             # volume ratio >= 0.7 (low volume tolerated)
        "volume_ratio_consecutive_min": 0,   # consecutive days >= 0 (meeting it today is enough)
        "rsi_safe_max": 75,                  # RSI <= 75 (overbought limit greatly relaxed)
        "ma_trend": False,                   # bullish MA alignment not mandatory (ranking input only)
        "macd_positive": False,              # positive MACD not mandatory
        "price_ma5_deviation_max": 0.05,     # close deviates from the 5-day MA by <= 5% (looser support)
        # Auction: changed from "must open high" to "auction volume is enough"
        "auction_volume_ratio_min": 0.03,    # auction volume ratio >= 3% (low bar)
        "high_open_min": -0.01,              # allow a small gap down of up to 1%
        "high_open_max": 0.04,               # allow a gap up of up to 4%
    },
    # Ranking weights: emphasise the core factors (ranking compensates for the loose filter)
    "sort_weights": {
        "auction_score": 0.3,        # auction volume + gap-up strength
        "price_volume_score": 0.35,  # price/volume agreement (core)
        "trend_strength_score": 0.2, # consecutive up days + moving-average trend
        "support_score": 0.15,       # short-term support (closeness to the 5-day MA)
    }
}

# --------------------------
# 工具函数（确保字段不丢失）
# --------------------------
def init_environment():
    """Prepare the output directory and start a fresh log file.

    Creates ``CONFIG["daily_result_dir"]`` if needed, truncates
    ``CONFIG["log_path"]`` and writes a timestamped start banner into it,
    then records a confirmation line through ``log_msg``.
    """
    result_dir = CONFIG["daily_result_dir"]
    os.makedirs(result_dir, exist_ok=True)

    # Mode 'w' discards any log left over from a previous run.
    with open(CONFIG["log_path"], 'w', encoding='utf-8') as log_file:
        started_at = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
        log_file.write(f"【极致宽松版选股启动】{started_at}\n")

    log_msg(f"✅ 环境初始化完成，每日结果目录：{result_dir}")

def log_msg(msg):
    """Print *msg* with a timestamp prefix and append the same line to the log file.

    The log file location is ``CONFIG["log_path"]``; it is opened in append
    mode on every call.
    """
    stamp = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
    entry = f"[{stamp}] {msg}"
    print(entry)
    with open(CONFIG["log_path"], 'a', encoding='utf-8') as log_file:
        print(entry, file=log_file)

def validate_data(df):
    """Check that *df* carries every column the pipeline reads.

    Returns *df* unchanged when all columns are present and logs the row
    count; raises ``ValueError`` listing the missing column names otherwise.
    """
    expected = (
        # identity and raw prices
        'stock_code', 'date', 'close', 'open', 'volume', 'high', 'low',
        # trend factors
        'consecutive_up_days', 'is_high_open', 'ma5', 'ma20',
        # MACD and volume factors
        'macd_line', 'signal_line', 'macd_hist', 'volume_ratio_5d',
        # pullback, band and RSI factors
        'high_30d', 'pullback_ratio', 'pullback_days', 'bollinger_lower', 'rsi14',
        # opening-auction factors
        'auction_rise_ratio', 'auction_volume_ratio',
    )
    missing_cols = [name for name in expected if name not in df.columns]
    if missing_cols:
        raise ValueError(f"Factortable缺少字段：{missing_cols}")

    row_count = len(df)
    log_msg(f"✅ 字段验证通过，共{row_count}条记录")
    return df

# --------------------------
# 步骤1：衍生指标（修复字段丢失）
# --------------------------
def pre_calculate_indicators(df: pd.DataFrame) -> pd.DataFrame:
    """Add the derived indicator columns used by the filter and ranking steps.

    Per-stock quantities are computed with vectorised group operations
    instead of ``groupby.apply`` followed by a merge: ``apply`` with
    ``include_groups=False`` removes ``stock_code`` from each group, so
    selecting it inside the helper raised
    ``KeyError: "['stock_code'] not in index"``.

    Columns read: ``stock_code``, ``date``, ``close``, ``open``,
    ``volume_ratio_5d``, ``ma5``, ``macd_line``, ``signal_line``.

    Columns added:
        rise_ratio_30d: change of ``close`` versus 30 rows earlier for the
            same stock, clipped to [0, 1]; 0 where no such row exists.
        daily_rise_ratio: (close - open) / open.
        consecutive_volume_days: length of the current run of rows with
            ``volume_ratio_5d >= 1.0`` for the stock (0 when not met).
        price_above_ma5: 1 if close >= ma5, else 0.
        ma5_deviation: (close - ma5) / ma5.
        macd_cross: 1 if macd_line >= signal_line, else 0.

    Returns:
        A new DataFrame sorted by ``stock_code`` then ``date`` with a fresh
        RangeIndex; the input frame is not modified.
    """
    log_msg("开始计算极致宽松版衍生指标...")

    # The lookback and run-length logic below is positional, so each stock's
    # rows must be contiguous and in chronological order.
    df = df.sort_values(['stock_code', 'date'], kind='stable').reset_index(drop=True)
    codes = df['stock_code']

    # 1. 30-row rise ratio per stock (a zero base price is replaced to avoid
    #    dividing by zero; negative moves are clipped to 0).
    close_30_ago = df.groupby('stock_code', sort=False)['close'].shift(30)
    rise = (df['close'] - close_30_ago) / close_30_ago.replace(0, 0.0001)
    df['rise_ratio_30d'] = rise.clip(0, 1.0).fillna(0)

    # 2. Intraday rise (open -> close).
    df['daily_rise_ratio'] = (df['close'] - df['open']) / df['open'].replace(0, 0.0001)

    # 3. Consecutive days with volume ratio >= 1.0. A new run starts whenever
    #    the flag flips; grouping by stock as well keeps runs from spanning
    #    two different stocks.
    vol_over = (df['volume_ratio_5d'] >= 1.0).astype(int)
    run_id = vol_over.ne(vol_over.shift()).cumsum()
    df['consecutive_volume_days'] = vol_over.groupby([codes, run_id], sort=False).cumsum()

    # 4. Position of the close relative to the 5-day moving average.
    df['price_above_ma5'] = (df['close'] >= df['ma5']).astype(int)
    df['ma5_deviation'] = (df['close'] - df['ma5']) / df['ma5'].replace(0, 0.0001)

    # 5. MACD line at or above its signal line (used for ranking, not filtering).
    df['macd_cross'] = (df['macd_line'] >= df['signal_line']).astype(int)

    log_msg("✅ 极致宽松版衍生指标计算完成")
    return df

# --------------------------
# 步骤2：形态筛选（极致宽松，减少强制条件）
# --------------------------
def filter_single_shape(daily_df: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of one trading day that pass the relaxed shape filter.

    All thresholds come from ``CONFIG["short_term_shape"]`` (previously the
    same numbers were hard-coded here and the config dict was ignored, so
    editing the config had no effect). A row is kept when every condition
    below holds:

    * ``consecutive_up_days`` within [consecutive_up_days_min, consecutive_up_days_max]
    * ``rise_ratio_30d`` within [rise_ratio_30d_min, rise_ratio_30d_max]
    * ``daily_rise_ratio`` within [daily_rise_min, daily_rise_max]
    * ``volume_ratio_5d`` >= volume_ratio_min
    * ``auction_volume_ratio`` >= auction_volume_ratio_min
    * ``auction_rise_ratio`` within [high_open_min, high_open_max]

    The input frame is not modified; the result is an independent copy.
    A hint is logged when fewer than 100 or more than 250 rows survive.
    """
    params = CONFIG["short_term_shape"]

    up_streak_ok = daily_df['consecutive_up_days'].between(
        params["consecutive_up_days_min"], params["consecutive_up_days_max"]
    )
    rise_30d_ok = daily_df['rise_ratio_30d'].between(
        params["rise_ratio_30d_min"], params["rise_ratio_30d_max"]
    )
    daily_rise_ok = daily_df['daily_rise_ratio'].between(
        params["daily_rise_min"], params["daily_rise_max"]
    )
    volume_ok = daily_df['volume_ratio_5d'] >= params["volume_ratio_min"]
    auction_volume_ok = daily_df['auction_volume_ratio'] >= params["auction_volume_ratio_min"]
    # A small gap down is tolerated, so the lower bound may be negative.
    auction_open_ok = daily_df['auction_rise_ratio'].between(
        params["high_open_min"], params["high_open_max"]
    )

    total_cond = (
        up_streak_ok & rise_30d_ok & daily_rise_ok
        & volume_ok & auction_volume_ok & auction_open_ok
    )
    # Boolean indexing already yields a new frame; copy() makes it explicitly
    # independent so later column assignments never touch the caller's data.
    filtered_df = daily_df[total_cond].copy()

    log_msg(f"极致宽松版筛选：符合条件{len(filtered_df)}只（目标100-200）")
    if len(filtered_df) < 100:
        log_msg(f"⚠️ 仍不足，建议：1. 连续上涨≥1天→取消 2. 竞价量比≥2% 3. 30日涨幅≥2%（三选一）")
    elif len(filtered_df) > 250:
        log_msg(f"⚠️ 过多，建议：1. 量能比≥0.8 2. 单日涨幅≤7% 3. 竞价量比≥4%（三选一）")
    return filtered_df

# --------------------------
# 步骤3：排序（强化筛选，弥补宽松缺口）
# --------------------------
def score_by_short_term_factors(filtered_df: pd.DataFrame) -> pd.DataFrame:
    """Score the filtered rows and return the top ``CONFIG["top_n"]`` by total score.

    Four component scores are built and combined with
    ``CONFIG["sort_weights"]``:

    * ``auction_score`` - auction volume (up to 70) + auction gap (up to 30),
      clipped to [0, 100].
    * ``price_volume_score`` - volume ratio (up to 60) + intraday rise
      (up to 40), clipped to [0, 100].
    * ``trend_strength_score`` - consecutive up days (up to 40), MA5-over-MA20
      spread (clipped to [0, 30]) and MACD-above-signal flag (0 or 30).
    * ``support_score`` - closeness to the 5-day MA and to the lower
      Bollinger band, clipped to [0, 100].

    Lower normalisation bounds are read from ``CONFIG["short_term_shape"]``
    so the scores stay aligned with the filter thresholds.

    Args:
        filtered_df: rows of a single trading day that passed the filter.

    Returns:
        *filtered_df* itself when it is empty; otherwise a new frame with the
        score columns added, sorted by ``total_score`` descending, truncated
        to ``top_n`` rows, with a fresh RangeIndex.

    Raises:
        KeyError: if a non-empty *filtered_df* has no ``stock_code`` column.
    """
    if len(filtered_df) == 0:
        return filtered_df
    # Fail early and clearly. The previous post-hoc "restore stock_code"
    # fallback read the column from the very frame that lacked it, and would
    # have misaligned codes after sorting even if it had existed.
    if 'stock_code' not in filtered_df.columns:
        raise KeyError("'stock_code' column is required for ranking")

    df = filtered_df.copy()
    weights = CONFIG["sort_weights"]
    params = CONFIG["short_term_shape"]

    # Upper ends of the normalisation ranges that have no config entry.
    auction_vol_full = 0.2
    volume_ratio_full = 2.0
    eps = 1e-8  # keeps the denominators non-zero

    # 1. Auction score: volume first, gap strength second.
    vol_min = params["auction_volume_ratio_min"]
    open_min, open_max = params["high_open_min"], params["high_open_max"]
    df['auction_vol_score'] = ((df['auction_volume_ratio'] - vol_min) / (auction_vol_full - vol_min + eps)) * 70
    df['auction_rise_score'] = ((df['auction_rise_ratio'] - open_min) / (open_max - open_min + eps)) * 30
    df['auction_score'] = (df['auction_vol_score'] + df['auction_rise_score']).clip(0, 100)

    # 2. Price/volume agreement (largest weight).
    ratio_min = params["volume_ratio_min"]
    rise_min, rise_max = params["daily_rise_min"], params["daily_rise_max"]
    df['volume_strength_score'] = ((df['volume_ratio_5d'] - ratio_min) / (volume_ratio_full - ratio_min + eps)) * 60
    df['price_strength_score'] = ((df['daily_rise_ratio'] - rise_min) / (rise_max - rise_min + eps)) * 40
    df['price_volume_score'] = (df['volume_strength_score'] + df['price_strength_score']).clip(0, 100)

    # 3. Trend strength: up-day streak relative to the day's longest streak,
    #    MA spread, and MACD flag.
    longest_streak = df['consecutive_up_days'].max()
    up_days_max = longest_streak if longest_streak > 0 else 1  # also covers NaN
    df['up_days_score'] = (df['consecutive_up_days'] / up_days_max) * 40
    df['ma_trend_score'] = ((df['ma5'] - df['ma20']) / df['ma20'].replace(0, 0.0001) * 1000).clip(0, 30)
    df['macd_score'] = df['macd_cross'] * 30
    df['trend_strength_score'] = df['up_days_score'] + df['ma_trend_score'] + df['macd_score']

    # 4. Short-term support: full marks at zero deviation from the 5-day MA,
    #    none at the configured maximum deviation; plus closeness to the
    #    lower Bollinger band.
    df['ma5_support_score'] = (1 - df['ma5_deviation'] / params["price_ma5_deviation_max"]) * 70
    df['bollinger_support_score'] = (1.15 - df['close'] / df['bollinger_lower']) / (0.15 + eps) * 30
    df['support_score'] = (df['ma5_support_score'] + df['bollinger_support_score']).clip(0, 100)

    # Weighted total.
    df['total_score'] = (
        df['auction_score'] * weights['auction_score'] +
        df['price_volume_score'] * weights['price_volume_score'] +
        df['trend_strength_score'] * weights['trend_strength_score'] +
        df['support_score'] * weights['support_score']
    )

    df = df.sort_values(by='total_score', ascending=False).head(CONFIG["top_n"]).reset_index(drop=True)
    log_msg(f"排序完成：最高得分{df['total_score'].max():.2f}，前50平均{df['total_score'].head(50).mean():.2f}")
    return df

# --------------------------
# 步骤4：单交易日处理（确保字段完整）
# --------------------------
def process_single_trade_date(df, trade_date, min_rows=1000):
    """Filter, rank and save the selection for one trading day.

    Args:
        df: factor table with derived indicators; ``df['date']`` must be a
            datetime column.
        trade_date: the ``datetime.date`` to process.
        min_rows: minimum number of rows the day must have to be considered
            valid; days with fewer rows are skipped. Defaults to 1000.

    Returns:
        The ranked rows restricted to the report columns plus a
        ``trade_date`` string column, or an empty DataFrame when the day is
        skipped (too few rows, or nothing passes the filter).

    Raises:
        KeyError: if the ranked result has no ``stock_code`` column.

    Side effects:
        Writes ``ultra_relax_<YYYYMMDD>.csv`` into
        ``CONFIG["daily_result_dir"]`` and logs progress.
    """
    log_msg(f"\n===== 处理交易日：{trade_date.strftime('%Y-%m-%d')} =====")
    daily_df = df[df['date'].dt.date == trade_date].copy()
    if len(daily_df) < min_rows:
        log_msg(f"⚠️ 当日数据异常，跳过")
        return pd.DataFrame()

    filtered_df = filter_single_shape(daily_df)
    if len(filtered_df) == 0:
        log_msg("⚠️ 无符合标的，跳过")
        return pd.DataFrame()

    ranked_df = score_by_short_term_factors(filtered_df)
    # A result without stock codes is useless. The former positional
    # "restore" from filtered_df could not work (the ranking reorders rows),
    # so fail loudly instead of writing an unlabeled file.
    if 'stock_code' not in ranked_df.columns:
        raise KeyError("'stock_code' column is missing from the ranked result")

    # Report columns; any that are absent are skipped rather than raising.
    result_cols = [
        'stock_code', 'date', 'close', 'open', 'volume',
        'consecutive_up_days', 'rise_ratio_30d', 'daily_rise_ratio',
        'volume_ratio_5d', 'auction_rise_ratio', 'auction_volume_ratio',
        'rsi14', 'ma5', 'ma20', 'macd_cross', 'ma5_deviation',
        'total_score', 'auction_score', 'price_volume_score'
    ]
    valid_cols = [col for col in result_cols if col in ranked_df.columns]
    top_df = ranked_df[valid_cols].reset_index(drop=True)
    top_df['trade_date'] = trade_date.strftime('%Y-%m-%d')

    date_str = trade_date.strftime('%Y%m%d')
    daily_save_path = os.path.join(CONFIG["daily_result_dir"], f"ultra_relax_{date_str}.csv")
    # utf-8-sig adds a BOM to the CSV.
    top_df.to_csv(daily_save_path, index=False, encoding='utf-8-sig')
    log_msg(f"✅ 当日选股完成：{len(top_df)}只标的")
    return top_df

# --------------------------
# 主流程
# --------------------------
def run_short_term_selection():
    """Run the whole selection pipeline from factor table to result CSV.

    Loads the parquet factor table, validates it, computes derived
    indicators, processes every trading day in ascending order, and writes
    the concatenated daily selections to ``CONFIG["selection_result_path"]``.
    Any exception is logged and then re-raised.
    """
    try:
        init_environment()

        factor_df = pd.read_parquet(CONFIG["factortable_path"])
        factor_df['date'] = pd.to_datetime(factor_df['date'])
        # Drop whatever index the parquet file carried before deriving columns.
        factor_df = validate_data(factor_df).reset_index(drop=True)
        factor_df = pre_calculate_indicators(factor_df)

        trade_dates = sorted(factor_df['date'].dt.date.unique())
        log_msg(f"检测到{len(trade_dates)}个交易日，开始极致宽松版选股...")

        # Days are processed lazily, one after another; empty results are dropped.
        per_day = (process_single_trade_date(factor_df, day) for day in trade_dates)
        all_results = [day_df for day_df in per_day if not day_df.empty]

        if not all_results:
            log_msg(f"\n⚠️ 无选股结果，按日志提示取消连续上涨条件")
            return

        final_result = pd.concat(all_results, ignore_index=True)
        final_result.to_csv(CONFIG["selection_result_path"], index=False, encoding='utf-8-sig')
        log_msg(f"\n✅ 极致宽松版选股完成！累计{len(final_result)}条记录，路径：{CONFIG['selection_result_path']}")
    except Exception as e:
        log_msg(f"❌ 选股失败：{str(e)}")
        raise

# Run the selection pipeline only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    run_short_term_selection()

[2025-10-24 12:51:42] ✅ 环境初始化完成，每日结果目录：./daily_quantity_priority
[2025-10-24 12:51:44] ✅ 字段验证通过，共899630条记录
[2025-10-24 12:51:44] 开始计算数量优先版衍生指标...
[2025-10-24 12:51:59] ✅ 数量优先版衍生指标计算完成
[2025-10-24 12:51:59] 检测到175个交易日，开始数量优先版选股...
[2025-10-24 12:51:59] 
===== 处理交易日：2025-02-06 =====
[2025-10-24 12:51:59] 数量优先版筛选：符合条件0只（目标100-200）
[2025-10-24 12:51:59] ⚠️ 仍不足，建议：1. 连续量能≥0天 2. 高开≥0.3% 3. 去掉MACD正条件（三选一）
[2025-10-24 12:51:59] ⚠️ 无符合标的，跳过
[2025-10-24 12:51:59] 
===== 处理交易日：2025-02-07 =====
[2025-10-24 12:52:00] 数量优先版筛选：符合条件0只（目标100-200）
[2025-10-24 12:52:00] ⚠️ 仍不足，建议：1. 连续量能≥0天 2. 高开≥0.3% 3. 去掉MACD正条件（三选一）
[2025-10-24 12:52:00] ⚠️ 无符合标的，跳过
[2025-10-24 12:52:00] 
===== 处理交易日：2025-02-10 =====
[2025-10-24 12:52:00] 数量优先版筛选：符合条件0只（目标100-200）
[2025-10-24 12:52:00] ⚠️ 仍不足，建议：1. 连续量能≥0天 2. 高开≥0.3% 3. 去掉MACD正条件（三选一）
[2025-10-24 12:52:00] ⚠️ 无符合标的，跳过
[2025-10-24 12:52:00] 
===== 处理交易日：2025-02-11 =====
[2025-10-24 12:52:00] 数量优先版筛选：符合条件0只（目标100-200）
[2025-10-24 12:52:00] ⚠️ 仍不足，建议：1. 连续量能≥0天 2. 高开≥0.3

KeyError: "['stock_code'] not in index"