In [None]:
import pandas as pd
import numpy as np
import pickle
from pathlib import Path
from tqdm import tqdm
import warnings
# Silence every warning category for the rest of this session.
warnings.filterwarnings("ignore")

# -------------------------- 1. Paths (kept consistent with stage 1) --------------------------
# Forward slashes resolve to the same directories on Windows, and — unlike a
# ".\\" prefix — do not produce a directory literally named ".\..." on POSIX.
PRECOMPUTE_DIR = "./precomputed_data"
SELECTION_SAVE_DIR = "./selection_results_optimized"  # output directory for the relaxed selection
Path(SELECTION_SAVE_DIR).mkdir(exist_ok=True, parents=True)

# -------------------------- 2. Load the precomputed data --------------------------
print("1. 读取预计算数据...")
# Per-row stock data with indicator columns; "date" is parsed into datetimes
# so it can be compared against the entries of market_days below.
df = pd.read_csv(
    f"{PRECOMPUTE_DIR}/stock_data_with_indicators.csv",
    parse_dates=["date"]
)
# NOTE(review): pickle.load can execute arbitrary code embedded in the file.
# Only load pickles written by a trusted earlier stage of this pipeline.
with open(f"{PRECOMPUTE_DIR}/valid_stocks.pkl", "rb") as f:
    valid_stocks = pickle.load(f)
with open(f"{PRECOMPUTE_DIR}/market_days.pkl", "rb") as f:
    market_days = pickle.load(f)

# -------------------------- 3. Key optimisation: relaxed selection rules --------------------------
print("2. 计算每日行业涨停数...")
# One row per (date, industry) pair holding the sum of is_limit_up over the
# rows of that pair; only observed group combinations are kept.
daily_industry_limit = (
    df.groupby(["date", "sw_l1_industry_code"], observed=True)["is_limit_up"]
    .sum()
    .rename("limit_up_count")
    .reset_index()
    .rename(columns={"sw_l1_industry_code": "industry_code"})
)

print("3. 执行优化后选股逻辑...")
daily_selection_result = []

# Thresholds of the relaxed selection rules.
MIN_MARKET_LIMIT_UPS = 10   # days with fewer limit-up stocks are skipped
TOP_INDUSTRY_COUNT = 3      # number of leading industries to pick from
MIN_VOLUME_RATIO = 1.0      # volume_ratio must exceed this value
MIN_PRICE_CHANGE = 2        # lower bound (exclusive) of price_change
MAX_PRICE_CHANGE = 9.8      # upper bound (exclusive) of price_change
FALLBACK_MIN_CHANGE = 1     # price_change floor used by the fallback rule
FALLBACK_PICKS = 2          # at most this many stocks come from the fallback
MAX_DAILY_PICKS = 4         # at most this many stocks are selected per day

# One result record is appended per entry of market_days, whether or not any
# stock is selected, so the result list stays aligned with the calendar.
for date in tqdm(market_days, desc="每日选股进度"):
    date_df = df[df["date"] == date]
    if len(date_df) < 2:
        daily_selection_result.append({
            "date": date, "selected_stocks": "", "market_limit_count": 0,
            "top3_industries": "", "selection_status": "无有效股票"
        })
        continue

    # Relaxation 1: market-sentiment gate — skip the day when the total number
    # of limit-up rows is below the threshold.
    market_limit_count = date_df["is_limit_up"].sum()
    if market_limit_count < MIN_MARKET_LIMIT_UPS:
        daily_selection_result.append({
            "date": date, "selected_stocks": "", "market_limit_count": market_limit_count,
            "top3_industries": "",
            "selection_status": f"市场情绪不达标（涨停数<{MIN_MARKET_LIMIT_UPS}）"
        })
        continue

    # Relaxation 2: leading industries by limit-up count. nlargest returns all
    # rows when fewer than TOP_INDUSTRY_COUNT exist, so days with only one or
    # two industries are still handled. (Asking for len(...) rows, as before,
    # returned every industry and turned the industry filter into a no-op.)
    date_industry_data = daily_industry_limit[daily_industry_limit["date"] == date]
    top3_industries = date_industry_data.nlargest(
        TOP_INDUSTRY_COUNT, "limit_up_count"
    )["industry_code"].tolist()
    top3_industries_str = ",".join([str(ind) for ind in top3_industries])

    # Rows that are in a leading industry and not paused; shared by the main
    # rule and the fallback rule.
    in_top_industry = (
        date_df["sw_l1_industry_code"].isin(top3_industries)
        & (date_df["paused"] == 0.0)
    )

    # Relaxation 3: main rule — volume_ratio above the threshold and either
    # limit-up or a price_change strictly inside the allowed band.
    eligible_stocks = date_df[
        in_top_industry
        & (date_df["volume_ratio"] > MIN_VOLUME_RATIO)
        & (
            date_df["is_limit_up"]
            | (
                (date_df["price_change"] > MIN_PRICE_CHANGE)
                & (date_df["price_change"] < MAX_PRICE_CHANGE)
            )
        )
    ]

    # Relaxation 4: fallback — when the main rule matches nothing, take up to
    # FALLBACK_PICKS stocks with the highest price_change among leading-industry
    # rows whose price_change exceeds the fallback floor. This may still be empty.
    if len(eligible_stocks) == 0:
        fallback_stocks = date_df[
            in_top_industry & (date_df["price_change"] > FALLBACK_MIN_CHANGE)
        ].sort_values("price_change", ascending=False)
        eligible_stocks = fallback_stocks.head(FALLBACK_PICKS)

    # Rank by price_change and keep at most MAX_DAILY_PICKS codes.
    eligible_stocks = eligible_stocks.sort_values("price_change", ascending=False)
    selected_codes = eligible_stocks["stock_code"].head(MAX_DAILY_PICKS).tolist()
    selected_codes_str = ",".join(selected_codes)

    # A day with no pick at all must not be reported as a success, otherwise
    # the success-day statistics count days with an empty selection.
    if not selected_codes:
        status = "无符合条件股票"
    elif len(eligible_stocks) >= MAX_DAILY_PICKS:
        status = "选股成功（正常）"
    else:
        status = "选股成功（容错）"

    daily_selection_result.append({
        "date": date, "selected_stocks": selected_codes_str,
        "market_limit_count": market_limit_count, "top3_industries": top3_industries_str,
        "selection_status": status
    })

# -------------------------- 4. Save the results and print summary statistics --------------------------
print("\n4. 保存优化后选股结果...")
# Explicit column list: keeps the schema stable even when no record was
# produced (an empty list would otherwise yield a frame with no columns and
# the column lookups below would raise KeyError).
selection_df = pd.DataFrame(
    daily_selection_result,
    columns=[
        "date", "selected_stocks", "market_limit_count",
        "top3_industries", "selection_status",
    ],
)
# selected_stocks is a comma-joined string; "" means nothing was selected.
selection_df["selected_count"] = selection_df["selected_stocks"].apply(
    lambda x: len(x.split(",")) if x != "" else 0
)
selection_csv_path = f"{SELECTION_SAVE_DIR}/daily_selected_stocks_optimized.csv"
# utf-8-sig writes a BOM at the start of the file.
selection_df.to_csv(selection_csv_path, index=False, encoding="utf-8-sig")

# Summary statistics of the relaxed selection.
valid_days_normal = len(selection_df[selection_df["selection_status"] == "选股成功（正常）"])
valid_days_fallback = len(selection_df[selection_df["selection_status"] == "选股成功（容错）"])
total_valid_days = valid_days_normal + valid_days_fallback
total_selection_count = selection_df["selected_count"].sum()
# Mean over every recorded day, including days on which nothing was selected.
avg_daily_selection = selection_df["selected_count"].mean()
# Guard against an empty trading calendar to avoid ZeroDivisionError.
total_market_days = len(market_days)
success_rate = total_valid_days / total_market_days * 100 if total_market_days else 0.0

print("5. 优化后选股结果统计：")
print(f" - 总交易日数：{total_market_days} 天")
print(f" - 选股成功天数：{total_valid_days} 天（{success_rate:.1f}%）")
print(f"   - 正常选股：{valid_days_normal} 天")
print(f"   - 容错选股：{valid_days_fallback} 天")
print(f" - 累计选股总数：{total_selection_count} 只")
print(f" - 平均每日选股数：{avg_daily_selection:.1f} 只")
print(f" - 优化后结果文件路径：{selection_csv_path}")

# Show the first five successful days (normal and fallback alike).
print("\n前5条选股成功示例（含正常/容错）：")
success_sample = selection_df[selection_df["selection_status"].str.contains("选股成功")].head(5)
print(success_sample[["date", "selected_stocks", "market_limit_count", "selection_status"]].to_string(index=False))

1. 读取预计算数据...
数据验证：
 - 有效股票数：500 只
 - 全市场交易日数：680 天
 - 核心数据总行数：324618 行

2. 计算每日行业涨停数...
3. 执行每日选股（养家心法逻辑）...


每日选股进度: 100%|██████████| 680/680 [00:01<00:00, 448.12it/s]



4. 保存选股结果...
5. 选股结果统计：
 - 总交易日数：680 天
 - 选股成功天数：20 天（2.9%）
 - 累计选股总数：80 只
 - 平均每日选股数：0.1 只
 - 选股结果文件路径：.\selection_results/daily_selected_stocks.csv

前5条选股成功的示例：
      date                                 selected_stocks  market_limit_count            top3_industries
2024-02-08 002217.XSHE,603220.XSHG,688387.XSHG,002583.XSHE                  28 801080.0,801770.0,801200.0
2024-02-19 603019.XSHG,002463.XSHE,000977.XSHE,301236.XSHE                  30 801750.0,801080.0,801770.0
2024-04-17 301039.XSHE,002085.XSHE,603042.XSHG,301215.XSHE                  30 801880.0,801080.0,801770.0
2024-09-27 002500.XSHE,002945.XSHE,600208.XSHG,002797.XSHE                  33 801790.0,801750.0,801180.0
2024-09-30 300674.XSHE,688327.XSHG,300773.XSHE,688981.XSHG                 223 801790.0,801080.0,801750.0
