In [1]:
import pandas as pd
import numpy as np
import talib as ta
import os
from datetime import datetime

# --------------------------
# 配置参数（关键：factors_switch 和 代码引用完全一致）
# --------------------------
CONFIG = {
    "widetable_path": r'D:\workspace\xiaoyao\data\widetable.parquet',  # 你的宽表路径
    "factor_output_path": r'./custom_factors.parquet',  # 因子输出路径
    "log_path": r'./factor_calc_log.txt',
    # 重点：键名用 "factors_switch"（和代码里的引用保持一致，之前少了s）
    "factors_switch": {
        "trend_factors": True,    # 趋势类因子
        "volume_factors": True,   # 量能类因子
        "volatility_factors": True, # 波动类因子
        "industry_factors": True  # 行业类因子
    }
}

# --------------------------
# 工具函数
# --------------------------
def init_log():
    with open(CONFIG["log_path"], 'w', encoding='utf-8') as f:
        f.write(f"【指标计算启动】{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

def log_msg(msg):
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    log_line = f"[{timestamp}] {msg}"
    print(log_line)
    with open(CONFIG["log_path"], 'a', encoding='utf-8') as f:
        f.write(log_line + "\n")

# --------------------------
# 加载基础数据
# --------------------------
def load_base_data():
    log_msg("加载基础数据...")
    core_cols = [
        'date', 'stock_code', 'stock_name', 'open', 'close', 'high', 'low',
        'pre_close', 'volume', 'money', 'sw_l1_industry_name', 'paused'
    ]
    df = pd.read_parquet(CONFIG["widetable_path"], columns=core_cols)
    df["date"] = pd.to_datetime(df["date"])
    df = df[df['paused'] == 0].copy()  # 过滤停牌
    df = df.sort_values(['stock_code', 'date']).reset_index(drop=True)
    log_msg(f"基础数据加载完成：{len(df)}条记录，{df['stock_code'].nunique()}只股票")
    return df

# --------------------------
# 按需计算因子（引用配置时用 "factors_switch"，和配置键名一致）
# --------------------------
def calculate_factors(df):
    log_msg("开始计算因子（按需选择）...")
    df = df.copy()
    # 关键修正：这里用 CONFIG["factors_switch"]（带s），和配置里的键名完全一致
    switch = CONFIG["factors_switch"]
    
    # 1. 趋势类因子
    if switch["trend_factors"]:
        log_msg("计算趋势类因子...")
        df['close_change'] = (df['close'] / df['pre_close'] - 1) * 100  # 日涨跌幅(%)
        df['ma5'] = df.groupby('stock_code')['close'].transform(lambda x: ta.SMA(x, 5))
        df['ma20'] = df.groupby('stock_code')['close'].transform(lambda x: ta.SMA(x, 20))
        df['ma5_above_ma20'] = (df['ma5'] >= df['ma20']).astype(int)
    
    # 2. 量能类因子
    if switch["volume_factors"]:
        log_msg("计算量能类因子...")
        df['volume_ratio_5d'] = df.groupby('stock_code')['volume'].transform(
            lambda x: x / x.rolling(5, min_periods=1).mean().shift(1).replace(0, 0.0001)
        )
        df['turnover'] = df['money'] / (df['close'] * 100)  # 估算换手率
    
    # 3. 波动类因子
    if switch["volatility_factors"]:
        log_msg("计算波动类因子...")
        df['amplitude'] = (df['high'] / df['low'] - 1) * 100  # 振幅(%)
        df['atr'] = df.groupby('stock_code').apply(
            lambda x: ta.ATR(x['high'], x['low'], x['close'], timeperiod=14)
        ).reset_index(level=0, drop=True)
    
    # 4. 行业类因子
    if switch["industry_factors"]:
        log_msg("计算行业类因子...")
        df['industry_rise_rank'] = df.groupby(['date', 'sw_l1_industry_name'])['close_change'].transform(
            lambda x: x.rank(ascending=False, method='min').astype(int)
        )
    
    log_msg("因子计算完成")
    return df

# --------------------------
# 保存因子数据
# --------------------------
def save_factors(df):
    df.to_parquet(CONFIG["factor_output_path"], index=False)
    log_msg(f"因子数据保存至：{CONFIG['factor_output_path']}")
    return df

# --------------------------
# 主函数
# --------------------------
def run_factor_calc():
    try:
        init_log()
        base_df = load_base_data()
        factor_df = calculate_factors(base_df)
        factor_df = save_factors(factor_df)
        return factor_df
    except Exception as e:
        log_msg(f"因子计算失败：{str(e)}")
        raise

# 执行指标计算
if __name__ == "__main__":
    factors = run_factor_calc()

[2025-10-26 16:23:50] 加载基础数据...
[2025-10-26 16:23:58] 基础数据加载完成：3438070条记录，5284只股票
[2025-10-26 16:23:58] 开始计算因子（按需选择）...
[2025-10-26 16:23:58] 计算趋势类因子...
[2025-10-26 16:24:02] 计算量能类因子...
[2025-10-26 16:24:07] 计算波动类因子...


  df['atr'] = df.groupby('stock_code').apply(


[2025-10-26 16:24:11] 计算行业类因子...
[2025-10-26 16:24:22] 因子计算完成
[2025-10-26 16:24:29] 因子数据保存至：./custom_factors.parquet


In [2]:
import pandas as pd
import numpy as np
import os
from datetime import datetime

# --------------------------
# 配置参数
# --------------------------
CONFIG = {
    "factor_input_path": r'./custom_factors.parquet',  # 上一步生成的因子
    "selection_output_path": r'./daily_selection.csv',  # 选股结果
    "log_path": r'./selection_log.txt',
    # 选股宽松条件（确保每日有票可交易）
    "filters": {
        "min_close_change": -5,  # 当日涨跌幅≥-5%（不选暴跌股）
        "max_amplitude": 15,     # 振幅≤15%（不选极端波动）
        "min_volume_ratio": 0.8, # 量能比≥0.8（有量能）
        "top_n": 10              # 每日选前10只（确保有交易标的）
    }
}

# --------------------------
# 工具函数
# --------------------------
def init_log():
    with open(CONFIG["log_path"], 'w', encoding='utf-8') as f:
        f.write(f"【选股启动】{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

def log_msg(msg):
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    log_line = f"[{timestamp}] {msg}"
    print(log_line)
    with open(CONFIG["log_path"], 'a', encoding='utf-8') as f:
        f.write(log_line + "\n")

# --------------------------
# 加载因子数据
# --------------------------
def load_factors():
    log_msg("加载因子数据...")
    df = pd.read_parquet(CONFIG["factor_input_path"])
    df["date"] = pd.to_datetime(df["date"])
    # 确保必要字段存在
    required_cols = ['date', 'stock_code', 'stock_name', 'close_change', 'volume_ratio_5d', 'amplitude']
    assert all(col in df.columns for col in required_cols), f"缺少必要字段：{[col for col in required_cols if col not in df.columns]}"
    log_msg(f"因子数据加载完成：{df['date'].nunique()}个交易日")
    return df

# --------------------------
# 每日选股（确保有交易）
# --------------------------
def select_daily(df):
    log_msg("开始每日选股（确保每日有票）...")
    all_dates = sorted(df['date'].unique())
    selection_results = []
    
    for date in all_dates:
        daily_df = df[df['date'] == date].copy()
        if len(daily_df) < 100:  # 过滤数据过少的日期
            log_msg(f"日期{date.date()}数据不足，跳过")
            continue
        
        # 宽松过滤条件（确保有足够标的）
        filtered = daily_df[
            (daily_df['close_change'] >= CONFIG['filters']['min_close_change']) &
            (daily_df['amplitude'] <= CONFIG['filters']['max_amplitude']) &
            (daily_df['volume_ratio_5d'] >= CONFIG['filters']['min_volume_ratio'])
        ].copy()
        
        # 若过滤后不足，放宽条件（确保每日有票）
        if len(filtered) < CONFIG['filters']['top_n']:
            log_msg(f"日期{date.date()}严格过滤后不足，放宽条件")
            filtered = daily_df.sort_values('volume_ratio_5d', ascending=False).head(CONFIG['filters']['top_n'] * 2)
        
        # 按量能比排序，取前N只
        selected = filtered.sort_values('volume_ratio_5d', ascending=False).head(CONFIG['filters']['top_n']).copy()
        selected['selection_date'] = date  # 标记选股日
        selection_results.append(selected)
        log_msg(f"日期{date.date()}选入{len(selected)}只股票")
    
    # 合并结果
    final = pd.concat(selection_results, ignore_index=True)
    final.to_csv(CONFIG["selection_output_path"], index=False, encoding='utf-8-sig')
    log_msg(f"选股完成，共{len(final)}条记录，保存至{CONFIG['selection_output_path']}")
    return final

# --------------------------
# 主函数
# --------------------------
def run_selection():
    try:
        init_log()
        factor_df = load_factors()
        selection = select_daily(factor_df)
        return selection
    except Exception as e:
        log_msg(f"选股失败：{str(e)}")
        raise

# 执行选股
if __name__ == "__main__":
    daily_picks = run_selection()

[2025-10-26 16:25:08] 加载因子数据...
[2025-10-26 16:25:11] 因子数据加载完成：678个交易日
[2025-10-26 16:25:11] 开始每日选股（确保每日有票）...
[2025-10-26 16:25:11] 日期2023-01-03严格过滤后不足，放宽条件
[2025-10-26 16:25:11] 日期2023-01-03选入10只股票
[2025-10-26 16:25:11] 日期2023-01-04选入10只股票
[2025-10-26 16:25:11] 日期2023-01-05选入10只股票
[2025-10-26 16:25:11] 日期2023-01-06选入10只股票
[2025-10-26 16:25:11] 日期2023-01-09选入10只股票
[2025-10-26 16:25:11] 日期2023-01-10选入10只股票
[2025-10-26 16:25:11] 日期2023-01-11选入10只股票
[2025-10-26 16:25:11] 日期2023-01-12选入10只股票
[2025-10-26 16:25:11] 日期2023-01-13选入10只股票
[2025-10-26 16:25:11] 日期2023-01-16选入10只股票
[2025-10-26 16:25:11] 日期2023-01-17选入10只股票
[2025-10-26 16:25:11] 日期2023-01-18选入10只股票
[2025-10-26 16:25:11] 日期2023-01-19选入10只股票
[2025-10-26 16:25:11] 日期2023-01-20选入10只股票
[2025-10-26 16:25:11] 日期2023-01-30选入10只股票
[2025-10-26 16:25:11] 日期2023-01-31选入10只股票
[2025-10-26 16:25:11] 日期2023-02-01选入10只股票
[2025-10-26 16:25:11] 日期2023-02-02选入10只股票
[2025-10-26 16:25:11] 日期2023-02-03选入10只股票
[2025-10-26 16:25:11] 日期2023-02-06选入10只股票
[2

In [3]:
import pandas as pd
import numpy as np
import os
from datetime import datetime

# --------------------------
# 配置参数
# --------------------------
CONFIG = {
    "selection_path": r'./daily_selection.csv',  # 上一步选股结果
    "widetable_path": r'D:\workspace\xiaoyao\data\widetable.parquet',  # 宽表路径
    "result_path": r'./backtest_result.csv',  # 回测结果
    "fund_growth_path": r'./fund_growth.csv',  # 资金增长曲线
    "log_path": r'./backtest_log.txt',
    "initial_fund": 100000.0  # 初始资金
}

# --------------------------
# 工具函数
# --------------------------
def init_log():
    with open(CONFIG["log_path"], 'w', encoding='utf-8') as f:
        f.write(f"【评测启动】{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

def log_msg(msg):
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    log_line = f"[{timestamp}] {msg}"
    print(log_line)
    with open(CONFIG["log_path"], 'a', encoding='utf-8') as f:
        f.write(log_line + "\n")

# --------------------------
# 加载数据并建立日期映射
# --------------------------
def load_data():
    log_msg("加载选股结果和价格数据...")
    # 1. 加载选股结果
    selection = pd.read_csv(CONFIG["selection_path"])
    selection["selection_date"] = pd.to_datetime(selection["selection_date"])
    selection["stock_code"] = selection["stock_code"].astype(str)
    
    # 2. 加载价格数据并建立[股票+日期]→价格映射
    price_cols = ['date', 'stock_code', 'open', 'close', 'paused']
    price_df = pd.read_parquet(CONFIG["widetable_path"], columns=price_cols)
    price_df["date"] = pd.to_datetime(price_df["date"])
    price_df["stock_code"] = price_df["stock_code"].astype(str)
    price_df = price_df[price_df['paused'] == 0].copy()  # 过滤停牌
    
    # 建立映射：(stock_code, date) → open/close
    open_map = price_df.set_index(['stock_code', 'date'])['open'].to_dict()
    close_map = price_df.set_index(['stock_code', 'date'])['close'].to_dict()
    
    log_msg(f"数据加载完成：{len(selection)}条选股记录，{len(price_df)}条价格记录")
    return selection, open_map, close_map

# --------------------------
# 计算交易收益（T+1买，T+2卖）
# --------------------------
def calculate_returns(selection, open_map, close_map):
    log_msg("计算交易收益...")
    # 新增买卖日期（T+1买入，T+2卖出）
    selection['buy_date'] = selection['selection_date'] + pd.Timedelta(days=1)
    selection['sell_date'] = selection['selection_date'] + pd.Timedelta(days=2)
    
    # 计算买入价（T+1开盘价）和卖出价（T+2收盘价）
    def get_prices(row):
        stock = row['stock_code']
        buy_date = row['buy_date']
        sell_date = row['sell_date']
        
        buy_price = open_map.get((stock, buy_date), np.nan)
        sell_price = close_map.get((stock, sell_date), np.nan)
        return pd.Series([buy_price, sell_price])
    
    selection[['buy_price', 'sell_price']] = selection.apply(get_prices, axis=1)
    
    # 过滤无效价格
    valid = selection.dropna(subset=['buy_price', 'sell_price'])
    valid = valid[(valid['buy_price'] > 0) & (valid['sell_price'] > 0)].copy()
    
    # 计算单只股票收益
    valid['return_rate'] = (valid['sell_price'] / valid['buy_price'] - 1) * 100  # 收益率(%)
    log_msg(f"有效交易{len(valid)}笔，无效交易{len(selection)-len(valid)}笔")
    return valid

# --------------------------
# 按日汇总收益和资金增长
# --------------------------
def summarize_results(valid_trades):
    log_msg("汇总每日收益和资金增长...")
    # 1. 按日汇总平均收益
    daily_summary = valid_trades.groupby('selection_date').agg(
        avg_return=('return_rate', 'mean'),  # 当日平均收益率(%)
        trade_count=('stock_code', 'count')  # 当日交易数量
    ).reset_index()
    
    # 2. 计算每日资金增长率（增长率=1 + 0.5*平均收益/100）
    daily_summary['growth_rate'] = 1 + 0.5 * (daily_summary['avg_return'] / 100)
    
    # 3. 计算累计资金（连乘增长率）
    daily_summary = daily_summary.sort_values('selection_date')
    daily_summary['cumulative_fund'] = CONFIG['initial_fund'] * daily_summary['growth_rate'].cumprod()
    
    # 4. 保存结果
    valid_trades.to_csv(CONFIG["result_path"], index=False, encoding='utf-8-sig')
    daily_summary.to_csv(CONFIG["fund_growth_path"], index=False, encoding='utf-8-sig')
    
    # 5. 打印核心指标
    final_fund = daily_summary['cumulative_fund'].iloc[-1] if not daily_summary.empty else CONFIG['initial_fund']
    total_return = (final_fund / CONFIG['initial_fund'] - 1) * 100
    log_msg(f"\n核心评测结果：")
    log_msg(f"初始资金：{CONFIG['initial_fund']:.2f}元")
    log_msg(f"最终资金：{final_fund:.2f}元")
    log_msg(f"累计收益率：{total_return:.2f}%")
    log_msg(f"总交易日：{len(daily_summary)}天")
    log_msg(f"平均每日收益率：{daily_summary['avg_return'].mean():.2f}%")
    
    return daily_summary

# --------------------------
# 主函数
# --------------------------
def run_backtest():
    try:
        init_log()
        selection, open_map, close_map = load_data()
        valid_trades = calculate_returns(selection, open_map, close_map)
        if len(valid_trades) == 0:
            log_msg("无有效交易，评测终止")
            return None
        daily_summary = summarize_results(valid_trades)
        return daily_summary
    except Exception as e:
        log_msg(f"评测失败：{str(e)}")
        raise

# 执行评测
if __name__ == "__main__":
    fund_growth = run_backtest()

[2025-10-26 16:25:33] 加载选股结果和价格数据...
[2025-10-26 16:25:49] 数据加载完成：6780条选股记录，3438070条价格记录
[2025-10-26 16:25:49] 计算交易收益...
[2025-10-26 16:25:51] 有效交易3860笔，无效交易2920笔
[2025-10-26 16:25:51] 汇总每日收益和资金增长...
[2025-10-26 16:25:51] 
核心评测结果：
[2025-10-26 16:25:51] 初始资金：100000.00元
[2025-10-26 16:25:51] 最终资金：105200.60元
[2025-10-26 16:25:51] 累计收益率：5.20%
[2025-10-26 16:25:51] 总交易日：388天
[2025-10-26 16:25:51] 平均每日收益率：0.05%
