In [12]:
import backtrader as bt
import pandas as pd
import numpy as np
from itertools import groupby

# ===================== 策略核心参数配置 =====================
STRATEGY_PARAMS = {
    # 选股参数
    "auction_ratio_threshold": 0.8,    # 竞价量比≤0.8
    "industry_cold_rank": 0.5,         # 行业热度后50%
    "3d_loss_threshold": -6.0,         # 前3日跌幅≥-6%（放宽以增加信号）
    "buy_down_ratio": 0.03,            # 买入价：前5日低点下方3%
    # 分风格交易参数
    "shock_params": {
        "stop_profit": 3.4, "stop_loss": -1.9, "trailing_stop": 1.8, "position": 0.20
    },
    "bear_params": {
        "stop_profit": 3.0, "stop_loss": -1.7, "trailing_stop": 1.5, "position": 0.15
    },
    "bull_params": {
        "stop_profit": 4.0, "stop_loss": -2.0, "trailing_stop": 2.5, "position": 0.20
    },
    # 风险控制
    "max_single_loss": 0.01,           # 单次最大亏损1%
    "cont_loss_pause": 6,              # 连续亏损6笔暂停（放宽阈值）
    "cont_loss_downsize": 4,           # 连续亏损4笔降仓至10%
    "pause_days": 2                    # 暂停交易天数
}

# ===================== 数据加载器（适配宽表数据） =====================
class StockDataFeed(bt.feeds.PandasData):
    """自定义数据加载器，适配宽表字段"""
    params = (
        # 基础行情字段
        ('datetime', 'date'),
        ('open', 'open'),
        ('high', 'high'),
        ('low', 'low'),
        ('close', 'close'),
        ('volume', 'volume'),
        ('openinterest', None),
        # 自定义字段（用于选股与计算）
        ('paused', 'paused'),
        ('stock_code', 'stock_code'),
        ('stock_name', 'stock_name'),
        ('pre_close', 'pre_close'),
        ('auc_volume', 'auc_volume'),
        ('sw_l1_industry_name', 'sw_l1_industry_name'),
        ('circulating_market_cap', 'circulating_market_cap'),
    )

# ===================== 超跌弱反弹策略类 =====================
class ReverseStrategy(bt.Strategy):
    params = STRATEGY_PARAMS

    def __init__(self):
        # 1. 全局变量初始化
        self.trading_days = []          # 有效交易日列表
        self.daily_data = {}            # 每日全市场数据：{date: df}
        self.selected_stocks = {}       # 每日选中股票：{date: [stock_code1, ...]}
        self.hold_stocks = {}           # 持仓股票：{stock_code: (buy_date, buy_price, params)}
        self.trade_results = []         # 交易结果：["profit", "loss"]
        self.pause_flag = False         # 暂停交易标记
        self.pause_count = 0            # 暂停天数计数

        # 2. 预计算全市场每日数据（用于选股）
        self._preprocess_all_data()
        # 3. 预计算行业热度排名（用于选股）
        self._precompute_industry_rank()
        # 4. 预计算市场波动率（用于风格识别）
        self._precompute_market_volatility()

    def _preprocess_all_data(self):
        """预加载全市场数据，按日期分组"""
        # 从第一个数据feed获取所有日期（假设所有股票数据日期一致）
        all_dates = self.datas[0].get_datetime_array()
        self.trading_days = [pd.Timestamp(date) for date in all_dates if date is not None]
        
        # 遍历所有股票数据，按日期整合
        for data in self.datas:
            if data.paused[0] == 1.0 or "ST" in data.stock_name[0]:
                continue  # 过滤停牌股、ST股
            
            # 提取单只股票的所有字段
            stock_df = pd.DataFrame({
                "date": [pd.Timestamp(data.datetime[i]) for i in range(len(data))],
                "stock_code": data.stock_code[0],
                "open": data.open.array,
                "high": data.high.array,
                "low": data.low.array,
                "close": data.close.array,
                "pre_close": data.pre_close.array,
                "auc_volume": data.auc_volume.array,
                "sw_l1_industry_name": data.sw_l1_industry_name[0],
                "circulating_market_cap": data.circulating_market_cap.array
            })
            
            # 按日期分组，添加到全局字典
            for date, group in stock_df.groupby("date"):
                if date not in self.daily_data:
                    self.daily_data[date] = []
                self.daily_data[date].append(group.iloc[0])
        
        # 转换为DataFrame格式
        for date in self.daily_data:
            self.daily_data[date] = pd.DataFrame(self.daily_data[date])

    def _precompute_industry_rank(self):
        """预计算每日行业热度排名（申万一级）"""
        self.industry_rank = {}
        all_industry_data = []
        
        # 整合每日行业收益
        for date in self.trading_days:
            if date not in self.daily_data:
                continue
            daily_df = self.daily_data[date]
            daily_df["daily_return"] = (daily_df["close"] / daily_df["pre_close"] - 1) * 100
            industry_return = daily_df.groupby("sw_l1_industry_name")["daily_return"].mean().reset_index()
            industry_return["date"] = date
            all_industry_data.append(industry_return)
        
        # 计算行业百分位排名
        all_industry_df = pd.concat(all_industry_data)
        for date, group in all_industry_df.groupby("date"):
            group["industry_percentile"] = group["daily_return"].rank(pct=True)
            self.industry_rank[date] = dict(zip(group["sw_l1_industry_name"], group["industry_percentile"]))

    def _precompute_market_volatility(self):
        """预计算每日市场波动率（全市场收益ATR）"""
        self.market_vol = {}
        market_returns = []
        
        # 计算每日全市场平均收益
        for date in self.trading_days:
            if date not in self.daily_data:
                continue
            daily_df = self.daily_data[date]
            daily_df["daily_return"] = (daily_df["close"] / daily_df["pre_close"] - 1) * 100
            market_returns.append({"date": date, "mean_return": daily_df["daily_return"].mean()})
        
        # 计算ATR（30日窗口）
        market_df = pd.DataFrame(market_returns).sort_values("date")
        market_df["return_diff"] = market_df["mean_return"].diff().abs()
        market_df["market_atr"] = market_df["return_diff"].rolling(30, min_periods=15).mean()
        self.market_vol = dict(zip(market_df["date"], market_df["market_atr"]))

    def _get_market_style(self, date):
        """根据日期获取市场风格（震荡/熊市/牛市）"""
        atr = self.market_vol.get(date, 2.5)
        if atr > 3.5:
            return "bull"
        elif atr < 1.5:
            return "bear"
        else:
            return "shock"

    def _select_daily_stocks(self, date):
        """每日选股逻辑（超跌+资金冷淡）"""
        if date not in self.daily_data or date not in self.industry_rank:
            return []
        
        daily_df = self.daily_data[date].copy()
        if len(daily_df) == 0:
            return []
        
        # 1. 计算选股所需因子（前N日数据）
        # 前3日收益（需跨日期获取历史数据）
        date_idx = self.trading_days.index(date)
        if date_idx < 3:
            return []  # 数据不足，无法计算前3日收益
        
        # 预计算单只股票的历史因子
        stock_factors = []
        for _, row in daily_df.iterrows():
            stock_code = row["stock_code"]
            # 获取该股票的历史数据（从backtrader datas中查找）
            stock_data = next((d for d in self.datas if d.stock_code[0] == stock_code), None)
            if not stock_data:
                continue
            
            # 计算前3日收益
            past_3_returns = []
            for i in range(1, 4):
                past_date = self.trading_days[date_idx - i]
                past_row = next((r for r in self.daily_data.get(past_date, []) if r["stock_code"] == stock_code), None)
                if past_row:
                    past_return = (past_row["close"] / past_row["pre_close"] - 1) * 100
                    past_3_returns.append(past_return)
            if len(past_3_returns) < 3:
                continue
            row["3d_return"] = sum(past_3_returns)
            
            # 计算前1日收益（判断跌幅收窄）
            prev_date = self.trading_days[date_idx - 1]
            prev_row = next((r for r in self.daily_data.get(prev_date, []) if r["stock_code"] == stock_code), None)
            row["prev_daily_return"] = (prev_row["close"] / prev_row["pre_close"] - 1) * 100 if prev_row else 0
            
            # 计算竞价量比（前5日竞价量均值）
            past_5_auc = []
            for i in range(1, 6):
                past_date = self.trading_days[date_idx - i] if date_idx - i >=0 else None
                if not past_date:
                    continue
                past_row = next((r for r in self.daily_data.get(past_date, []) if r["stock_code"] == stock_code), None)
                if past_row and past_row["auc_volume"] > 0:
                    past_5_auc.append(past_row["auc_volume"])
            if len(past_5_auc) < 3:
                continue
            row["auction_vol_5d_mean"] = np.mean(past_5_auc)
            row["auc_volume_ratio"] = row["auc_volume"] / row["auction_vol_5d_mean"]
            
            # 计算前5日低点
            past_5_low = []
            for i in range(1, 6):
                past_date = self.trading_days[date_idx - i] if date_idx - i >=0 else None
                if not past_date:
                    continue
                past_row = next((r for r in self.daily_data.get(past_date, []) if r["stock_code"] == stock_code), None)
                if past_row:
                    past_5_low.append(past_row["low"])
            if len(past_5_low) < 3:
                continue
            row["5d_low"] = min(past_5_low)
            
            # 添加行业百分位
            row["industry_percentile"] = self.industry_rank[date].get(row["sw_l1_industry_name"], 1.0)
            stock_factors.append(row)
        
        # 2. 筛选符合条件的股票
        factor_df = pd.DataFrame(stock_factors)
        if len(factor_df) == 0:
            return []
        
        filter_conditions = (
            (factor_df["3d_return"] <= self.p.3d_loss_threshold) &
            (factor_df["prev_daily_return"] >= -1.0) &
            (factor_df["auc_volume_ratio"] <= self.p.auction_ratio_threshold) &
            (factor_df["industry_percentile"] <= self.p.industry_cold_rank) &
            (factor_df["circulating_market_cap"] >= 50.0)  # 适度流动性过滤
        )
        selected_df = factor_df[filter_conditions].copy()
        if len(selected_df) == 0:
            return []
        
        # 3. 按超跌程度排序，选前5只
        selected_df = selected_df.nsmallest(min(5, len(selected_df)), "3d_return")
        return list(selected_df[["stock_code", "5d_low"]].to_dict("records"))

    def _adjust_position(self):
        """根据连续交易结果调整仓位/暂停交易"""
        if len(self.trade_results) < self.p.cont_loss_downsize:
            return None, False
        
        # 统计最近交易结果
        recent_results = self.trade_results[-self.p.cont_loss_pause:] if len(self.trade_results) >= self.p.cont_loss_pause else self.trade_results
        loss_count = sum(1 for res in recent_results if res == "loss")
        
        # 连续亏损达暂停阈值
        if len(recent_results) == self.p.cont_loss_pause and loss_count == self.p.cont_loss_pause:
            return None, True
        # 连续亏损达降仓阈值
        elif loss_count >= self.p.cont_loss_downsize:
            return 0.10, False
        return None, False

    def next(self):
        """每日执行逻辑：选股→T+1买入→T+2卖出"""
        current_date = pd.Timestamp(self.datas[0].datetime.date())
        current_idx = self.trading_days.index(current_date)
        
        # 1. 处理暂停交易
        if self.pause_flag:
            self.pause_count += 1
            if self.pause_count >= self.p.pause_days:
                self.pause_flag = False
                self.pause_count = 0
            return
        
        # 2. T+2卖出：处理2日前买入的股票
        if current_idx >= 2:
            sell_date = self.trading_days[current_idx - 2]
            stocks_to_sell = [code for code, (buy_date, _, _) in self.hold_stocks.items() if buy_date == sell_date]
            for stock_code in stocks_to_sell:
                buy_price, style_params = self.hold_stocks[stock_code][1], self.hold_stocks[stock_code][2]
                stock_data = next((d for d in self.datas if d.stock_code[0] == stock_code), None)
                if not stock_data:
                    continue
                
                # 计算止盈止损价
                take_profit = buy_price * (1 + style_params["stop_profit"] / 100)
                stop_loss = buy_price * (1 + style_params["stop_loss"] / 100)
                trailing_stop = buy_price * (1 + style_params["trailing_stop"] / 100)
                
                # 盘中触发判断
                if stock_data.high[0] >= take_profit:
                    sell_price = take_profit
                    trigger = "止盈"
                elif stock_data.low[0] <= stop_loss:
                    sell_price = stop_loss
                    trigger = "止损"
                elif stock_data.high[0] >= trailing_stop:
                    sell_price = max(buy_price, stock_data.close[0])  # 跌破成本价卖出
                    trigger = "移动止损"
                else:
                    sell_price = stock_data.close[0]
                    trigger = "收盘价"
                
                # 执行卖出
                self.sell(data=stock_data, size=self.getposition(stock_data).size)
                # 记录交易结果
                return_rate = (sell_price / buy_price - 1) * 100
                self.trade_results.append("profit" if return_rate > 0 else "loss")
                # 删除持仓记录
                del self.hold_stocks[stock_code]

        # 3. 每日选股：收盘后筛选次日买入标的
        if current_idx < len(self.trading_days) - 1:
            self.selected_stocks[current_date] = self._select_daily_stocks(current_date)

        # 4. T+1买入：处理昨日选中的股票
        if current_idx >= 1:
            buy_date = self.trading_days[current_idx - 1]
            if buy_date not in self.selected_stocks:
                return
            
            # 获取当日交易参数（仓位调整+市场风格）
            adjust_pos, self.pause_flag = self._adjust_position()
            if self.pause_flag:
                print(f"⚠️  连续{self.p.cont_loss_pause}笔亏损，暂停交易{self.p.pause_days}天（{current_date.strftime('%Y-%m-%d')}）")
                return
            
            # 遍历选中的股票，执行买入
            for stock_info in self.selected_stocks[buy_date]:
                stock_code = stock_info["stock_code"]
                target_5d_low = stock_info["5d_low"]
                stock_data = next((d for d in self.datas if d.stock_code[0] == stock_code), None)
                if not stock_data or self.getposition(stock_data).size > 0:
                    continue
                
                # 确定买入价（前5日低点下方3%）
                target_buy_price = target_5d_low * (1 - self.p.buy_down_ratio)
                if stock_data.low[0] <= target_buy_price <= stock_data.open[0]:
                    buy_price = target_buy_price
                elif stock_data.low[0] <= target_buy_price:
                    buy_price = stock_data.low[0]
                else:
                    continue  # 未达买入条件
                
                # 确定市场风格与交易参数
                style = self._get_market_style(current_date)
                style_params = self.p.bull_params if style == "bull" else self.p.bear_params if style == "bear" else self.p.shock_params
                # 计算仓位（基础仓位+动态调整）
                base_pos = style_params["position"]
                position_ratio = adjust_pos if adjust_pos is not None else base_pos
                # 单次风险控制
                max_loss = self.broker.getvalue() * self.p.max_single_loss
                loss_per_share = buy_price - (buy_price * (1 + style_params["stop_loss"] / 100))
                if loss_per_share <= 0:
                    continue
                max_shares = int(max_loss // (loss_per_share * 100)) * 100
                # 按资金计算最大可买份额
                cash_per_stock = self.broker.getcash() * position_ratio
                max_shares_by_cash = int(cash_per_stock // (buy_price * 100)) * 100
                shares = min(max_shares, max_shares_by_cash)
                if shares <= 0:
                    continue
                
                # 执行买入
                self.buy(data=stock_data, size=shares)
                # 记录持仓信息（用于T+2卖出）
                self.hold_stocks[stock_code] = (current_date, buy_price, style_params)

    def stop(self):
        """回测结束后统计结果"""
        # 提取所有交易记录
        trades = self.broker.get_trades()
        if len(trades) == 0:
            print("无有效交易记录")
            return
        
        # 计算核心指标
        returns = []
        for trade in trades:
            if trade.data.stock_code[0] not in self.hold_stocks:
                return_rate = (trade.price - trade.price) / trade.price * 100  # 避免异常
            else:
                buy_price = self.hold_stocks[trade.data.stock_code[0]][1]
                return_rate = (trade.price - buy_price) / buy_price * 100
            returns.append(return_rate)
        
        returns = np.array(returns)
        win_rate = len(returns[returns > 0]) / len(returns) * 100
        avg_profit = returns[returns > 0].mean() if len(returns[returns > 0]) > 0 else 0
        avg_loss = abs(returns[returns <= 0].mean()) if len(returns[returns <= 0]) > 0 else 0
        profit_loss_ratio = avg_profit / avg_loss if avg_loss != 0 else 0
        total_return = (self.broker.getvalue() - self.broker.startingcash) / self.broker.startingcash * 100
        
        # 连续交易统计
        max_cont_profit = 0
        max_cont_loss = 0
        if self.trade_results:
            for key, group in groupby(self.trade_results):
                group_len = len(list(group))
                if key == "profit" and group_len > max_cont_profit:
                    max_cont_profit = group_len
                elif key == "loss" and group_len > max_cont_loss:
                    max_cont_loss = group_len
        
        # 输出结果
        print("="*80)
        print(f"超跌弱反弹策略backtrader回测结果")
        print("="*80)
        print(f"初始资金：{self.broker.startingcash:.2f} 元 → 最终资金：{self.broker.getvalue():.2f} 元")
        print(f"总收益率：{total_return:.2f}% | 总交易次数：{len(trades)//2}（买入卖出各算1笔）")
        print(f"胜率：{win_rate:.2f}% | 平均盈利：{avg_profit:.2f}% | 平均亏损：{avg_loss:.2f}%")
        print(f"盈亏比：{profit_loss_ratio:.2f} | 最大连续盈利：{max_cont_profit} 笔")
        print(f"最大连续亏损：{max_cont_loss} 笔 | 最大回撤：{self._get_max_drawdown():.2f}%")
        print("="*80)

    def _get_max_drawdown(self):
        """计算最大回撤"""
        values = self.broker.get_value_history()
        if len(values) < 2:
            return 0.0
        peaks = np.maximum.accumulate(values)
        drawdowns = (peaks - values) / peaks
        return np.max(drawdowns) * 100


# ===================== 回测执行入口 =====================
def run_backtest(widetable_path):
    # 1. 加载宽表数据
    df = pd.read_parquet(widetable_path)
    df["date"] = pd.to_datetime(df["date"])
    df = df[(df["paused"] == 0.0) & (~df["stock_name"].str.contains("ST"))].copy()
    
    # 2. 初始化backtrader引擎
    cerebro = bt.Cerebro()
    cerebro.broker.setcash(100000.0)  # 初始资金
    cerebro.broker.setcommission(commission=0.0003)  # 佣金0.03%（模拟实盘）
    
    # 3. 按股票代码拆分数据，添加到cerebro
    stock_codes = df["stock_code"].unique()
    for code in stock_codes[:500]:  # 可选前500只股票（全市场数据量大，可分批测试）
        stock_df = df[df["stock_code"] == code].sort_values("date").reset_index(drop=True)
        if len(stock_df) < 60:  # 过滤数据不足60天的股票
            continue
        # 创建数据feed
        data_feed = StockDataFeed(dataname=stock_df, fromdate=df["date"].min(), todate=df["date"].max())
        cerebro.adddata(data_feed, name=code)
    
    # 4. 添加策略
    cerebro.addstrategy(ReverseStrategy)
    
    # 5. 添加分析器（可选：绘制资金曲线）
    cerebro.addanalyzer(bt.analyzers.DrawDown, _name="drawdown")
    cerebro.addanalyzer(bt.analyzers.TimeReturn, _name="timereturn")
    
    # 6. 运行回测
    print("开始回测...")
    results = cerebro.run()
    print("回测完成！")
    
    # 7. 绘制资金曲线（可选）
    cerebro.plot(style="candlestick")

if __name__ == "__main__":
    # 替换为你的宽表路径
    WIDETABLE_PATH = r'D:\workspace\xiaoyao\data\widetable.parquet'
    run_backtest(WIDETABLE_PATH)

SyntaxError: invalid decimal literal (2502874562.py, line 236)