In [1]:
# import logging
import sys

# stdout=sys.stdout
sys.path.append('../../')
import os
import random
import time

import matplotlib
import matplotlib.pyplot as plt
import mplfinance as mpf
# pip install --upgrade mplfinance
# sys.stdout=stdout
import numpy as np
# import numpy as np
import pandas as pd
from JohnsonUtil import commonTips as cct
from JohnsonUtil import johnson_cons as ct
from JSONData import sina_data
from JSONData import tdx_data_Day as tdd
from mplfinance.original_flavor import candlestick_ohlc

# from JSONData import tdx_hdf5_api as h5a

%matplotlib widget
plt.rc('font', family='SimHei', size=13)
matplotlib.get_backend()
# jupyter nbextension enable --py widgetsnbextension

os.environ['NUMEXPR_MAX_THREADS'] = '12'
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"
import nest_asyncio

nest_asyncio.apply()
from sklearn.preprocessing import StandardScaler


[DEBUG] Path Mode: Python Script (__file__). Path: D:\MacTools\WorkFile\WorkSpace\pyQuant3\stock_standalone\JohnsonUtil
使用本地配置: D:\MacTools\WorkFile\WorkSpace\pyQuant3\stock_standalone\JohnsonUtil\global.ini
log_f: instock_tk.log


In [2]:
# signal_df 已包含 signal 和 future_return
# 3. 策略回测逻辑（多状态 signal）
# def calc_strategy_return(df):
#     df = df.copy()
#     # 买入信号：1 或 2
#     df['position'] = df['signal'].apply(lambda x: 1 if x in [1,2] else 0)
#     # 卖出信号：-1，清仓（这里暂时只考虑做多策略）
#     df['strategy_return'] = df['position'] * df['future_return']
#     # 累积收益
#     df['cumulative_strategy_return'] = (1 + df['strategy_return']).cumprod()
#     return df

# # signal_df = calc_strategy_return(signal_df)

def calc_strategy_return(signal_df, signal_col='signal', price_col1='lastp1d', price_col2='lastp2d'):
    """
    根据多状态信号计算策略收益和累计收益
    只在 EVAL_STATE=1(启动) 或 2(主升) 建仓，其余状态不建仓
    
    参数：
        signal_df : pd.DataFrame
            包含 signal, EVAL_STATE, lastp1d, lastp2d 等列
        signal_col : str
            信号列名
        price_col1 : str
            当日价格列（用于计算未来收益）
        price_col2 : str
            前一日价格列
    
    返回：
        pd.DataFrame : 原 DataFrame 增加如下列
            future_return, filtered_signal, strategy_return,
            cumulative_strategy_return, cumulative_market_return
    """
    df = signal_df.copy()
    
    # 1日未来收益
    df['future_return'] = df[price_col1] / df[price_col2] - 1

    # 策略信号过滤，只在启动/主升建仓
    df['filtered_signal'] = df.apply(
        lambda row: row[signal_col] if row.get('EVAL_STATE', 0) in [1, 2] else 0, axis=1
    )

    # 策略收益
    df['strategy_return'] = df['filtered_signal'] * df['future_return']

    # 累计收益
    df['cumulative_strategy_return'] = (1 + df['strategy_return']).cumprod()
    df['cumulative_market_return'] = (1 + df['future_return']).cumprod()

    return df

def calc_eval_state(df):
    """
    根据价格和指标计算 EVAL_STATE
    9 = 空头, 1 = 启动, 2 = 主升, 3 = 回撤, 0 = 无效数据
    """
    def eval_row(row):
        price = row['lastp1d']
        upper = row.get('upper1', 0)
        ma = row.get('ma5', 0)  # 可以改成守护 MA 支撑线，例如 ma5 或 ma20
        if price <= 0:
            return 0
        elif price < ma:
            return 9  # 空头
        elif price > upper and row.get('vol', 0) > 0:  # 放量突破 upper
            return 1  # 启动
        elif price >= upper:
            return 2  # 主升
        else:
            return 3  # 回撤

    df['EVAL_STATE'] = df.apply(eval_row, axis=1)
    return df


# 特征标准化/归一化

# 特征筛选或降维

In [3]:
def generate_df_vect_daily_features_safe(df, lastdays=5):
    """多股票，生成每只股票最近 lastdays 的特征字典列表"""
    features_list = []
    cols_map = {
        'open': 'lasto',
        'high': 'lasth',
        'low': 'lastl',
        'close': 'lastp',
        'vol': 'lastv',
        'upper': 'upper',
        'ma5d': 'ma5',
        'ma20d': 'ma20',
        'ma60d': 'ma60',
        'perlastp': 'perc',
        'perd': 'per'
    }
    
    for code, df_stock in df.groupby(level=0):
        feat = {'code': str(code)}
        df_stock = df_stock.sort_index(level=1)  # 按日期升序
        
        for da in range(1, lastdays+1):
            for col, prefix in cols_map.items():
                # if col in df_stock.columns and len(df_stock) >= da:
                #     feat[f'{prefix}{da}d'] = df_stock[col].iloc[-da]
                # else:
                #     feat[f'{prefix}{da}d'] = 0
                if col in df_stock.columns and len(df_stock) >= da:
                    # upper 列不加 d 后缀
                    if col == 'upper':
                        feat[f'{prefix}{da}'] = df_stock[col].iloc[-da]
                    else:
                        feat[f'{prefix}{da}d'] = df_stock[col].iloc[-da]
                else:
                    if col == 'upper':
                        feat[f'{prefix}{da}'] = 0
                    else:
                        feat[f'{prefix}{da}d'] = 0
            
            # eval / signal
            for suffix in ['eval', 'signal']:
                colname = f'{suffix}{da}d'
                if colname in df_stock.columns and len(df_stock) >= da:
                    feat[colname] = df_stock[colname].iloc[-da]
                else:
                    feat[colname] = 0
        features_list.append(feat)
    
    return pd.DataFrame(features_list)

In [4]:
code_l=['920274','300342','300696', '603091', '605167']

resample='d'

# df = tdd.get_tdx_exp_all_LastDF_DL(code_l, dt=ct.Resample_LABELS_Days[resample], resample=resample)

df, tdxdata = tdd.get_append_lastp_to_df()
# df_feat = tdd.generate_df_vect_daily_features(df,lastdays=cct.compute_lastdays)
# 排除非数值列（code 和 date）
# 假设 df_feat 中有 trade_signal

# # 假设 signal_df 的 index 是 MultiIndex (code, date)，或者按日期升序
# df = df.sort_index(level=1)  # 按日期升序

# # 计算1日未来收益
# # df['future_return'] = df.groupby(level=0)['close'].shift(-1) / df['close'] - 1
# # 1日收益 = (今天 - 昨天) / 昨天
# df['future_return'] = df['lastp1d'] / df['lastp2d'] - 1



initdx :0 b1>:5450 it:0.4 tdx_last_df:1.27 

In [5]:
df['signal1d'][-2:],df['EVAL_STATE'][-2:]

(code
 600331    5.0
 002132    5.0
 Name: signal1d, dtype: float64,
 code
 600331    1.0
 002132    9.0
 Name: EVAL_STATE, dtype: float64)

In [6]:
target_cols = ['trade_signal', 'EVAL_STATE']  # 如果有多个目标列
df_target = df[target_cols]

In [7]:
# -*- coding: utf-8 -*-
import pandas as pd
from sklearn.preprocessing import StandardScaler

# ============================================
# 1. 设定参数
# ============================================
lastdays = 9  # 提取最近 N 天特征

# ============================================
# 2. 获取最近交易日数据
# 假设 df 的 index 是 MultiIndex: (code, date)
# ============================================
# latest_date = df.index.get_level_values(1).max()
# print("最新交易日:", latest_date)

# # 只保留最新交易日及历史 lastdays-1 日
df_recent = df.groupby(level=0).apply(
    lambda x: x.sort_index(level=1).tail(lastdays)
).reset_index(level=0, drop=False)  # 保留 code

# ============================================
# 3. 生成矢量化特征
# ============================================

# df_feat = generate_df_vect_daily_features_safe(df_recent, lastdays=lastdays)

# # 过滤掉 upper <= 0 的行
# df_feat = df_feat.query('upper1d > 0')

# # ============================================
# # 4. 特征标准化
# # ============================================
# scaler = StandardScaler()

# # signal 和 eval 不做标准化
# exclude_cols = ['code'] + [f'{s}{d}d' for d in range(1,lastdays+1) for s in ['eval','signal']]
# feat_cols = [c for c in df_feat.columns if c not in exclude_cols]

# df_feat_scaled = df_feat.copy()
# df_feat_scaled[feat_cols] = scaler.fit_transform(df_feat[feat_cols])

# # ============================================
# # 5. 输出结果
# # ============================================
# print(df_feat_scaled.head())

# # df_feat_scaled 可以直接作为模型输入


# 假设 df_feat 是 generate_df_vect_daily_features 返回的 DataFrame
df_feat = pd.DataFrame(generate_df_vect_daily_features_safe(df, lastdays=cct.compute_lastdays))
df_feat = df_feat.query('upper1 > 0')

In [8]:


# 标准化特征列（不包括 code 和目标列）
feat_cols = [c for c in df_feat.columns if c not in ['code'] + target_cols]
df_feat_scaled = df_feat.copy()
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
df_feat_scaled[feat_cols] = scaler.fit_transform(df_feat[feat_cols])

# 将目标列重新合并
# 重置索引，让 code 变成列
df_target_reset = df_target.reset_index()
df_feat_scaled_reset = df_feat_scaled.reset_index(drop=True)  # 原来的索引丢掉

# 将目标列合并
df_feat_scaled_reset[target_cols] = df_target_reset[target_cols]


In [9]:
target_cols,df_target,df_feat_scaled_reset

(['trade_signal', 'EVAL_STATE'],
         trade_signal  EVAL_STATE
 code                            
 920978           5.0         9.0
 920149           5.0         9.0
 920946           5.0         9.0
 920124           5.0         9.0
 920132           5.0         9.0
 ...              ...         ...
 603703           5.0         9.0
 601188           5.0         9.0
 002555           5.0         2.0
 600331           5.0         1.0
 002132           5.0         9.0
 
 [5449 rows x 2 columns],
         code   lasto1d   lasth1d   lastl1d   lastp1d  lastv1d    upper1  \
 0     000001 -0.340524 -0.345144 -0.336702 -0.341044      0.0 -0.340690   
 1     000002 -0.489463 -0.491194 -0.487658 -0.489948      0.0 -0.471301   
 2     000004 -0.354782 -0.358069 -0.354540 -0.355341      0.0 -0.328830   
 3     000006 -0.374523 -0.371855 -0.370818 -0.370737      0.0 -0.365101   
 4     000007 -0.340524 -0.342128 -0.342276 -0.344344      0.0 -0.325760   
 ...      ...       ...       ...       .

In [10]:
df_feat_scaled_reset[target_cols]

Unnamed: 0,trade_signal,EVAL_STATE
0,5.0,9.0
1,5.0,9.0
2,5.0,9.0
3,5.0,9.0
4,5.0,9.0
...,...,...
5444,5.0,9.0
5445,5.0,9.0
5446,5.0,2.0
5447,5.0,1.0


## 特征与标签准备（Supervised Learning）

In [11]:
# 假设 trade_signal 是目标
X = df_feat_scaled_reset.drop(columns=['code','trade_signal','EVAL_STATE'], errors='ignore')
y = df_feat_scaled_reset['trade_signal']  # 或者 'EVAL_STATE'

### 然后可以直接拆训练集/测试集：

In [12]:
from sklearn.model_selection import train_test_split

# 拆分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 查看训练集和测试集的大小
print("X_train:", X_train.shape)
print("X_test :", X_test.shape)
print("y_train:", y_train.shape)
print("y_test :", y_test.shape)

# 查看前几行数据
print(X_train.head())
print(y_train.head())


X_train: (4359, 65)
X_test : (1090, 65)
y_train: (4359,)
y_test : (1090,)
       lasto1d   lasth1d   lastl1d   lastp1d  lastv1d    upper1     ma51d  \
140  -0.418174 -0.413645 -0.414967 -0.417146      0.0 -0.396535 -0.412801   
3145 -0.494728 -0.495502 -0.493010 -0.494347      0.0 -0.485181 -0.490389   
4155 -0.438574 -0.442080 -0.440164 -0.442220      0.0 -0.426955 -0.437365   
4902  1.094907  1.300832  1.105521  1.252686      0.0  1.018003  1.112781   
2469  0.158718  0.145138  0.145600  0.137559      0.0  0.127695  0.143821   

        ma201d    ma601d    perc1d  ...  lastp5d  lastv5d  upper5  ma55d  \
140  -0.419975 -0.345103  0.125217  ...      0.0      0.0     0.0    0.0   
3145 -0.493453 -0.393978  0.982795  ...      0.0      0.0     0.0    0.0   
4155 -0.437074 -0.344081 -0.609850  ...      0.0      0.0     0.0    0.0   
4902  1.095441  1.098562  2.861299  ...      0.0      0.0     0.0    0.0   
2469  0.174173  0.246825 -0.446502  ...      0.0      0.0     0.0    0.0   

      

## 随机森林

In [13]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(
    n_estimators=200, 
    max_depth=8, 
    random_state=42
)
clf.fit(X_train, y_train)


RandomForestClassifier(max_depth=8, n_estimators=200, random_state=42)

In [14]:
y_pred = clf.predict(X_test)


In [15]:
y_proba = clf.predict_proba(X_test)[:, 1]  # 对于二分类，取类别1概率


### 模型评估

#### 分类

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))


#### 回归

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

print("MSE:", mean_squared_error(y_test, y_pred))
print("R2 :", r2_score(y_test, y_pred))


#### 特征重要性分析

In [None]:
import pandas as pd
feat_importances = pd.Series(clf.feature_importances_, index=X_train.columns)
feat_importances.sort_values(ascending=False).head(5)


#### 模型保存

In [None]:
import joblib
# joblib.dump(clf, "rf_model.pkl")

In [None]:
# df.signal9d[-2:]

#### 后续操作


##### 调整模型超参数 (max_depth, n_estimators, learning_rate 等) 做 网格搜索或贝叶斯优化

结合股票特征做 回测策略

可以使用 predict_proba 输出信号强度，选择 top N 买入股票

### 随机森林 做示例可以替换成 XGBoost 或其他模型

In [None]:
# -------------------------------
# Step 0: 导入必要库
# -------------------------------
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

# -------------------------------
# Step 1: 准备数据
# 假设你已有 X, y
# X: 特征表，y: 目标信号（9d）
# -------------------------------
# 拆分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# -------------------------------
# Step 2: 训练模型
# -------------------------------
clf = RandomForestClassifier(
    n_estimators=200,
    max_depth=6,
    min_samples_leaf=5,
    random_state=42,
    n_jobs=-1
)
clf.fit(X_train, y_train)

# -------------------------------
# Step 3: 模型评估
# -------------------------------
y_pred = clf.predict(X_test)
print("=== 分类报告 ===")
print(classification_report(y_test, y_pred))

print("=== 混淆矩阵 ===")
print(confusion_matrix(y_test, y_pred))

# -------------------------------
# Step 4: 特征重要性
# -------------------------------
feat_importances = pd.Series(clf.feature_importances_, index=X_train.columns)
feat_importances.sort_values(ascending=False).head(20)

# 可视化
plt.figure(figsize=(10,5))
feat_importances.sort_values(ascending=True).tail(20).plot(kind='barh')
plt.title("Top 20 Feature Importances")
plt.show()



In [None]:
full_data = df.copy()
full_data['EVAL_STATE']

In [None]:
import pandas as pd

# -------------------------------
# 1. 生成 full_X
# -------------------------------
# 假设 full_data 是最近全市场的原始特征 DataFrame
# 需要保证列名和训练集 X_train 一致
full_X = full_data[X_train.columns].copy()

# -------------------------------
# 2. 模型预测
# -------------------------------
signals = clf.predict(full_X)  # 分类信号
probabilities = clf.predict_proba(full_X)[:, 1]  # 预测概率，二分类时取第1列

# -------------------------------
# 3. 合并结果
# -------------------------------
signal_df = full_X.copy()
signal_df['signal'] = signals
signal_df['prob'] = probabilities

# 如果你有股票代码列，可以添加到 signal_df，例如：
# signal_df['code'] = full_data['code']
# signal_df['name'] = full_data['name']

# -------------------------------
# 4. 排序 TopN
# -------------------------------
topN = 50  # 取概率最高的 50 只股票
top_stocks = signal_df.sort_values('prob', ascending=False).head(topN)

# -------------------------------
# 5. 查看结果
# -------------------------------
print(top_stocks[['signal', 'prob']].head(10))


In [None]:
# Step 5: 全市场信号预测
signals = clf.predict(full_X)
probabilities = clf.predict_proba(full_X)[:, 1]

signal_df = full_X.copy()
signal_df['signal'] = signals
signal_df['prob'] = probabilities
signal_df['EVAL_STATE'] = full_data['EVAL_STATE']  # 假设全市场数据里已经有 EVAL_STATE 列

# Step 6: 策略回测
signal_df = calc_strategy_return(signal_df)

# 查看前几行
print(signal_df[['signal', 'EVAL_STATE', 'future_return', 'filtered_signal', 'strategy_return', 'cumulative_strategy_return']].head())

# 按概率选出潜力股
top_signal = signal_df.sort_values('prob', ascending=False)
print(top_signal[['signal', 'prob', 'EVAL_STATE']][-10:])


In [None]:
def run_full_signal_backtest(clf, full_X, top_n=50):
    """
    1. 使用模型预测信号
    2. 基于 EVAL_STATE 筛选有效建仓信号
    3. 计算未来 1 日收益
    4. 计算策略收益和累计收益
    5. 返回 top N 潜力股
    
    full_X: 最近全市场特征 DataFrame，必须包含 lastp1d/lastp2d, EVAL_STATE, signal列
    clf: 训练好的分类器
    """
    import pandas as pd

    df = full_X.copy()

    # 预测信号
    df['signal_model'] = clf.predict(df[full_X.columns.drop(['EVAL_STATE', 'signal1d','signal9d','eval9d'], errors='ignore')])
    if hasattr(clf, "predict_proba"):
        df['prob'] = clf.predict_proba(df[full_X.columns.drop(['EVAL_STATE', 'signal1d','signal9d','eval9d'], errors='ignore')])[:,1]
    else:
        df['prob'] = 0.5  # 没有概率输出时

    # 只在 EVAL_STATE 为启动/主升状态建仓
    df['filtered_signal'] = df['signal_model']
    df.loc[~df['EVAL_STATE'].isin([1,2]), 'filtered_signal'] = 0

    # 未来1日收益
    df['future_return'] = df['lastp1d'] / df['lastp2d'] - 1

    # 策略收益
    df['strategy_return'] = df['filtered_signal'] * df['future_return']
    df['cumulative_strategy_return'] = (1 + df['strategy_return']).cumprod()
    df['cumulative_market_return'] = (1 + df['future_return']).cumprod()

    # top N 潜力股
    top_signal = df.sort_values('prob', ascending=False).head(top_n)

    return df, top_signal

# # 使用示例
# signal_df, top_signal = run_full_signal_backtest(clf, full_X, top_n=50)
# print(top_signal[['filtered_signal','prob','EVAL_STATE']])


In [None]:
import numpy as np
import pandas as pd

def backtest_signals_clean(full_X, df_stock_data, clf, top_n=50, log_safe=True):
    """
    全市场信号预测 + 回测（安全版，filtered_signal 最大为1）
    """
    signal_df = full_X.copy()

    # 1. 模型预测
    signal_df['signal'] = clf.predict(full_X)
    signal_df['prob'] = clf.predict_proba(full_X)[:, 1]

    # 2. 加入 EVAL_STATE
    signal_df['EVAL_STATE'] = df_stock_data['EVAL_STATE']

    # 3. 计算1日收益
    signal_df['future_return'] = df_stock_data['lastp1d'] / df_stock_data['lastp2d'] - 1

    # 4. 策略信号过滤（只在启动/主升建仓，权重限制为1）
    signal_df['filtered_signal'] = signal_df.apply(
        lambda r: 1 if (r['signal'] > 0 and r['EVAL_STATE'] in [1, 2]) else 0,
        axis=1
    )

    # 5. 策略收益
    signal_df['strategy_return'] = signal_df['filtered_signal'] * signal_df['future_return']

    # 6. 安全累积收益
    safe_strategy_return = np.clip(signal_df['strategy_return'], -0.99, 0.99)
    if log_safe:
        signal_df['cumulative_strategy_return'] = np.expm1(np.log1p(safe_strategy_return).cumsum())
    else:
        signal_df['cumulative_strategy_return'] = (1 + safe_strategy_return).cumprod()

    # 市场累计收益
    safe_market_return = np.clip(signal_df['future_return'], -0.99, 0.99)
    signal_df['cumulative_market_return'] = (1 + safe_market_return).cumprod()

    # 7. 输出潜力股
    # top_signal = signal_df.sort_values('prob', ascending=False).head(top_n)
    top_signal = signal_df.sort_values('prob', ascending=False)
    return signal_df, top_signal


In [None]:
signal_df, top_signal = backtest_signals_clean(full_X, df, clf, top_n=50)

# 查看前50潜力股
print(top_signal[['signal','prob','EVAL_STATE','filtered_signal','future_return','strategy_return','cumulative_strategy_return']].head())

# 查看前几行回测结果
print(signal_df[['signal','future_return','strategy_return','cumulative_strategy_return']].head())


In [None]:
def show_tdx_data_all(df, codelist=None, market_value='3', col=None,limit=20,orderby='percent'):
    col_src = [
        "name",'percent', "couts", "date", "perc", "ra_diff","xratio", "idx", "hat", "close",
        "volsum5d", "hmax", "lmin", "perc1d", "ma20", "ma250", "nclose",
        "llow", 'vol1d', 'vol2d', 'vol3d', 'vol4d', 'vol5d', "perc%sd" %
        (market_value),
        "perc5d"]

    if isinstance(df, pd.Series):
        col_name = [co for co in col_src if co in df.index]
        # col_name.insert(1,'date')
    else:
        col_name = [co for co in col_src if co in df.columns]

    # print(col_name)
    # if isinstance(df, pd.DataFrame) and 'name' in df.columns:
    #     columns_ = col_name
    # else:
    #     if isinstance(df, pd.Series) and 'name' in df.index:
    #         columns_ = col_name.insert(1, "date")
    #     else:
    #         columns_ = col_name.remove("name")
    if orderby in df.columns:
        df=df.sort_values('percent', ascending=False)
    if isinstance(col,list):
        col_name.extend(col)
    elif col is not None:
        col_name.insert(4, col)

    if codelist is None:
        if isinstance(df, pd.Series):
            print("Count:1")
            temp = df.loc[col_name][:limit]
        else:
            print("Count:%s %s" % (df.shape))
            temp = df.loc[:, col_name][:limit]
    else:
        temp = df.loc[[x for x in codelist if x in df.index], col_name][:limit]
    return temp

In [None]:
def show_tdx_data(df, codelist=None, market_value='3', col=None):

    columns_ = [
        "name", "category", "close", "percent",'volra', 'topR', "lastp1d", "ma5dcum",
        "ma20d", "fib", "fibl", "ra", "ral", "df2", "per1d", "hmax", "lmin",
        "per%sd" % (market_value),
        "perc%sd" % (market_value)]
    if isinstance(df, pd.Series):
        columns_ = [co for co in columns_ if co in df.index]
    else:
        columns_ = [co for co in columns_ if co in df.columns]
    if col is not None:
        columns_.insert(4, col)

    if codelist is None:
        if isinstance(df, pd.Series):
            # print("Count:1")
            temp = df.loc[columns_]
        else:
            print("Count:%s %s" % (df.shape))
            temp = df.loc[:, columns_]
    else:
        temp = df.loc[[x for x in codelist if x in df.index], columns_]
    return temp


In [None]:
top_signal.shape

In [None]:
top50 =  top_signal.sort_values('prob', ascending=False).head(50)
show_tdx_data(df.loc[top50.index][-10:])


In [None]:
# plt.figure(figsize=(10,5))
# plt.plot(cumulative_return, label='策略累积收益')
# plt.plot(signal_df['return'].cumsum(), label='基准累积收益')
# plt.legend()
# plt.show()

# -------------------------------
# Step 7: 进一步优化
# - 调整特征窗口
# - 调整目标天数（5d, 7d, 9d）
# - 调参 n_estimators, max_depth, min_samples_leaf
# - 特征选择：剔除重要性为0的列
# - 回测策略：加资金管理、止损止盈
# -------------------------------


In [None]:
print(df.columns.tolist())

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

def analyze_daily_signals(df, top_n=50, plot=False):
    """
    今天的策略信号分析
    
    参数:
    - df: 包含今天信号的数据，必须有 ['signal','prob','EVAL_STATE','filtered_signal','future_return','strategy_return']
    - top_n: 潜力股数量
    - plot: 是否绘制概率分布图
    
    返回:
    - top_signal: 按概率排序的前 top_n 潜力股
    - stats: 策略收益统计信息
    """
    df = df.copy()
    
    # 筛选今天的潜力股
    # top_signal = df[df['filtered_signal'] > 0].sort_values('prob', ascending=False).head(top_n)
    top_signal = df[df['filtered_signal'] > 0].sort_values('prob', ascending=False)
    # 策略收益统计
    stats = df['strategy_return'].describe()
    stats_dict = stats.to_dict()
    stats_dict['positive_ratio'] = (df['strategy_return'] > 0).mean()
    stats_dict['signal_coverage'] = (df['filtered_signal'] > 0).mean()
    
    if plot:
        plt.figure(figsize=(12,4))
        plt.scatter(df.index, df['prob'], c=df['filtered_signal'], cmap='coolwarm', s=50)
        plt.colorbar(label='filtered_signal')
        plt.xlabel('Stock Code')
        plt.ylabel('Signal Probability')
        plt.title('Today Signal Probabilities & Filtered Signals')
        plt.xticks(rotation=90)
        plt.tight_layout()
        plt.show()
    
    return top_signal, stats_dict


In [None]:
top_signal, stats = analyze_daily_signals(top_signal)
print(stats)
print(top_signal[['signal','prob','EVAL_STATE','future_return','strategy_return']])


In [None]:
top_signal.columns

In [None]:
# print(df.columns.tolist())

In [None]:
import numpy as np
import pandas as pd

def score_strong_pullback_ma5(df, min_score=60):
    """
    强势股 MA5 回调打分模型

    Parameters
    ----------
    df : DataFrame
        必须包含 ma5d, ma10d, ma20d, ma60d,
        lastp1d, lastp2d, lastp3d,
        lastv1d, lastv2d, close
    min_score : int
        最低保留分数，默认 60

    Returns
    -------
    DataFrame
        增加以下字段：
        trend_score, pullback_score, volume_score, strong_score
        并按 strong_score 倒序
    """

    _df = df.copy()

    # ================= Trend Score =================
    _df["trend_score"] = 0

    _df.loc[_df.ma5d > _df.ma10d, "trend_score"] += 10
    _df.loc[_df.ma10d > _df.ma20d, "trend_score"] += 10
    _df.loc[_df.ma20d > _df.ma60d, "trend_score"] += 10

    _df.loc[
        (_df.lastp1d > _df.lastp2d) & (_df.lastp2d > _df.lastp3d),
        "trend_score"
    ] += 10

    # ================= Pullback Score =================
    _df["pullback_score"] = 0
    pullback_ratio = (_df.close - _df.ma5d).abs() / _df.ma5d

    _df.loc[pullback_ratio <= 0.005, "pullback_score"] += 20
    _df.loc[(pullback_ratio > 0.005) & (pullback_ratio <= 0.01), "pullback_score"] += 15
    _df.loc[(pullback_ratio > 0.01) & (pullback_ratio <= 0.02), "pullback_score"] += 10

    _df.loc[_df.close > _df.ma5d, "pullback_score"] += 10
    _df.loc[_df.close > _df.ma10d, "pullback_score"] += 10

    # ================= Volume Score =================
    _df["volume_score"] = 0
    vol_ratio = _df.lastv1d / _df.lastv2d.replace(0, np.nan)

    _df.loc[vol_ratio >= 1.0, "volume_score"] += 20
    _df.loc[(vol_ratio >= 0.7) & (vol_ratio < 1.0), "volume_score"] += 15
    _df.loc[(vol_ratio >= 0.5) & (vol_ratio < 0.7), "volume_score"] += 10

    # ================= Strong Score =================
    _df["strong_score"] = (
        _df.trend_score +
        _df.pullback_score +
        _df.volume_score
    )

    # ================= Filter & Sort =================
    _df = _df[_df.strong_score >= min_score]
    _df = _df.sort_values("strong_score", ascending=False)

    return _df


In [None]:
df_strong = score_strong_pullback_ma5(df)
df_strong.shape
df_strong[
    ["name", "close", "ma5d", "ma10d",
     "trend_score", "pullback_score",
     "volume_score", "strong_score"]
].head(5)

In [None]:
strong_pullback_ma5 = df.query(
    "ma5d > ma10d > ma20d > ma60d "
    "and lastp1d > lastp2d > lastp3d "
    "and lasth1d > lasth2d > lasth3d "
    "and lastl1d > lastl2d > lastl3d "
    "and close <= ma5d * 1.01 "
    "and close >= ma5d * 0.97 "
    "and close > ma10d "
    "and lastv1d >= lastv2d * 0.7"
).copy()


In [None]:
def is_upper_attack(df):
    # mask = (
    # (df.close >= df.upper * 0.98) &
    # (df.bandwidth > df.bandwidth.rolling(10).mean()) &
    # (df.ma5d > df.ma10d) &
    # (df.ma10d > df.ma20d)
    #     )
    bw_ma10 = df.bandwidth.rolling(10).mean()

    mask = (
        (df.close >= df.upper * 0.98) &
        (df.bandwidth > bw_ma10) &
        (df.bandwidth.shift(1) <= bw_ma10.shift(1)) &  # ← 关键
        (df.ma5d > df.ma10d) &
        (df.ma10d > df.ma20d)
    )
    upper_attack = df[mask]

    return (upper_attack)


In [None]:
def is_upper_attack_first_touch(df):
    mask = (
        (df.close >= df.upper * 0.98) &
        (df.close.shift(1) < df.upper.shift(1) * 0.95) &  # 昨天没在上轨
        (df.close.shift(1) > df.ene) &                   # 从中轨上来
        (df.bandwidth > df.bandwidth.shift(1))
    )
    return df[mask]

In [None]:
def ma60_pullback_by_category_base(df, category_name="商业航天", max_pct_chg=8):
    """
    筛选指定板块的 MA60 突破回踩潜在动能股

    参数：
        df : pd.DataFrame
            股票数据，必须包含以下列：
            ['close', 'ma5d', 'ma60d', 'lastv1d', 'lastv2d', 'lastp1d', 'category']
        category_name : str
            板块名关键字，例如 "商业航天"
        max_pct_chg : float
            当日涨幅上限，避免追高割肉

    返回：
        pd.DataFrame : 可下单股票池
    """
    # 板块筛选
    mask_category = df['category'].str.contains(category_name, na=False)

    # 计算当日涨幅
    pct_chg = (df['close'] - df['lastp1d']) / df['lastp1d'] * 100

    # MA60 突破回踩可执行条件
    mask_ma60 = (
        (df.close > df.ma60d) &               # 已突破长期均线
        (df.lastp1d < df.ma60d) &            # 昨天未突破
        (df.close < df.ma5d * 1.01) &        # 回踩 MA5 附近
        (df.lastv1d / df.lastv2d > 0.5) &    # 放量确认
        (pct_chg < max_pct_chg)               # 今天涨幅小于上限
    )

    # 综合条件
    mask = mask_category & mask_ma60

    df_entry = df[mask].copy()

    # 排序（按最新成交量，可根据策略修改）
    df_entry = df_entry.sort_values('lastv1d', ascending=False)

    return df_entry


In [None]:
df['date'].iloc[-1]

In [None]:
import pandas as pd
import datetime

# 假设 df 有 date 列，按升序排列
df['date'] = pd.to_datetime(df['date'])

# 今天日期
today = datetime.datetime.now().date()

# 获取最近交易日（如果 today 不是交易日，则取最近一行日期）
if today in df['date'].dt.date.values:
    today_trading = today
else:
    today_trading = df['date'].iloc[-1].date()

# 回溯 n 天获取实际交易日
def get_trading_date(df, day_offset, today_trading=datetime.datetime.now().date()):
    """
    df: DataFrame，必须有 date 列
    day_offset: 回溯天数 (0=今天, 1=昨天, ...)
    today_trading: 当前交易日日期
    """
    trading_days = pd.Series(df['date'].dt.date.unique()).sort_values()
    today_idx = trading_days[trading_days == today_trading].index[0]
    target_idx = today_idx - day_offset
    if target_idx < 0:
        return None
    return trading_days.iloc[target_idx]

# # 示例：d=5
# d = 3
# trading_date_d5 = get_trading_date(df, d)
# print(f'today: {today} d: {d}, 实际交易日: {trading_date_d5}')

# # 判断是否工作日
# is_workday = trading_date_d5.weekday() < 5
# print(f'是否工作日: {is_workday}')

# # 输出当天股票列表
# df_day5 = df[df['day']==d]
# print(f'd:{d}, df_day:{df_day5.index.tolist()}')


In [None]:
df.date

In [None]:
def strong_momentum_continuous(df, max_days=9):
    """
    筛选从最近 2 天到 max_days 天连续高点、收盘、低点升高的股票

    参数：
        df : pd.DataFrame
            必须包含列：
            ['lastp1d'..'lastp9d', 'lasth1d'..'lasth9d', 'lastl1d'..'lastl9d']
        max_days : int
            最大连续天数

    返回：
        dict : {连续天数: DataFrame}，每个天数对应符合条件的股票
    """
    result_dict = {}

    for window in range(2, max_days+1):
        strong_stocks = []

        for idx, row in df.iterrows():
            high_inc = True
            close_new_high = True
            low_inc = True

            for d in range(1, window):
                # 检查最近 window 天是否连续升高
                if row[f'lasth{d+1}d'] >= row[f'lasth{d}d']:
                    high_inc = False
                if row[f'lastp{d+1}d'] >= row[f'lastp{d}d']:
                    close_new_high = False
                if row[f'lastl{d+1}d'] >= row[f'lastl{d}d']:
                    low_inc = False

            if high_inc and close_new_high and low_inc:
                strong_stocks.append(idx)

        df_window = df.loc[strong_stocks].copy()
        # 排序可按最新收盘价
        df_window = df_window.sort_values('lastp1d', ascending=False)
        result_dict[window] = df_window

    return result_dict


In [None]:
import pandas as pd

def strong_momentum_today_plus_history_sum_opt(df, max_days=cct.compute_lastdays, winlimit=winlimit):
    """
    完全向量化版本，用 NumPy 计算严格连续上涨和 sum_percent 25ms
    """
    result_dict = {}

    # ===== 0️⃣ 判断今天状态，只做一次 =====
    is_trade_day = cct.get_trade_date_status()
    in_market_hours = 915 < cct.get_now_time_int() < 1500
    real_time_mode = is_trade_day and in_market_hours

    ohlc_same_as_last1d = (
        (df['open'] == df.get('lasto1d', df['open'])) &
        (df['low'] == df.get('lastl1d', df['low'])) &
        (df['high'] == df.get('lasth1d', df['high'])) &
        (df['close'] == df.get('lastp1d', df['close']))
    )
    use_real_ohlc = real_time_mode & (~ohlc_same_as_last1d)

    # ===== 1️⃣ 今天数据列 =====
    today_open  = df['open'].where(use_real_ohlc, df['lasto1d']).to_numpy()
    today_high  = df['high'].where(use_real_ohlc, df['lasth1d']).to_numpy()
    today_low   = df['low'].where(use_real_ohlc, df['lastl1d']).to_numpy()
    today_close = df['close'].where(use_real_ohlc, df['lastp1d']).to_numpy()

    codes = df.index.to_numpy()

    # ===== 2️⃣ 历史收盘/高/低 =====
    # 构建 N x max_days 的 NumPy array
    lastp = np.zeros((len(df), max_days))
    lasth = np.zeros((len(df), max_days))
    lastl = np.zeros((len(df), max_days))

    for i in range(1, max_days+1):
        lastp[:, i-1] = df.get(f'lastp{i}d', 0).to_numpy()
        lasth[:, i-1] = df.get(f'lasth{i}d', 0).to_numpy()
        lastl[:, i-1] = df.get(f'lastl{i}d', 0).to_numpy()

    # ===== 3️⃣ 遍历窗口 =====
    for window in range(winlimit, max_days+1):
        if window == 1:
            # window=1 特殊处理
            # mask = (today_high > lastp[:, 0]) & (today_close > lastp[:, 0])
            # window=1 特殊处理：实时 vs 收盘后
            mask = np.where(
                use_real_ohlc.to_numpy(),
                (today_high > lastp[:, 0]) & (today_close > lastp[:, 0]),  # 实时 vs 昨天
                (lastp[:, 0] > df.get('lastp2d', lastp[:, 0]).to_numpy()) &
                (lasth[:, 0] > df.get('lasth2d', lasth[:, 0]).to_numpy())  # 收盘后 vs 前天
            )
        else:
            # 严格连续上涨
            # lastp[:, 0:window-1] > lastp[:, 1:window] for close
            mask_close = np.all(lastp[:, :window-1] > lastp[:, 1:window], axis=1)
            mask_high  = np.all(lasth[:, :window-1] > lasth[:, 1:window], axis=1)
            mask_low   = np.all(lastl[:, :window-1] > lastl[:, 1:window], axis=1)
            mask = mask_close & mask_high & mask_low

        if not mask.any():
            continue

        # ===== 4️⃣ sum_percent =====
        compare_low = lastl[:, window-1].copy()
        compare_low[compare_low==0] = today_low[compare_low==0]  # 避免0
        sum_percent = ((today_high - compare_low) / compare_low * 100).round(2)
        sum_percent = sum_percent[mask]

        # ===== 5️⃣ 构建 df 矩阵 =====
        df_window = df.iloc[mask].copy()
        df_window['sum_perc'] = sum_percent
        df_window = df_window.sort_values('sum_perc', ascending=False)
        result_dict[window] = df_window

    return result_dict

In [None]:
def strong_momentum_strict_single_percent(df, max_days=9):
    """
    严格连续 2~max_days 天高低收盘升高的股票
    sum_percent 用今天 high - lastl{window-1}d 一次性计算
    遇到 low=0 时打印 debug
    """
    result_dict = {}

    for window in range(2, max_days+1):
        strong_stocks = []
        sum_percent_dict = {}

        for idx, row in df.iterrows():
            high_inc = True
            close_inc = True
            low_inc = True

            # 连续升高判断：今天 -> window-1 天前
            for d in range(1, window):
                today_high = row['high'] if d == 1 else row[f'lasth{d-1}d']
                today_close = row['close'] if d == 1 else row[f'lastp{d-1}d']
                today_low = row['low'] if d == 1 else row[f'lastl{d-1}d']

                prev_high = row.get(f'lasth{d}d', None)
                prev_close = row.get(f'lastp{d}d', None)
                prev_low = row.get(f'lastl{d}d', None)

                if prev_high is None or prev_close is None or prev_low is None or prev_low <= 0:
                    print(f"[DEBUG] 股票 {row.get('name', idx)} idx={idx}, window={window}, d={d}, prev_low=0或缺失，断开连续")
                    high_inc = close_inc = low_inc = False
                    break

                if today_high <= prev_high:
                    high_inc = False
                if today_close <= prev_close:
                    close_inc = False
                if today_low <= prev_low:
                    low_inc = False

            if high_inc and close_inc and low_inc:
                # sum_percent 计算
                compare_low = row.get(f'lastl{window-1}d', 0)
                if compare_low <= 0:
                    compare_low = row['low'] if row['low'] > 0 else row['high']
                    print(f"[DEBUG] 股票 {row.get('name', idx)} idx={idx}, window={window}, compare_low=0，用替代值={compare_low}")
                sum_percent = (row['high'] - compare_low) / compare_low

                strong_stocks.append(idx)
                sum_percent_dict[idx] = sum_percent

        df_window = df.loc[strong_stocks].copy()
        df_window['sum_perc'] = df_window.index.map(sum_percent_dict)
        df_window = df_window.sort_values('sum_perc', ascending=False)
        result_dict[window] = df_window

    return result_dict


In [None]:
def print_filtered_results(results, seen_set=set()):
    """
    封装后的显示函数：
    1. 按天数从大到小排列
    2. 自动过滤已出现的股票
    3. 2-3天窗口仅显示涨幅前10名及总数
    """
    for window in sorted(results.keys(), reverse=True):
        # 复制数据以防修改原始 dict
        df_window = results[window].copy()
        
        # 1. 过滤已经出现过的股票
        df_window = df_window[~df_window['name'].isin(seen_set)]
        
        if df_window.empty:
            continue
            
        total_count = len(df_window)
        
        # 2. 排序逻辑（基于你现有的 future_return 字段）
        df_sorted = df_window.sort_values(by='per1d', ascending=False)
        
        # 3. 分支判断：2, 3 天只取前 10
        if window in [2, 3]:
            df_display = df_sorted.head(10)
            print(f"\n连续 {window} 天升高 (总数: {total_count} 家，仅显示涨幅前10):")
        else:
            df_display = df_sorted
            print(f"\n连续 {window} 天升高 (总数: {total_count} 家):")
            
        # 4. 打印输出
        if not df_display.empty:
            print(df_display[['name', 'lastp1d', 'lasth1d', 'lastl1d']])
            
        # 5. 更新已见集合（使用 df_window 的全量名单，确保不重复）
        seen_set.update(df_window['name'].tolist())

# 调用示例：
# seen = set()
# print_filtered_results(results, seen)

In [None]:
def compare_results_dict(results1, results2, tol=1e-6,diffsum=False):
    """
    比较两个 strong_momentum 结果字典是否一致
    - results1, results2: dict{window: DataFrame}
    - tol: 数值容差，用于 sum_percent 比较
    返回 True/False，并打印差异
    """
    all_windows = set(results1.keys()).union(results2.keys())
    consistent = True

    for window in sorted(all_windows):
        df1 = results1.get(window)
        df2 = results2.get(window)

        if df1 is None and df2 is None:
            continue
        elif df1 is None or df2 is None:
            print(f"[DIFF] window={window}: one is None, the other exists")
            consistent = False
            continue

        # 对比索引（股票 name）
        names1 = set(df1['name'])
        names2 = set(df2['name'])
        if names1 != names2:
            diff1 = names1 - names2
            diff2 = names2 - names1
            print(f"[DIFF] window={window} - 股票不一致")
            if diff1:
                print(f"  only in results1: {diff1}")
            if diff2:
                print(f"  only in results2: {diff2}")
            consistent = False
        
        if diffsum:
            # 对比 sum_percent
            common_names = names1 & names2
            for name in common_names:
                val1 = df1.loc[df1['name']==name, 'sum_perc'].values[0]
                val2 = df2.loc[df2['name']==name, 'sum_perc'].values[0]
                if abs(val1 - val2) > tol:
                    print(f"[DIFF] window={window}, 股票={name}, sum_percent 不一致: {val1} vs {val2}")
                    consistent = False

    if consistent:
        print("✅ 两个 results 字典一致")
    else:
        print("❌ 两个 results 字典存在差异")

    return consistent


In [None]:
def print_strong_stocks_by_window(results, columns=['name','lastp1d','lasth1d','lastl1d','sum_perc'], top_n=None):
    """
    按连续天数从大到小去重显示股票，避免重复显示
    
    参数：
    - results: dict, key=连续天数, value=对应DataFrame
    - columns: list, 要显示的列
    - top_n: int 或 None, 每个窗口显示前 N 条股票，None 显示全部
    """
    seen = set()  # 已加入的股票

    for window in sorted(results.keys(), reverse=True):  # 从大到小
        df_window = results[window].copy()
        # 过滤已经出现过的股票
        df_window = df_window[~df_window['name'].isin(seen)]
        if df_window.empty:
            continue
        total_count = len(df_window)
        print(f"\n连续 {window} 天高低收盘升高的股票，总数：{total_count}")
        if top_n is not None:
            print(df_window[columns].head(top_n))
        else:
            print(df_window[columns])

        # 添加到已见集合，避免重复
        seen.update(df_window['name'].tolist())
    return seen

In [None]:
# def merge_strong_momentum_results(results, min_days=3, columns=['name','lastp1d','lasth1d','lastl1d','sum_percent']):
def merge_strong_momentum_results(results, min_days=2, columns=['sum_perc']):
    """
    将 strong_momentum_strict_single_percent 的结果合并为一个 DataFrame
    - 只保留连续天数 >= min_days
    - 添加一列 'window' 表示连续天数
    - 按 window 从大到小去重，避免重复显示
    """
    merged_list = []
    seen = set()  # 已加入的股票 name

    for window in sorted(results.keys(), reverse=True):  # 大到小
        if window < min_days:
            continue
        df_window = results[window].copy()
        # 过滤已经出现过的股票
        df_window = df_window[~df_window['name'].isin(seen)]
        if df_window.empty:
            continue
        df_window['win'] = window
        merged_list.append(df_window)
        seen.update(df_window['name'].tolist())

    if merged_list:
        merged_df = pd.concat(merged_list, ignore_index=False)
        merged_df = merged_df.sort_values(['win','sum_perc'], ascending=[False, False])
        return merged_df[columns + ['win']]
    else:
        return pd.DataFrame(columns=columns + ['win'])


In [None]:
# results = strong_momentum_today_plus_history_percent_fast(df, max_days=9)


In [None]:
print_strong_stocks_by_window(results, top_n=10)

In [None]:
def align_sum_percent(df, merged_df):
    """
    将 merged_df 的 sum_percent 和 window 对齐到原始 df
    - df: 原始 DataFrame，index 为 code 或包含 'code' 列
    - merged_df: merged strong momentum DataFrame，index 为 code
    - 对缺失的 sum_percent 填 0，window 填 NaN
    """
    df_copy = df.copy()
    
    # 如果 df 没有 code 作为索引，则设为索引
    if 'code' in df_copy.columns and df_copy.index.name != 'code':
        df_copy = df_copy.set_index('code')
    
    # 对齐 sum_percent 和 window
    df_copy['sum_perc'] = merged_df['sum_perc'].reindex(df_copy.index).fillna(0)
    df_copy['win'] = merged_df['win'].reindex(df_copy.index).replace([np.inf, -np.inf], 0).fillna(0).astype(int)
    
    return df_copy


# strong_momentum_today_plus_history_sum_opt

In [None]:
results_opt = strong_momentum_today_plus_history_sum_opt(df, max_days=cct.compute_lastdays)


In [None]:
data_t= df.loc[['688239','601360']]
results_t = strong_momentum_today_plus_history_sum_opt(data_t, max_days=cct.compute_lastdays)
print_strong_stocks_by_window(results_t, top_n=10)

In [None]:
print_strong_stocks_by_window(results_opt, top_n=10)

In [None]:
# 假设 df['code'] 原本不是索引
code = '601360'
# df_feat = pd.DataFrame(generate_df_vect_daily_features_safe(df, lastdays=cct.compute_lastdays))
# # df_feat.loc[code]
# df_feat[-1:]
# df_indexed = df_feat.set_index('code', drop=False)  # drop=False 保留原 code 列
results_opt
# 查找 code = '601360'
row = df_indexed.loc['601360']  # 直接 O(1) 查找
row

In [None]:
print_strong_stocks_by_window(results_opt, top_n=10)

In [None]:
compare_results_dict(results,results_opt)

In [None]:
clean_sum = merge_strong_momentum_results(results_opt,min_days=2)

In [None]:
clean_sum

In [None]:
align_sum_percent(df,clean_sum).win

In [None]:
results_debug = strong_momentum_strict_single_percent(df, max_days=9)

In [None]:
# print_filtered_results(results)

In [None]:
def ma60_pullback_backtest(df, category_name="商业航天", max_pct_chg=8, days=3):
    """
    回溯式筛选 MA60 突破回踩潜在动能股，多日状态迭代

    参数：
        df : pd.DataFrame
            股票数据，必须包含列：
            ['close', 'ma5d', 'ma10d', 'ma20d', 'ma60d',
             'ma51d','ma52d','ma201d','ma202d', 
             'lastp1d','lastp2d','lastp3d','lastv1d','lastv2d','lastv3d','category']
        category_name : str
            板块名关键字
        max_pct_chg : float
            多日涨幅上限
        days : int
            回溯天数，例如 3

    返回：
        pd.DataFrame : 每天状态满足的股票列表，包含 day 列标记
    """
    results = []

    # 板块筛选
    mask_category = df['category'].str.contains(category_name, na=False)

    for d in range(days):
        # 动态列名
        last_close = 'close' if d==0 else f'lastp{d}d'
        last_prev = f'lastp{d+1}d'
        ma5_col = f'ma5d' if d==0 else f'ma5{d+1}d'
        ma10_col = f'ma10d' if d==0 else f'ma10{d+1}d'
        ma60_col = f'ma60d' if d==0 else f'ma60{d+1}d'
        lastv_col = f'lastv{d+1}d'
        lastv_prev_col = f'lastv{d+2}d'

        # 当日涨幅
        pct_chg = (df[last_close] - df[last_prev]) / df[last_prev] * 100

        # MA60 回踩突破条件
        mask_state = (
            (df[last_close] > df[ma60_col]) &
            (df[last_prev] < df[ma60_col]) &
            (df[last_close] < df[ma5_col]*1.01) &
            (df[lastv_col] / df[lastv_prev_col] > 0.5) &
            (pct_chg < max_pct_chg)
        )

        # 板块 + 状态
        mask = mask_category & mask_state

        df_day = df[mask].copy()
        df_day['day'] = d  # 标记回溯天
        print(f'd: {d} df_day:{df_day.index.tolist()}')
        
        results.append(df_day)

    # 合并所有天的数据
    df_result = pd.concat(results, ignore_index=False)
    df_result = df_result.sort_values(['day','lastv1d'], ascending=[True, False])

    return df_result


In [None]:
# upper_df = is_upper_attack_first_touch(df)
upper_df = ma60_pullback_backtest(df,days=8)
upper_df.shape
# upper_df[["name", "close", "ma5d", "ma10d"]].head(20)
show_tdx_data_all(upper_df)

In [None]:
top50 = df.query("ma5d > ma10d > ma60d and lastp1d > lastp2d > lastp3d and lasth1d > lasth2d > lasth3d and lastl1d > lastl2d > lastl3d").sort_values('per1d', ascending=False)
show_tdx_data(df.loc[top50.index])

# 全局图

In [None]:
import matplotlib.pyplot as plt

def plot_signal_scatter(df):
    """
    绘制策略信号散点图
    df: 包含至少 ['prob', 'future_return', 'EVAL_STATE'] 列
    """
    plt.figure(figsize=(10,6))
    
    # 正收益点
    pos = df[df['future_return'] > 0]
    plt.scatter(pos['prob'], pos['future_return'], color='green', label='Positive Return', alpha=0.7)
    
    # 负收益点
    neg = df[df['future_return'] <= 0]
    plt.scatter(neg['prob'], neg['future_return'], color='red', label='Negative Return', alpha=0.7)
    
    plt.axhline(0, color='black', linestyle='--', linewidth=0.8)
    plt.xlabel('Predicted Probability')
    plt.ylabel('Future Return')
    plt.title('Strategy Signal Scatter Plot')
    plt.legend()
    plt.grid(True)
    plt.show()


In [None]:
plot_signal_scatter(top_signal)  # df_today 是你刚整理好的策略信号 DataFrame
