In [1]:
# Step 1: 导入库
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt


In [2]:
# Step 1.1: 定义 RSI 函数
def compute_rsi(series, period=14):
    delta = series.diff()
    gain = delta.clip(lower=0).rolling(window=period).mean()
    loss = -delta.clip(upper=0).rolling(window=period).mean()
    rs = gain / (loss + 1e-10)  # 防止除以0
    rsi = 100 - (100 / (1 + rs))
    return rsi


In [3]:
# Step 2: 读取数据
df = pd.read_csv('/content/Nasdaq100Data.csv', parse_dates=['Date'])

# Step 2.1: 转换数字格式
for col in ['Price', 'Open', 'High', 'Low']:
    df[col] = df[col].astype(str).str.replace(',', '').astype(float)

# Step 2.2: 添加技术指标
df['MA10'] = df['Price'].rolling(10).mean()
df['RSI'] = compute_rsi(df['Price'])
# === 添加增强特征 ===
df['Return5'] = df['Price'].pct_change(5)
df['Volatility5'] = df['Price'].rolling(5).std()
df['Bias_MA10'] = (df['Price'] - df['MA10']) / df['MA10']

# 最后更新你的特征列
feature_cols = ['Price', 'MA10', 'RSI', 'Return5', 'Volatility5', 'Bias_MA10']


# Step 2.3: 删除NaN
df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)


In [4]:
# 确保按时间升序排列并重置索引
df = df.sort_values('Date').reset_index(drop=True)

# 标签构造：严格比较今天与未来5天每一天的价格
labels = []
for i in range(len(df) - 5):
    current = df.loc[i, 'Price']
    future = df.loc[i+1:i+5, 'Price'].values

    if np.all(current < future):
        labels.append(1)  # 当前比未来都低 → 最低点（买入）
    elif np.all(current > future):
        labels.append(2)  # 当前比未来都高 → 最高点（卖出）
    else:
        labels.append(0)  # 中间区域，不确定

# 尾部补0（未来不足5天无法判断）
labels += [0] * (len(df) - len(labels))

df['label'] = labels


In [5]:
# 统计标签分布
unique, counts = np.unique(df['label'], return_counts=True)
label_count = dict(zip(unique, counts))

# 显示结果
print("\n=== 标签分布统计 ===")
print(f"不确定（label=0）：{label_count.get(0, 0)} 天")
print(f"最低点（label=1，高点信号）：{label_count.get(1, 0)} 天")
print(f"最高点（label=2，低点信号）：{label_count.get(2, 0)} 天")



=== 标签分布统计 ===
不确定（label=0）：1175 天
最低点（label=1，高点信号）：699 天
最高点（label=2，低点信号）：381 天


In [6]:
print(df.loc[df['label'] == 1].tail(10))


           Date     Price      Open      High       Low     Vol. Change %  \
2191 2025-01-27  21127.28  21000.17  21292.54  20974.90  631.40M   -2.97%   
2196 2025-02-03  21297.58  21084.64  21406.58  21004.35  387.17M   -0.84%   
2200 2025-02-07  21491.31  21776.28  21869.32  21465.46  323.70M   -1.30%   
2202 2025-02-11  21693.52  21629.11  21776.25  21625.51  283.61M   -0.29%   
2203 2025-02-12  21719.26  21475.41  21745.66  21454.19  308.36M    0.12%   
2223 2025-03-13  19225.48  19534.37  19558.56  19152.57  398.60M   -1.89%   
2226 2025-03-18  19483.36  19657.10  19676.05  19397.07  333.56M   -1.66%   
2228 2025-03-20  19677.61  19558.28  19888.85  19549.31  371.49M   -0.30%   
2241 2025-04-08  17090.40  18034.46  18207.01  16850.18  660.31M   -1.95%   
2249 2025-04-21  17808.30  18023.01  18043.08  17592.92  347.85M   -2.46%   

           MA10        RSI   Return5  Volatility5  Bias_MA10  label  
2191  21477.598  27.595220 -0.007996   155.108243  -0.016311      1  
2196  21710.

In [7]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# 替换 inf 为 NaN（这一步必须放前面）
df[feature_cols] = df[feature_cols].replace([np.inf, -np.inf], np.nan)

# 记录被删除的行
na_mask = df[feature_cols].isna()
rows_with_na = df[na_mask.any(axis=1)].copy()

print(f"\n🧹 删除含无效特征的行数：{len(rows_with_na)} 行")

# 输出被删除行的关键信息（日期、价格、缺失项）
for idx, row in rows_with_na.iterrows():
    na_cols = na_mask.loc[idx]
    missing_features = [col for col, is_na in na_cols.items() if is_na]
    date = row['Date']
    price = row['Price']
    print(f"📉 日期: {date.date()} | 价格: {price:.2f} | 缺失特征: {', '.join(missing_features)}")

# 执行删除并重置索引
df = df.dropna(subset=feature_cols).reset_index(drop=True)

# 标准化特征
scaler = MinMaxScaler()
df[feature_cols] = scaler.fit_transform(df[feature_cols])



🧹 删除含无效特征的行数：0 行


In [8]:
X, y, indices = [], [], []

for i in range(30, len(df)):
    seq = df.loc[i-30:i-1, feature_cols].values
    label = df.loc[i, 'label']

    if not np.isfinite(seq).all():
        continue
    if seq.shape != (30, len(feature_cols)):
        continue

    X.append(seq)
    y.append(label)
    indices.append(i)  # 记录当前样本标签所在的日期索引，用于预测展示

X = np.array(X, dtype=np.float32)
y = np.array(y, dtype=np.int32)


In [9]:
print(f"\n✅ 最终训练样本数：{X.shape[0]}, 输入维度：{X.shape[1:]}")

# 标签统计
unique, counts = np.unique(y, return_counts=True)
print("\n📊 标签分布：")
for u, c in zip(unique, counts):
    name = {0: "观望", 1: "买入", 2: "卖出"}.get(u, "未知")
    print(f"Label {u}（{name}）→ {c} 条")




✅ 最终训练样本数：2225, 输入维度：(30, 6)

📊 标签分布：
Label 0（观望）→ 1158 条
Label 1（买入）→ 694 条
Label 2（卖出）→ 373 条


In [10]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# === 设置你要用的特征列（这些列必须已存在） ===
feature_cols = ['Price', 'MA10', 'RSI', 'Return5', 'Volatility5', 'Bias_MA10']

# === 标准化特征列 ===
scaler = MinMaxScaler()
df[feature_cols] = scaler.fit_transform(df[feature_cols])

# === 构造样本序列 X 和标签 y ===
X, y, indices = [], [], []

for i in range(30, len(df)):
    seq = df.loc[i-30:i-1, feature_cols].values
    label = df.loc[i, 'label']

    if not np.isfinite(seq).all():
        continue
    if seq.shape != (30, len(feature_cols)):
        continue

    X.append(seq)
    y.append(label)
    indices.append(i)  # 记录真实标签所在位置（第i天）

# 转换为 NumPy 数组
X = np.array(X, dtype=np.float32)
y = np.array(y, dtype=np.int32)

# === 输出维度和标签分布 ===
print(f"\n✅ 最终训练样本数：{X.shape[0]}, 每个样本维度：{X.shape[1:]}")

unique, counts = np.unique(y, return_counts=True)
print("\n📊 标签分布：")
for u, c in zip(unique, counts):
    name = {0: "观望", 1: "买入", 2: "卖出"}.get(u, "未知")
    print(f"Label {u}（{name}）→ {c} 条")



✅ 最终训练样本数：2225, 每个样本维度：(30, 6)

📊 标签分布：
Label 0（观望）→ 1158 条
Label 1（买入）→ 694 条
Label 2（卖出）→ 373 条


In [11]:
print("\n📌 最后 10 条训练样本（含目标日的日期、价格与标签）：")
for i in range(-10, 0):
    row_idx = indices[i]  # 第 i 个样本的标签在原始 df 中的行号
    row = df.loc[row_idx]
    label_text = {0: "观望", 1: "买入", 2: "卖出"}.get(row['label'], "未知")
    print(f"{len(indices)+i+1:02d} | 日期: {row['Date'].date()} | 价格: {row['Price']:.2f} | 标签: {row['label']}（{label_text}）")



📌 最后 10 条训练样本（含目标日的日期、价格与标签）：
2216 | 日期: 2025-04-14 | 价格: 0.81 | 标签: 0（观望）
2217 | 日期: 2025-04-15 | 价格: 0.81 | 标签: 2（卖出）
2218 | 日期: 2025-04-16 | 价格: 0.78 | 标签: 0（观望）
2219 | 日期: 2025-04-17 | 价格: 0.78 | 标签: 0（观望）
2220 | 日期: 2025-04-21 | 价格: 0.76 | 标签: 1（买入）
2221 | 日期: 2025-04-22 | 价格: 0.78 | 标签: 0（观望）
2222 | 日期: 2025-04-23 | 价格: 0.81 | 标签: 0（观望）
2223 | 日期: 2025-04-24 | 价格: 0.84 | 标签: 0（观望）
2224 | 日期: 2025-04-25 | 价格: 0.85 | 标签: 0（观望）
2225 | 日期: 2025-04-28 | 价格: 0.85 | 标签: 0（观望）


In [12]:
from tensorflow.keras import layers, models, regularizers

def build_cnn_model_3class(input_shape):
    input_layer = layers.Input(shape=input_shape)

    x = layers.Conv1D(64, 3, padding='same', activation='relu',
                      kernel_regularizer=regularizers.l2(0.001))(input_layer)
    x = layers.BatchNormalization()(x)
    x = layers.Conv1D(64, 3, padding='same', activation='relu',
                      kernel_regularizer=regularizers.l2(0.001))(x)
    x = layers.GlobalMaxPooling1D()(x)

    x = layers.Dense(64, activation='relu')(x)
    x = layers.Dropout(0.5)(x)

    # 三分类输出：0 = 观望, 1 = 买入, 2 = 卖出
    output = layers.Dense(3, activation='softmax')(x)

    model = models.Model(inputs=input_layer, outputs=output)
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model


In [13]:
from tensorflow.keras import layers, models, regularizers

def build_stronger_cnn_model(input_shape):
    input_layer = layers.Input(shape=input_shape)

    # --- 多尺度卷积 ---
    conv3 = layers.Conv1D(64, 3, padding='same', activation='relu',
                          kernel_regularizer=regularizers.l2(0.001))(input_layer)
    conv5 = layers.Conv1D(64, 5, padding='same', activation='relu',
                          kernel_regularizer=regularizers.l2(0.001))(input_layer)
    conv7 = layers.Conv1D(64, 7, padding='same', activation='relu',
                          kernel_regularizer=regularizers.l2(0.001))(input_layer)

    x = layers.Concatenate()([conv3, conv5, conv7])
    x = layers.BatchNormalization()(x)

    # --- Global pooling ---
    x = layers.GlobalMaxPooling1D()(x)

    # --- 全连接 + Dropout ---
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.4)(x)
    x = layers.Dense(64, activation='relu')(x)
    x = layers.Dropout(0.3)(x)

    # --- 三分类输出 ---
    output = layers.Dense(3, activation='softmax')(x)

    model = models.Model(inputs=input_layer, outputs=output)
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model


In [14]:
# === 🔥 强化版 CNN 模型（不含 Attention）===

from tensorflow.keras import layers, models, regularizers, optimizers
from tensorflow.keras.models import Model
from tensorflow.keras.losses import SparseCategoricalCrossentropy


def build_stronger_cnn_model2(input_shape):
    input_layer = layers.Input(shape=input_shape)

    # --- 多尺度卷积 ---
    conv3 = layers.Conv1D(64, 3, padding='same', activation='relu', kernel_regularizer=regularizers.l2(0.001))(input_layer)
    conv5 = layers.Conv1D(64, 5, padding='same', activation='relu', kernel_regularizer=regularizers.l2(0.001))(input_layer)
    conv7 = layers.Conv1D(64, 7, padding='same', activation='relu', kernel_regularizer=regularizers.l2(0.001))(input_layer)

    x = layers.Concatenate()([conv3, conv5, conv7])
    x = layers.BatchNormalization()(x)

    # --- 卷积残差块（增强非线性 + 局部感知） ---
    res = layers.Conv1D(128, 3, padding='same', activation='relu')(x)
    res = layers.BatchNormalization()(res)
    res = layers.Conv1D(192, 3, padding='same', activation='relu')(res)
    x = layers.Add()([x, res])
    x = layers.LayerNormalization()(x)

    # --- 池化 + 全连接 ---
    x = layers.GlobalMaxPooling1D()(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.4)(x)
    x = layers.Dense(64, activation='relu')(x)
    x = layers.Dropout(0.3)(x)

    # --- 输出层（三分类）---
    output = layers.Dense(3, activation='softmax')(x)

    model = Model(inputs=input_layer, outputs=output)
    model.compile(
        optimizer=optimizers.Adam(learning_rate=1e-4),
        loss=SparseCategoricalCrossentropy(),
        metrics=['accuracy']
    )
    return model


In [15]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val, indices_train, indices_val = train_test_split(
    X, y, indices, test_size=0.2, random_state=42, stratify=y
)


In [16]:
# model = build_cnn_model_3class(input_shape=X.shape[1:])
# model = build_stronger_cnn_model(input_shape=X.shape[1:])
model = build_stronger_cnn_model2(input_shape=X.shape[1:])
model.summary()


In [17]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

early_stop = EarlyStopping(
    monitor='val_loss', patience=6, restore_best_weights=True, verbose=1
)

lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=3, verbose=1
)


In [18]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=80,
    batch_size=32,
    callbacks=[early_stop, lr_scheduler],
    verbose=1
)


Epoch 1/80
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 134ms/step - accuracy: 0.3645 - loss: 1.7599 - val_accuracy: 0.5371 - val_loss: 1.0561 - learning_rate: 1.0000e-04
Epoch 2/80
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4574 - loss: 1.1947 - val_accuracy: 0.4899 - val_loss: 1.0684 - learning_rate: 1.0000e-04
Epoch 3/80
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4813 - loss: 1.0911 - val_accuracy: 0.5146 - val_loss: 1.0563 - learning_rate: 1.0000e-04
Epoch 4/80
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4682 - loss: 1.0583 - val_accuracy: 0.5551 - val_loss: 1.0215 - learning_rate: 1.0000e-04
Epoch 5/80
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5433 - loss: 0.9892 - val_accuracy: 0.5865 - val_loss: 0.9880 - learning_rate: 1.0000e-04
Epoch 6/80
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

In [27]:
import numpy as np
from sklearn.metrics import classification_report

# === 推理预测 ===
pred_probs = model.predict(X_val)
pred_labels = np.argmax(pred_probs, axis=1)

# === 标签映射（可自定义） ===
label_names = {0: "观望", 1: "买入", 2: "卖出"}

# === 打印前 N 条预测结果（含时间与价格） ===
print("\n📊 测试集预测结果（前20条）：")
for i in range(min(20, len(X_val))):
    idx = indices_val[i]  # 找回对应的 df 行
    date = df.loc[idx, 'Date']
    price = df.loc[idx, 'Price']
    actual = y_val[i]
    pred = pred_labels[i]
    prob = pred_probs[i]

    print(f"{i+1:02d} | 日期: {date.date()} | 价格: {price:.2f} | "
          f"预测: {label_names[pred]} ({prob[pred]:.2f}) | 实际: {label_names[actual]}")


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step

📊 测试集预测结果（前20条）：
01 | 日期: 2023-09-07 | 价格: 0.62 | 预测: 观望 (0.51) | 实际: 观望
02 | 日期: 2017-03-10 | 价格: 0.07 | 预测: 观望 (0.72) | 实际: 观望
03 | 日期: 2019-10-07 | 价格: 0.20 | 预测: 观望 (0.94) | 实际: 观望
04 | 日期: 2023-09-14 | 价格: 0.63 | 预测: 观望 (0.76) | 实际: 观望
05 | 日期: 2020-02-19 | 价格: 0.31 | 预测: 观望 (0.93) | 实际: 观望
06 | 日期: 2020-08-26 | 价格: 0.43 | 预测: 观望 (0.99) | 实际: 观望
07 | 日期: 2018-05-03 | 价格: 0.14 | 预测: 观望 (0.99) | 实际: 观望
08 | 日期: 2024-12-12 | 价格: 0.97 | 预测: 买入 (0.82) | 实际: 买入
09 | 日期: 2018-09-21 | 价格: 0.19 | 预测: 观望 (0.76) | 实际: 买入
10 | 日期: 2021-11-10 | 价格: 0.66 | 预测: 买入 (0.85) | 实际: 买入
11 | 日期: 2023-07-11 | 价格: 0.61 | 预测: 观望 (0.95) | 实际: 观望
12 | 日期: 2024-01-11 | 价格: 0.70 | 预测: 买入 (0.88) | 实际: 买入
13 | 日期: 2018-09-20 | 价格: 0.19 | 预测: 观望 (0.99) | 实际: 观望
14 | 日期: 2019-06-07 | 价格: 0.18 | 预测: 买入 (0.73) | 实际: 买入
15 | 日期: 2018-04-13 | 价格: 0.14 | 预测: 观望 (0.99) | 实际: 观望
16 | 日期: 2017-01-17 | 价格: 0.05 | 预测: 买入 (0.50) | 实际: 观望
17 | 日期: 201

In [28]:
import numpy as np

# 模型预测
pred_probs = model.predict(X_val)
pred_labels = np.argmax(pred_probs, axis=1)

# 实际标签
actual = y_val

# 预测为买入（1）
pred_buy_mask = pred_labels == 1
buy_correct = np.sum((pred_labels == 1) & (actual == 1))
buy_total = np.sum(pred_buy_mask)
buy_precision = buy_correct / buy_total if buy_total > 0 else 0

# 预测为卖出（2）
pred_sell_mask = pred_labels == 2
sell_correct = np.sum((pred_labels == 2) & (actual == 2))
sell_total = np.sum(pred_sell_mask)
sell_precision = sell_correct / sell_total if sell_total > 0 else 0

# 输出
print(f"\n🎯 模型预测精度分析：")
print(f"🟢 预测为『买入』时的准确率：{buy_precision:.2%}（{buy_correct}/{buy_total}）")
print(f"🔴 预测为『卖出』时的准确率：{sell_precision:.2%}（{sell_correct}/{sell_total}）")


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 

🎯 模型预测精度分析：
🟢 预测为『买入』时的准确率：78.32%（112/143）
🔴 预测为『卖出』时的准确率：77.14%（54/70）


In [21]:
model.save("models/stock_cnn_model.h5")




In [30]:
def predict_latest_from_raw_csv(csv_path, model_path, feature_cols, window=30):
    # === 加载模型 ===
    model = load_model(model_path)

    # === 原始数据处理 ===
    df = pd.read_csv(csv_path, parse_dates=['Date'])

    for col in ['Price', 'Open', 'High', 'Low']:
        df[col] = df[col].astype(str).str.replace(',', '').astype(float)

    # 升序排列日期，确保 tail() 拿到的是最新的30天
    df = df.sort_values('Date').reset_index(drop=True)

    # 添加技术指标（和训练保持一致）
    df['MA10'] = df['Price'].rolling(10).mean()
    df['RSI'] = compute_rsi(df['Price'])
    df['Return5'] = df['Price'].pct_change(5)
    df['Volatility5'] = df['Price'].rolling(5).std()
    df['Bias_MA10'] = (df['Price'] - df['MA10']) / df['MA10']

    df[feature_cols] = df[feature_cols].replace([np.inf, -np.inf], np.nan)

    # 取最新 window 天
    df_latest = df.tail(window).copy()

    if df_latest.shape[0] < window or df_latest[feature_cols].isna().any().any():
        print("⚠️ 最新30天样本不完整，无法推理")
        return

    # 标准化特征
    scaler = MinMaxScaler()
    df_latest[feature_cols] = scaler.fit_transform(df_latest[feature_cols])

    X_latest = np.expand_dims(df_latest[feature_cols].values, axis=0).astype(np.float32)

    # === 推理 ===
    prob = model.predict(X_latest)[0]
    label = np.argmax(prob)
    label_map = {0: "观望", 1: "买入", 2: "卖出"}

    latest_date = df_latest['Date'].iloc[-1]
    latest_price = df_latest['Price'].iloc[-1]

    print(f"\n📅 最新日期：{latest_date.date()} | 当前价格：{latest_price:.2f}")
    print(f"🤖 模型预测：{label_map[label]}（概率：{prob[label]:.2%}）")


In [31]:
feature_cols = ['Price', 'MA10', 'RSI', 'Return5', 'Volatility5', 'Bias_MA10']

predict_latest_from_raw_csv(
    csv_path='Nasdaq100Data.csv',
    model_path='models/stock_cnn_model.h5',
    feature_cols=feature_cols
)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 574ms/step

📅 最新日期：2025-05-16 | 当前价格：1.00
🤖 模型预测：观望（概率：99.45%）


In [32]:
def predict_recent_n_days(csv_path, model_path, feature_cols, window=30, n_days=20):
    import pandas as pd
    import numpy as np
    from tensorflow.keras.models import load_model
    from sklearn.preprocessing import MinMaxScaler

    def compute_rsi(series, period=14):
        delta = series.diff()
        gain = delta.clip(lower=0).rolling(window=period).mean()
        loss = -delta.clip(upper=0).rolling(window=period).mean()
        rs = gain / (loss + 1e-10)
        return 100 - (100 / (1 + rs))

    # === 加载模型 ===
    model = load_model(model_path)
    df = pd.read_csv(csv_path, parse_dates=['Date'])

    for col in ['Price', 'Open', 'High', 'Low']:
        df[col] = df[col].astype(str).str.replace(',', '').astype(float)

    df = df.sort_values('Date').reset_index(drop=True)

    # 添加指标
    df['MA10'] = df['Price'].rolling(10).mean()
    df['RSI'] = compute_rsi(df['Price'])
    df['Return5'] = df['Price'].pct_change(5)
    df['Volatility5'] = df['Price'].rolling(5).std()
    df['Bias_MA10'] = (df['Price'] - df['MA10']) / df['MA10']

    df[feature_cols] = df[feature_cols].replace([np.inf, -np.inf], np.nan)
    df = df.dropna(subset=feature_cols).reset_index(drop=True)

    if len(df) < window + n_days:
        print("❌ 数据不足，无法生成最近 N 天的推理样本")
        return

    scaler = MinMaxScaler()
    df[feature_cols] = scaler.fit_transform(df[feature_cols])

    print(f"\n📊 最近 {n_days} 天预测（高置信度提示 🌟）：")
    label_map = {0: "观望", 1: "买入", 2: "卖出"}

    for i in range(-n_days, 0):
        if i - window < -len(df):
            continue
        X_seq = df[feature_cols].iloc[i - window:i].values
        if X_seq.shape != (window, len(feature_cols)):
            continue

        X_input = np.expand_dims(X_seq, axis=0).astype(np.float32)
        prob = model.predict(X_input, verbose=0)[0]
        label = np.argmax(prob)
        confidence = prob[label]

        date = df['Date'].iloc[i]
        price = df['Price'].iloc[i]

        # 高置信度标记
        marker = " 🌟 高置信度！" if confidence > 0.85 else ""

        print(f"{date.date()} | 💰{price:.2f} | 预测：{label_map[label]}（概率：{confidence:.2%}）{marker}")


In [33]:
feature_cols = ['Price', 'MA10', 'RSI', 'Return5', 'Volatility5', 'Bias_MA10']
predict_recent_n_days(
    csv_path='Nasdaq100Data.csv',
    model_path='models/stock_cnn_model.h5',
    feature_cols=feature_cols,
    window=30,
    n_days=20
)





📊 最近 20 天预测（高置信度提示 🌟）：
2025-04-21 | 💰0.76 | 预测：观望（概率：94.85%） 🌟 高置信度！
2025-04-22 | 💰0.78 | 预测：买入（概率：74.89%）
2025-04-23 | 💰0.81 | 预测：买入（概率：95.37%） 🌟 高置信度！
2025-04-24 | 💰0.84 | 预测：观望（概率：95.28%） 🌟 高置信度！
2025-04-25 | 💰0.85 | 预测：观望（概率：77.01%）
2025-04-28 | 💰0.85 | 预测：观望（概率：88.15%） 🌟 高置信度！
2025-04-29 | 💰0.85 | 预测：观望（概率：88.66%） 🌟 高置信度！
2025-04-30 | 💰0.86 | 预测：观望（概率：95.72%） 🌟 高置信度！
2025-05-01 | 💰0.87 | 预测：观望（概率：98.27%） 🌟 高置信度！
2025-05-02 | 💰0.88 | 预测：观望（概率：97.21%） 🌟 高置信度！
2025-05-05 | 💰0.88 | 预测：观望（概率：96.31%） 🌟 高置信度！
2025-05-06 | 💰0.87 | 预测：观望（概率：95.86%） 🌟 高置信度！
2025-05-07 | 💰0.87 | 预测：观望（概率：96.42%） 🌟 高置信度！
2025-05-08 | 💰0.88 | 预测：观望（概率：98.15%） 🌟 高置信度！
2025-05-09 | 💰0.88 | 预测：观望（概率：98.59%） 🌟 高置信度！
2025-05-12 | 💰0.93 | 预测：观望（概率：97.30%） 🌟 高置信度！
2025-05-13 | 💰0.95 | 预测：观望（概率：95.65%） 🌟 高置信度！
2025-05-14 | 💰0.95 | 预测：观望（概率：94.03%） 🌟 高置信度！
2025-05-15 | 💰0.95 | 预测：观望（概率：95.17%） 🌟 高置信度！
2025-05-16 | 💰0.96 | 预测：观望（概率：95.91%） 🌟 高置信度！
