In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow import keras
from tensorflow.keras import layers

# Seed Python's `random`, NumPy and TensorFlow in one call. Seeding NumPy
# alone leaves TensorFlow's random state (e.g. weight initialisation)
# unseeded, so training results would differ from run to run.
SEED = 42
keras.utils.set_random_seed(SEED)


In [None]:

# Excel workbook to load (path is relative to the notebook's working directory).
data_path = "./AlibabaStock2021-2023.xlsx"

# Read the sheet, then show the first rows followed by the column dtypes.
df = pd.read_excel(io=data_path)
print(df.head(), df.dtypes, sep="\n")


In [None]:

# Make sure the rows are in ascending date order and re-number the index.
df = df.sort_values(by="日期", ascending=True).reset_index(drop=True)
print("数据条数：", len(df))

# Quick look at the closing-price series over time.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(df["日期"], df["收盘价"])
ax.set_title("阿里巴巴股票收盘价走势（2021-2023）")
ax.set_xlabel("日期")
ax.set_ylabel("收盘价")
# Tilt the date labels so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()


In [None]:

feature_cols = ["开盘价", "最高价", "最低价", "收盘价", "成交量"]
data = df[feature_cols].values.astype("float32")

# Fit the standardisation statistics on the first 70% of time steps only, so
# later rows never influence the scaler. This index is used solely for
# fitting the statistics: the train/validation/test split of the windowed
# samples is computed separately, after the sequences are built.
n_total = len(data)
train_end = int(n_total * 0.7)
if train_end < 1:
    # Without this check the mean/std of an empty slice would silently be NaN.
    raise ValueError("训练数据为空，无法计算标准化统计量")

train_data = data[:train_end]

mean = train_data.mean(axis=0)
std = train_data.std(axis=0)
# A column that is constant over the fitting period has zero standard
# deviation; scale it by 1 instead of dividing by zero (NaN/inf features).
std[std == 0] = 1.0
data_scaled = (data - mean) / std

# The prediction target is the closing price, standardised with statistics
# from the same fitting period. These are kept separately so predictions can
# be mapped back to real prices later.
target = df["收盘价"].values.astype("float32")
target_mean = target[:train_end].mean()
target_std = target[:train_end].std()
if target_std == 0:
    target_std = np.float32(1.0)
target_scaled = (target - target_mean) / target_std

print("标准化完成。第一条特征：", data_scaled[0])


In [None]:

def create_sequences(features, target, window_size=30, horizon=2):
    """Build sliding-window samples for sequence-to-one forecasting.

    Sample ``i`` uses rows ``i .. i + window_size - 1`` of ``features`` as
    input, and ``target[i + window_size + horizon - 1]`` as label, i.e. the
    value ``horizon`` steps after the last row of the window.

    Args:
        features: Array-like of shape ``(n_steps, ...)`` in time order.
        target: Array-like with the same length as ``features``.
        window_size: Number of consecutive steps per input window (>= 1).
        horizon: How many steps after the window's last row the label lies
            (>= 1).

    Returns:
        ``(X, y)`` as float32 arrays. ``X`` has shape
        ``(n_samples, window_size, ...)`` and ``y`` has shape
        ``(n_samples, ...)``, where
        ``n_samples = n_steps - window_size - horizon + 1``. When the input is
        too short for a single sample, both arrays are empty but keep their
        trailing dimensions.

    Raises:
        ValueError: If ``window_size`` or ``horizon`` is smaller than 1, or
            if ``features`` and ``target`` differ in length.
    """
    if window_size < 1 or horizon < 1:
        raise ValueError("window_size and horizon must both be >= 1")

    features = np.asarray(features, dtype="float32")
    target = np.asarray(target, dtype="float32")
    if len(features) != len(target):
        raise ValueError(
            f"features and target must have the same length, "
            f"got {len(features)} and {len(target)}"
        )

    n_samples = len(features) - window_size - horizon + 1
    if n_samples <= 0:
        # Keep the trailing dimensions so downstream shape logic
        # (e.g. X.shape[-1]) still works on an empty result.
        empty_X = np.empty((0, window_size) + features.shape[1:], dtype="float32")
        empty_y = np.empty((0,) + target.shape[1:], dtype="float32")
        return empty_X, empty_y

    # Row r of `window_idx` holds the time-step indices r .. r + window_size - 1.
    window_idx = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = features[window_idx]

    # The label for window r sits at r + window_size + horizon - 1.
    first_label = window_size + horizon - 1
    y = target[first_label : first_label + n_samples].copy()
    return X, y


# Each input window spans 30 steps; the label lies 2 steps after the
# window's last row.
window_size, horizon = 30, 2

X_all, y_all = create_sequences(
    data_scaled, target_scaled, window_size=window_size, horizon=horizon
)

# Expected X_all layout: (n_samples, 30, 5)
print("X_all 形状:", X_all.shape)
print("y_all 形状:", y_all.shape)


In [None]:

# Chronological 70% / 15% / 15% split of the windowed samples (no shuffling).
n_samples = len(X_all)
train_end, val_end = int(n_samples * 0.7), int(n_samples * 0.85)

X_train, X_val, X_test = X_all[:train_end], X_all[train_end:val_end], X_all[val_end:]
y_train, y_val, y_test = y_all[:train_end], y_all[train_end:val_end], y_all[val_end:]

# Report the shapes of every split.
for split_name, split_X, split_y in (
    ("训练集：", X_train, y_train),
    ("验证集：", X_val, y_val),
    ("测试集：", X_test, y_test),
):
    print(split_name, split_X.shape, split_y.shape)


In [None]:

# Stacked-GRU regressor: two recurrent layers followed by a small dense head.
n_features = X_all.shape[-1]

model = keras.Sequential()
model.add(layers.Input(shape=(window_size, n_features)))
# The first GRU returns the whole sequence so the second GRU can consume it.
model.add(layers.GRU(64, return_sequences=True))
model.add(layers.GRU(32))
model.add(layers.Dense(16, activation="relu"))
# Single output unit with no activation: the standardised closing price.
model.add(layers.Dense(1))

# Train on mean squared error and track mean absolute error alongside it.
model.compile(
    loss="mse",
    metrics=["mae"],
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
)

model.summary()


In [None]:

# Stop once the validation loss has not improved for 10 epochs, and roll the
# model back to the weights of the best epoch.
early_stopping = keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=10,
    monitor="val_loss",
)

# Train for at most 100 epochs, validating on the held-out validation split.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=100,
    callbacks=[early_stopping],
    verbose=1,
)


In [None]:

def _plot_train_val_curve(epochs, train_values, val_values,
                          train_label, val_label, ylabel, title):
    """Draw one training-vs-validation curve per epoch in its own figure.

    Args:
        epochs: X-axis values (epoch numbers).
        train_values: Per-epoch metric values on the training data.
        val_values: Per-epoch metric values on the validation data.
        train_label: Legend label of the training curve.
        val_label: Legend label of the validation curve.
        ylabel: Y-axis label.
        title: Figure title.
    """
    plt.figure(figsize=(8, 4))
    plt.plot(epochs, train_values, "bo-", label=train_label)
    plt.plot(epochs, val_values, "ro-", label=val_label)
    plt.xlabel("Epoch")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.show()


# Plot the training & validation loss / MAE curves.
history_dict = history.history
epochs = range(1, len(history_dict["loss"]) + 1)

_plot_train_val_curve(
    epochs, history_dict["loss"], history_dict["val_loss"],
    train_label="训练损失", val_label="验证损失",
    ylabel="MSE", title="训练 & 验证损失",
)

_plot_train_val_curve(
    epochs, history_dict["mae"], history_dict["val_mae"],
    train_label="训练 MAE", val_label="验证 MAE",
    ylabel="MAE", title="训练 & 验证 MAE",
)


In [None]:

# Evaluate on the held-out test split (metrics are in standardised units).
test_mse, test_mae = model.evaluate(X_test, y_test, verbose=0)
print(f"测试集 MSE（标准化后）: {test_mse:.4f}")
print(f"测试集 MAE（标准化后）: {test_mae:.4f}")

# Predict, then undo the target standardisation to get real prices.
y_pred_scaled = model.predict(X_test)
# reshape(-1) rather than squeeze(): squeeze() would collapse a single-sample
# prediction to a 0-d array that cannot be indexed in the loop below.
y_pred = y_pred_scaled.reshape(-1) * target_std + target_mean
y_true = y_test * target_std + target_mean

# Print true vs. predicted price for up to 10 samples; the test split may
# hold fewer than 10, so cap the loop instead of raising IndexError.
for i in range(min(10, len(y_true))):
    print(f"样本 {i}: 真实收盘价 = {y_true[i]:.2f}, 预测收盘价 = {y_pred[i]:.2f}")


In [None]:

# Overlay true and predicted closing prices for the first (up to) 100 test samples.
fig, ax = plt.subplots(figsize=(10, 4))
for series, series_label in ((y_true, "真实收盘价"), (y_pred, "预测收盘价")):
    ax.plot(series[:100], label=series_label)
ax.set_title("测试集前 100 个样本的真实值 vs 预测值")
ax.set_xlabel("样本索引")
ax.set_ylabel("收盘价")
ax.legend()
plt.show()
