In [198]:
import pandas as pd
import numpy as np
import torch
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset, random_split
import torch.nn as nn
import torch.optim as optim
from datetime import datetime, timedelta

# 加载数据
ticker = "000001.SS"
stock = yf.Ticker(ticker)
data = stock.history(start="2010-01-01", end="2024-05-10")

# 过滤数据，只包含交易日
data = data[data['Volume'] > 0]
data.drop(columns=['Dividends', 'Stock Splits'], inplace=True)

# 数据归一化
scaler = MinMaxScaler()
features = ['Open', 'High', 'Low', 'Close', 'Volume']
data[features] = scaler.fit_transform(data[features])

# 定义时间序列长度
time_steps =10  # 根据模型需求调整

# 创建序列数据
def create_sequences(data, time_steps):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data.iloc[i:(i + time_steps)][features].values)
        y.append(data.iloc[i + time_steps][features].values)
    return np.array(X), np.array(y)

# 创建训练和测试数据
train_data = data.loc[:'2022-12-31'] # 训练集包括 2023 年之前和 2024 年之后的数据
test_data = data.loc['2023-01-01':]  # 测试集包括 2023 年的数据

X_train, y_train = create_sequences(train_data, time_steps)
X_test, y_test = create_sequences(test_data, time_steps)

# 转换为张量
X_train_tensor = torch.Tensor(X_train)
y_train_tensor = torch.Tensor(y_train)
X_test_tensor = torch.Tensor(X_test)
y_test_tensor = torch.Tensor(y_test)

# DataLoader
train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=64, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test_tensor, y_test_tensor), batch_size=64, shuffle=True)

# 定义模型

class SimpleNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size // 2)
        self.fc3 = nn.Linear(hidden_size // 2, output_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

input_size = time_steps * len(features)
hidden_size = 128
output_size = len(features)

model = SimpleNet(input_size, hidden_size, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-5, weight_decay=3e-5)

# 训练函数
def train_model(model, train_loader, val_loader, epochs=100, patience=10):
    best_val_loss = float('inf')
    epochs_no_improve = 0.2

    early_stop = False
    for epoch in range(epochs):
        if early_stop:
            print(f"Early stopping at epoch {epoch+1}")
            break
        model.train()
        train_loss = 0
        for seq, labels in train_loader:
            optimizer.zero_grad()
            seq = seq.view(seq.shape[0], -1)
            outputs = model(seq)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        
        train_loss /= len(train_loader)

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for seq, labels in val_loader:
                seq = seq.view(seq.shape[0], -1)
                outputs = model(seq)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
            val_loss /= len(val_loader)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), 'best_model.pth')
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        if epochs_no_improve == patience:
            early_stop = True

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch + 1}, Training Loss: {train_loss:.5f}, Validation Loss: {val_loss:.5f}")

# 划分验证集
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
total_size = len(train_dataset)
val_size = int(0.2 * total_size)
train_size = total_size - val_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

# 创建 DataLoader
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=True)

# 训练模型
train_model(model, train_loader, val_loader, epochs=400, patience=10)

# 使用模型进行预测
def pre(days):
    future_predictions = []
    with torch.no_grad():
        current_seq = X_test_tensor[-1].reshape(1, -1)
        for _ in range(days):
            prediction = model(current_seq).detach()
            future_predictions.append(prediction.numpy())
            next_seq = torch.cat((current_seq[:, len(features):], prediction), 1)
            current_seq = next_seq

    future_predictions = np.vstack(future_predictions)
    predicted_prices = scaler.inverse_transform(future_predictions)

    start_date = datetime.strptime("2024-05-13", "%Y-%m-%d")
    print("Future Predicted Prices:")
    for i, prices in enumerate(predicted_prices):
        date = start_date + timedelta(days=i)
        print(f"{date.date()}: Open: {prices[0]:.2f}, High: {prices[1]:.2f}, Low: {prices[2]:.2f}, Close: {prices[3]:.2f},")
pre(14)

Epoch 10, Training Loss: 0.09467, Validation Loss: 0.08927
Epoch 20, Training Loss: 0.04667, Validation Loss: 0.04341
Epoch 30, Training Loss: 0.01273, Validation Loss: 0.01148
Epoch 40, Training Loss: 0.00333, Validation Loss: 0.00302
Epoch 50, Training Loss: 0.00236, Validation Loss: 0.00212
Epoch 60, Training Loss: 0.00208, Validation Loss: 0.00180
Epoch 70, Training Loss: 0.00183, Validation Loss: 0.00153
Epoch 80, Training Loss: 0.00163, Validation Loss: 0.00131
Epoch 90, Training Loss: 0.00147, Validation Loss: 0.00114
Epoch 100, Training Loss: 0.00136, Validation Loss: 0.00101
Epoch 110, Training Loss: 0.00125, Validation Loss: 0.00090
Epoch 120, Training Loss: 0.00120, Validation Loss: 0.00082
Epoch 130, Training Loss: 0.00111, Validation Loss: 0.00078
Epoch 140, Training Loss: 0.00108, Validation Loss: 0.00073
Epoch 150, Training Loss: 0.00103, Validation Loss: 0.00070
Epoch 160, Training Loss: 0.00100, Validation Loss: 0.00067
Epoch 170, Training Loss: 0.00099, Validation Los