# 数据下载

In [5]:
import yfinance as yf
# ticker='BABA'  #阿里巴巴
# ticker='BIDU'  #百度
ticker='TCOM'  #携程

start_date="2012-01-01"
end_date="2024-05-01"

data = yf.download(ticker, start=start_date, end=end_date)
data.to_csv('../data/'+ticker+'_stock_data.csv')

[*********************100%%**********************]  1 of 1 completed


# 以下为两层感知机为基础模型的AC训练算法（实验效果不佳）

In [None]:
# import os
# import pandas as pd
# import torch
# import torch.nn as nn
# import torch.optim as optim
# from tqdm import tqdm
# from torch.utils.data import Dataset, DataLoader

# # 定义神经网络架构
# class ActorCritic(nn.Module):
#     def __init__(self, input_dim, hidden_dim, action_dim):
#         super(ActorCritic, self).__init__()
#         self.actor = nn.Sequential(
#             nn.Linear(input_dim, hidden_dim),
#             nn.ReLU(),
#             nn.Linear(hidden_dim, action_dim),
#             nn.Softmax(dim=-1)
#         )
#         self.critic = nn.Sequential(
#             nn.Linear(input_dim, hidden_dim),
#             nn.ReLU(),
#             nn.Linear(hidden_dim, 1)
#         )

#     def forward(self, x):
#         action_probs = self.actor(x)
#         state_value = self.critic(x)
#         return action_probs, state_value

# # 自定义数据集
# class StockDataset(Dataset):
#     def __init__(self, csv_file, seq_length=10, end_date='2021-06-06'):
#         self.data = pd.read_csv(csv_file)
#         self.data['datetime'] = pd.to_datetime(self.data['datetime'])
        
#         # 筛选训练数据
#         self.data = self.data[self.data['datetime'] <= pd.to_datetime(end_date)]
        
#         self.data.set_index('datetime', inplace=True)
#         self.features = self.data[['open', 'high', 'low', 'close', 'volume']].values
#         self.targets = self.data['close'].values
#         self.seq_length = seq_length

#     def __len__(self):
#         return len(self.data) - self.seq_length + 1

#     def __getitem__(self, idx):
#         return (torch.tensor(self.features[idx:idx+self.seq_length], dtype=torch.float32),
#                 torch.tensor(self.targets[idx+self.seq_length-1], dtype=torch.float32))
    

# def train_AC(model, dataloader, optimizer, num_epochs, model_path):
#     model.train()
#     criterion = nn.MSELoss()
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     model.to(device)
    
#     for epoch in range(num_epochs):
#         epoch_loss = 0
#         for states, targets in tqdm(dataloader, desc=f'Epoch {epoch+1}/{num_epochs}'):
#             states, targets = states.to(device), targets.to(device)
#             optimizer.zero_grad()
#             action_probs, state_values = model(states)
#             loss = criterion(state_values.squeeze(), targets)
#             loss.backward()
#             optimizer.step()
#             epoch_loss += loss.item()
#         print(f'Epoch {epoch+1}, Loss: {epoch_loss/len(dataloader)}')

#         # 保存模型
#         torch.save(model.state_dict(), model_path)
#         print(f"Model saved to {model_path}")

# if __name__ == "__main__":
#     csv_file = '../data/BABA_stock_data.csv'
#     model_path = '../saved_models/AC_model.pth'
#     input_dim = 5
#     hidden_dim = 128
#     action_dim = 3
#     learning_rate = 1e-4
#     num_epochs = 10
#     seq_length = 10 #窗口时间长度

#     dataset = StockDataset(csv_file, seq_length=seq_length, end_date='2021-06-06')
#     dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

#     model = ActorCritic(input_dim, hidden_dim, action_dim)
#     optimizer = optim.Adam(model.parameters(), lr=learning_rate)

#     # 加载先前训练好的模型
#     if os.path.exists(model_path):
#         model.load_state_dict(torch.load(model_path))
#         print(f"Loaded model from {model_path}")

#     train_AC(model, dataloader, optimizer, num_epochs, model_path)


# 以下为LSTM为基础模型的AC训练算法

# 增加采样与奖励函数

In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader, SequentialSampler, BatchSampler

# 定义Actor-Critic网络
class ActorCritic(nn.Module):
    def __init__(self, input_dim, hidden_dim, action_dim):
        super(ActorCritic, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.actor = nn.Sequential(
            nn.Linear(hidden_dim, action_dim),
            nn.Softmax(dim=-1)
        )
        self.critic = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        lstm_out = lstm_out[:, -1, :]  # 取最后一个时间步的输出
        action_probs = self.actor(lstm_out)
        state_value = self.critic(lstm_out)
        return action_probs, state_value

# 自定义数据集
class StockDataset(Dataset):
    def __init__(self, csv_file, seq_length=10, end_date='2021-06-06'):
        self.data = pd.read_csv(csv_file)
        self.data['datetime'] = pd.to_datetime(self.data['datetime'])
        
        # 筛选训练数据
        self.data = self.data[self.data['datetime'] <= pd.to_datetime(end_date)]
        
        self.data.set_index('datetime', inplace=True)
        self.features = self.data[['open', 'high', 'low', 'close', 'volume']].values
        self.targets = self.data['close'].values
        self.seq_length = seq_length

    def __len__(self):
        return len(self.data) - self.seq_length + 1

    def __getitem__(self, idx):
        return (torch.tensor(self.features[idx:idx+self.seq_length], dtype=torch.float32),
                torch.tensor(self.targets[idx+self.seq_length-1], dtype=torch.float32))




# 定义奖励函数
def calculate_returns(rewards, gamma):
    returns = []
    R = 0
    for r in reversed(rewards):
        R = r + gamma * R
        returns.insert(0, R)
    return returns

def train_AC(model, dataloader, optimizer, num_epochs, model_path, loss_file_path, gamma=0.99):
    model.train()
    criterion = nn.MSELoss()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    # 加载先前的loss记录
    if os.path.exists(loss_file_path):
        loss_df = pd.read_csv(loss_file_path)
        start_epoch = len(loss_df)
    else:
        loss_df = pd.DataFrame(columns=['epoch', 'loss'])
        start_epoch = 0

    for epoch in range(start_epoch, start_epoch + num_epochs):
        state_memory = []
        action_memory = []
        reward_memory = []
        next_state_memory = []
        epoch_loss = 0

        for states, targets in tqdm(dataloader, desc=f'Epoch {epoch+1}/{start_epoch + num_epochs}'):
            states, targets = states.to(device), targets.to(device)
            action_probs, state_values = model(states)

            actions = torch.multinomial(action_probs, 1).squeeze().tolist()
            action_memory.extend(actions)

            reward = targets - states[:, -1, 3]  # 使用价格变化作为奖励
            reward_memory.extend(reward.tolist())

            next_states = states[:, 1:, :]  # 移动一个时间步
            next_state_values = model(next_states)[1]
            next_state_memory.extend(next_state_values.squeeze().tolist())

            state_memory.extend(state_values.squeeze().tolist())

            # 计算损失并更新模型
            if len(reward_memory) >= seq_length:
                rewards = calculate_returns(reward_memory, gamma)
                rewards = torch.tensor(rewards, dtype=torch.float32).to(device)
                state_values = torch.tensor(state_memory, dtype=torch.float32).to(device)
                next_state_values = torch.tensor(next_state_memory, dtype=torch.float32).to(device)

                advantage = rewards - state_values + gamma * next_state_values #优势函数

                critic_loss = criterion(state_values, rewards)
                action_probs_selected = action_probs.gather(1, torch.tensor(action_memory).unsqueeze(1).to(device))
                actor_loss = -torch.mean(torch.log(action_probs_selected) * advantage)

                loss = actor_loss + critic_loss
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                state_memory = []
                action_memory = []
                reward_memory = []
                next_state_memory = []

                epoch_loss += loss.item()

        avg_epoch_loss = epoch_loss / len(dataloader)
        print(f'Epoch {epoch+1}, Loss: {avg_epoch_loss}')

        # 保存模型
        if epoch % 10 == 0 or epoch == start_epoch + num_epochs -1:
            torch.save(model.state_dict(), model_path)
            print(f"模型存储到 {model_path}")

        # 保存loss记录
        new_loss_record = pd.DataFrame({'epoch': [epoch + 1], 'loss': [avg_epoch_loss]})
        loss_df = pd.concat([loss_df, new_loss_record], ignore_index=True)
        loss_df.to_csv(loss_file_path, index=False)
        print(f"Loss record saved to {loss_file_path}")

if __name__ == "__main__":
    csv_file = '../data/BABA_stock_data.csv'
    model_path = '../saved_models/ac_lstm_model.pth'
    loss_file_path = '../saved_models/ac_loss_record.csv'
    input_dim = 5
    hidden_dim = 128
    action_dim = 3
    learning_rate = 1e-4
    num_epochs = 5 # 150,有随时继续训练的功能，强化学习就是这点好，采样时噪声多
    seq_length = 30

    dataset = StockDataset(csv_file, seq_length=seq_length, end_date='2021-06-06')
    sampler = SequentialSampler(dataset)
    batch_sampler = BatchSampler(sampler, batch_size=32, drop_last=False)
    dataloader = DataLoader(dataset, batch_sampler=batch_sampler)

    model = ActorCritic(input_dim, hidden_dim, action_dim)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # 加载先前训练好的模型
    if os.path.exists(model_path):
        model.load_state_dict(torch.load(model_path))
        print(f"Loaded model from {model_path}")

    train_AC(model, dataloader, optimizer, num_epochs, model_path, loss_file_path)


Loaded model from ../saved_models/ac_lstm_model.pth


Epoch 166/170: 100%|██████████| 52/52 [00:02<00:00, 25.84it/s]


Epoch 166, Loss: 0.12009964826015326
Loss record saved to ../saved_models/ac_loss_record.csv


Epoch 167/170: 100%|██████████| 52/52 [00:01<00:00, 38.52it/s]


Epoch 167, Loss: 0.12017225932616454
Loss record saved to ../saved_models/ac_loss_record.csv


Epoch 168/170: 100%|██████████| 52/52 [00:01<00:00, 29.09it/s]


Epoch 168, Loss: 0.12018046957942155
Loss record saved to ../saved_models/ac_loss_record.csv


Epoch 169/170: 100%|██████████| 52/52 [00:01<00:00, 37.90it/s]


Epoch 169, Loss: 0.12017775685168229
Loss record saved to ../saved_models/ac_loss_record.csv


Epoch 170/170: 100%|██████████| 52/52 [00:01<00:00, 33.75it/s]

Epoch 170, Loss: 0.12017997153676473
模型存储到 ../saved_models/ac_lstm_model.pth
Loss record saved to ../saved_models/ac_loss_record.csv





# 可视化AC_LSTM模型性能（纯粹看看能不能跑，真实性能还是要看回测的）

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch.utils.data import DataLoader, SequentialSampler, BatchSampler

import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader, SequentialSampler, BatchSampler

class ActorCritic(nn.Module):
    def __init__(self, input_dim, hidden_dim, action_dim):
        super(ActorCritic, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.actor = nn.Sequential(
            nn.Linear(hidden_dim, action_dim),
            nn.Softmax(dim=-1)
        )
        self.critic = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        lstm_out = lstm_out[:, -1, :]  # 取最后一个时间步的输出
        action_probs = self.actor(lstm_out)
        state_value = self.critic(lstm_out)
        return action_probs, state_value

class StockDataset(Dataset):
    def __init__(self, csv_file, seq_length=10):
        self.data = pd.read_csv(csv_file)
        self.data['datetime'] = pd.to_datetime(self.data['datetime'])
        self.data.set_index('datetime', inplace=True)
        self.features = self.data[['open', 'high', 'low', 'close', 'volume']].values
        self.targets = self.data['close'].values
        self.seq_length = seq_length

    def __len__(self):
        return len(self.data) - self.seq_length + 1

    def __getitem__(self, idx):
        return (torch.tensor(self.features[idx:idx+self.seq_length], dtype=torch.float32),
                torch.tensor(self.targets[idx+self.seq_length-1], dtype=torch.float32))

def plot_loss_curve(loss_file_path):
    # 读取损失记录文件
    loss_df = pd.read_csv(loss_file_path)
    
    # 绘制损失曲线
    plt.figure(figsize=(10, 5))
    plt.plot(loss_df['epoch'], loss_df['loss'], label='Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training Loss Curve')
    plt.legend()
    plt.show()

def plot_predictions(csv_file, model_path, seq_length):
    # 加载数据和模型
    dataset = StockDataset(csv_file, seq_length=seq_length)
    sampler = SequentialSampler(dataset)
    batch_sampler = BatchSampler(sampler, batch_size=32, drop_last=False)
    dataloader = DataLoader(dataset, batch_sampler=batch_sampler)
    
    model = ActorCritic(input_dim=5, hidden_dim=128, action_dim=3)
    model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
    model.eval()
    
    # 获取实际值和预测值
    actuals = []
    predictions = []
    with torch.no_grad():
        for states, targets in dataloader:
            action_probs, state_values = model(states)
            predictions.extend(state_values.squeeze().numpy())
            actuals.extend(targets.numpy())
    
    # 绘制实际值和预测值对比图
    plt.figure(figsize=(15, 7))
    plt.plot(actuals, label='Actual Values')
    plt.plot(predictions, label='Predicted Values')
    plt.xlabel('Time Step')
    plt.ylabel('Stock Price')
    plt.title('Actual vs Predicted Stock Prices')
    plt.legend()
    plt.show()

def plot_profit_curve(csv_file, model_path, seq_length, initial_balance=10000):
    # 加载数据和模型
    dataset = StockDataset(csv_file, seq_length=seq_length)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False)
    
    model = ActorCritic(input_dim=5, hidden_dim=128, action_dim=3)
    model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
    model.eval()
    
    # 模拟交易并计算收益
    balance = initial_balance
    positions = 0
    profit_curve = [balance]
    
    with torch.no_grad():
        for states, targets in dataloader:
            action_probs, state_values = model(states)
            action = torch.argmax(action_probs).item()
            price = targets.item()
            
            # 根据动作执行交易
            if action == 0:  # Buy
                positions += 1
                balance -= price
            elif action == 1:  # Sell
                positions -= 1
                balance += price
            
            # 更新收益曲线
            profit_curve.append(balance + positions * price)
    
    # 绘制收益曲线
    plt.figure(figsize=(15, 7))
    plt.plot(profit_curve, label='Profit Curve')
    plt.xlabel('Time Step')
    plt.ylabel('Balance')
    plt.title('Trading Profit Curve')
    plt.legend()
    plt.show()

if __name__ == "__main__":
    csv_file = '../data/BABA_stock_data.csv'
    model_path = '../saved_models/ac_lstm_model.pth'
    loss_file_path = '../saved_models/ac_loss_record.csv'
    seq_length = 10

    # 绘制损失曲线
    plot_loss_curve(loss_file_path)

    # # 绘制预测值与实际值对比图
    # plot_predictions(csv_file, model_path, seq_length)

    # 绘制收益曲线
    # plot_profit_curve(csv_file, model_path, seq_length)
