In [23]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

%matplotlib inline


In [24]:
# 读取 TXT 文件
file_path = r'..\MoviesData\dailyPredictData\BoxingData.txt'
data = np.loadtxt(file_path)

print(data.shape)

#去掉电影名
data = data[:,1:]
tensor_data = torch.from_numpy(data).float()

print(tensor_data.type())
#（时间步，批量大小）
tensor_data = tensor_data.T
print(tensor_data.shape)

(2, 31)
torch.FloatTensor
torch.Size([30, 2])


In [25]:
test_data_size = 10

batch_size = tensor_data.shape[1]
train_data = tensor_data[:-test_data_size]
test_data = tensor_data[-test_data_size:]
print(train_data.shape)
print(test_data.shape)

torch.Size([20, 2])
torch.Size([10, 2])


In [26]:
scaler = MinMaxScaler(feature_range=(-1, 1))
train_data_normalized = scaler.fit_transform(train_data)

train_data_normalized = torch.from_numpy(train_data_normalized).float()
print(train_data_normalized.type())

train_data_normalized = train_data_normalized.unsqueeze(2)
print(train_data_normalized)

torch.FloatTensor
tensor([[[-1.0000],
         [-1.0000]],

        [[-0.9877],
         [-0.9877]],

        [[-0.9812],
         [-0.9812]],

        [[ 1.0000],
         [ 1.0000]],

        [[-0.9877],
         [-0.9877]],

        [[-0.9877],
         [-0.9877]],

        [[-0.9877],
         [-0.9877]],

        [[-0.9877],
         [-0.9877]],

        [[-0.9877],
         [-0.9877]],

        [[-0.9877],
         [-0.9877]],

        [[-1.0000],
         [-1.0000]],

        [[-0.9877],
         [-0.9877]],

        [[-0.9812],
         [-0.9812]],

        [[ 1.0000],
         [ 1.0000]],

        [[-0.9877],
         [-0.9877]],

        [[-0.9877],
         [-0.9877]],

        [[-0.9877],
         [-0.9877]],

        [[-0.9877],
         [-0.9877]],

        [[-0.9877],
         [-0.9877]],

        [[-0.9877],
         [-0.9877]]])


In [27]:
train_window = 7

In [28]:
def create_inout_sequences(input_data, tw):
    inout_seq = []
    L = len(input_data)
    for i in range(L-tw):
        train_seq = input_data[i:i+tw]
        train_label = input_data[i+tw:i+tw+1]
        inout_seq.append((train_seq ,train_label))
    return inout_seq

In [29]:
train_inout_seq = create_inout_sequences(train_data_normalized, train_window)

In [30]:
train_inout_seq[:5]

[(tensor([[[-1.0000],
           [-1.0000]],
  
          [[-0.9877],
           [-0.9877]],
  
          [[-0.9812],
           [-0.9812]],
  
          [[ 1.0000],
           [ 1.0000]],
  
          [[-0.9877],
           [-0.9877]],
  
          [[-0.9877],
           [-0.9877]],
  
          [[-0.9877],
           [-0.9877]]]),
  tensor([[[-0.9877],
           [-0.9877]]])),
 (tensor([[[-0.9877],
           [-0.9877]],
  
          [[-0.9812],
           [-0.9812]],
  
          [[ 1.0000],
           [ 1.0000]],
  
          [[-0.9877],
           [-0.9877]],
  
          [[-0.9877],
           [-0.9877]],
  
          [[-0.9877],
           [-0.9877]],
  
          [[-0.9877],
           [-0.9877]]]),
  tensor([[[-0.9877],
           [-0.9877]]])),
 (tensor([[[-0.9812],
           [-0.9812]],
  
          [[ 1.0000],
           [ 1.0000]],
  
          [[-0.9877],
           [-0.9877]],
  
          [[-0.9877],
           [-0.9877]],
  
          [[-0.9877],
           [-0.9877]

In [31]:
import torch
import torch.nn as nn

class LSTM(nn.Module):
    def __init__(self, input_size=1, hidden_layer_size=100, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)

    def forward(self, input_seq):
        # 获取批量大小
        batch_size = input_seq.size(1)
        # 初始化隐藏状态和细胞状态，形状为 (num_layers, batch_size, hidden_layer_size)
        self.hidden_cell = (
            torch.zeros(1, batch_size, self.hidden_layer_size),
            torch.zeros(1, batch_size, self.hidden_layer_size)
        )
        # 输入序列已经是 (时间步, 批量大小, 输入特征维度) 的形状，无需调整
        lstm_out, self.hidden_cell = self.lstm(input_seq, self.hidden_cell)
        # 调整 LSTM 输出的形状，以便输入到全连接层
        # lstm_out 的形状是 (时间步, 批量大小, 隐藏层维度)
        # 调整为 (时间步 * 批量大小, 隐藏层维度)
        lstm_out_reshaped = lstm_out.view(-1, self.hidden_layer_size)
        # 通过全连接层得到预测结果
        # predictions 的形状是 (时间步 * 批量大小, 输出维度)
        predictions = self.linear(lstm_out_reshaped)
        # 重新调整预测结果的形状为 (时间步, 批量大小, 输出维度)
        predictions = predictions.view(input_seq.size(0), batch_size, -1)
        # 返回每个批次最后一个时间步的预测结果
        # 形状为 (批量大小, 输出维度)
        return predictions[-1]

In [32]:
model = LSTM()
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [33]:
epochs = 150
 
for i in range(epochs):
    for seq, labels in train_inout_seq:
        optimizer.zero_grad()
        model.hidden_cell = (torch.zeros(batch_size, 1, model.hidden_layer_size),
                        torch.zeros(batch_size, 1, model.hidden_layer_size))
        y_pred = model(seq)

        single_loss = loss_function(y_pred, labels)
        single_loss.backward()
        optimizer.step()
        
        
 
    if i%25 == 1:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')
 
print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')

epoch:   1 loss: 0.11505309


  return F.mse_loss(input, target, reduction=self.reduction)


epoch:  26 loss: 0.01212941
epoch:  51 loss: 0.00068274
epoch:  76 loss: 0.00048626
epoch: 101 loss: 0.00027870
epoch: 126 loss: 0.00012017
epoch: 149 loss: 0.0001628689
