In [21]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import TensorDataset, DataLoader
import torchkeras
from plotly import graph_objects as go
from sklearn.preprocessing import MinMaxScaler

In [22]:
# 导入数据
# 数据下载：https://www.kaggle.com/kankanashukla/champagne-data
df = pd.read_csv('covid.csv', index_col=0)
df.head()

Unnamed: 0_level_0,Sales
日期,Unnamed: 1_level_1
2022-03-01,2
2022-03-02,8
2022-03-03,16
2022-03-04,19
2022-03-05,28


In [23]:
# 数据预览
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df['Sales'], name='Sales'))
fig.show()

In [24]:
# 数据处理
# 归一化 [0, 1]
scaler = MinMaxScaler()
predict_field = 'Scaler'
df[predict_field] = scaler.fit_transform(df['Sales'].values.reshape(-1, 1))
df.head()

Unnamed: 0_level_0,Sales,Scaler
日期,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-03-01,2,0.0
2022-03-02,8,0.000216
2022-03-03,16,0.000505
2022-03-04,19,0.000613
2022-03-05,28,0.000938


In [25]:
def create_dataset(data: list, time_step: int):
    arr_x, arr_y = [], []
    for i in range(len(data) - time_step - 1):
        x = data[i: i + time_step]
        y = data[i + time_step]
        arr_x.append(x)
        arr_y.append(y)
    return np.array(arr_x), np.array(arr_y)


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Device:', device)

time_step = 7
X, Y = create_dataset(df[predict_field].values, time_step)
# 转化成 tensor->(batch_size, seq_len, feature_size)
X = torch.tensor(X.reshape(-1, time_step, 1), dtype=torch.float).to(device)
Y = torch.tensor(Y.reshape(-1, 1, 1), dtype=torch.float).to(device)
print('Total datasets: ', X.shape, '-->', Y.shape)

# 划分数据
split_ratio = 1
len_train = int(X.shape[0] * split_ratio)
X_train, Y_train = X[:len_train, :, :], Y[:len_train, :, :]
print('Train datasets: ', X_train.shape, '-->', Y_train.shape)

# 构建迭代器
batch_size = 12
ds = TensorDataset(X, Y)
dl = DataLoader(ds, batch_size=batch_size, num_workers=0)
ds_train = TensorDataset(X_train, Y_train)
dl_train = DataLoader(ds_train, batch_size=batch_size, num_workers=0)

for x, y in dl_train:
    print(x.shape)
    print(y.shape)
    break



Device: cuda:0
Total datasets:  torch.Size([60, 7, 1]) --> torch.Size([60, 1, 1])
Train datasets:  torch.Size([60, 7, 1]) --> torch.Size([60, 1, 1])
torch.Size([12, 7, 1])
torch.Size([12, 1, 1])


In [26]:
# 定义模型
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=6, num_layers=3, batch_first=True)
        self.fc = nn.Linear(in_features=6, out_features=1)

    def forward(self, x):
        # x is input, size (seq_len, batch, input_size)
        x, _ = self.lstm(x)
        # x is output, size (seq_len, batch, hidden_size)
        x = x[:, -1, :]
        x = self.fc(x)
        y = x.view(-1, 1, 1)
        return y

In [27]:
# torchkeras API 训练方式
model = torchkeras.Model(Net())
model.summary(input_shape=(time_step, 1))
model.compile(loss_func=F.mse_loss, optimizer=torch.optim.Adam(model.parameters(), lr=1e-3), device=device)
dfhistory = model.fit(epochs=50, dl_train=dl_train, log_step_freq=20)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
              LSTM-1                 [-1, 7, 6]             888
            Linear-2                    [-1, 1]               7
Total params: 895
Trainable params: 895
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.000027
Forward/backward pass size (MB): 0.000328
Params size (MB): 0.003414
Estimated Total Size (MB): 0.003769
----------------------------------------------------------------
Start Training ...


 +-------+-------+
| epoch |  loss |
+-------+-------+
|   1   | 0.568 |
+-------+-------+


 +-------+-------+
| epoch |  loss |
+-------+-------+
|   2   | 0.542 |
+-------+-------+


 +-------+-------+
| epoch |  loss |
+-------+-------+
|   3   | 0.516 |
+-------+-------+


 +-------+-------+
| epoch |  loss |
+-------+-------+
|   4   | 0.489 |
+-------+-------+


 +-------+-------+
| epoc

In [28]:
# 模型评估
# fig = go.Figure()
# fig.add_trace(go.Scatter(x=dfhistory.index, y=dfhistory['loss'], name='loss'))
# fig.show()

In [29]:
# 预测验证预览
# y_pred = model.predict(dl)
# y_pred = y_pred.detach().numpy()
#
# fig = go.Figure()
# fig.add_trace(go.Scatter(y=Y.squeeze(), name='y_true'))
# fig.add_trace(go.Scatter(y=y_pred.squeeze(), name='y_pred'))
# fig.show()

In [30]:
# 自定义训练方式
model = Net().to(device)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-3)


def train_step(model, features, labels):
    # 正向传播求损失
    predictions = model.forward(features)
    loss = loss_function(predictions, labels)
    # 反向传播求梯度
    loss.backward()
    # 参数更新
    optimizer.step()
    optimizer.zero_grad()
    return loss.item()

# 测试一个batch
features, labels = next(iter(dl_train))
loss =  train_step(model, features, labels)
loss

0.12153278291225433

In [31]:
def train_model(model, epochs):
    for epoch in range(1, epochs + 1):
        list_loss = []
        for features, labels in dl_train:
            lossi = train_step(model, features, labels)
            list_loss.append(lossi)
        loss = np.mean(list_loss)
        if epoch % 10 == 0:
            print('epoch={} | loss={} '.format(epoch, loss))


train_model(model, 1000)

epoch=10 | loss=0.11492697410285473 
epoch=20 | loss=0.07947139739990235 
epoch=30 | loss=0.03580002682283521 
epoch=40 | loss=0.028840406937524675 
epoch=50 | loss=0.02242885958403349 
epoch=60 | loss=0.015106860944069923 
epoch=70 | loss=0.00621097682742402 
epoch=80 | loss=0.0052376181287399955 
epoch=90 | loss=0.005035498758661561 
epoch=100 | loss=0.00491432045091642 
epoch=110 | loss=0.004799276516860118 
epoch=120 | loss=0.004719608649611473 
epoch=130 | loss=0.004655558036756702 
epoch=140 | loss=0.00460451026156079 
epoch=150 | loss=0.004544622940011322 
epoch=160 | loss=0.004703784991579596 
epoch=170 | loss=0.0045373394794296475 
epoch=180 | loss=0.004298944053880404 
epoch=190 | loss=0.004219959350302815 
epoch=200 | loss=0.004151657945476473 
epoch=210 | loss=0.004102600185433403 
epoch=220 | loss=0.004427253300673328 
epoch=230 | loss=0.00400843633688055 
epoch=240 | loss=0.003958734989282675 
epoch=250 | loss=0.003915652586147189 
epoch=260 | loss=0.0038680963451042773 


In [32]:
# 预测验证预览

y_pred = model.forward(X)
print(X.shape)
print(y_pred.shape)
# print(y_pred)
y_p_pred = y_pred.detach().cpu().numpy().squeeze()
Y_p_true = Y.cpu().squeeze()
fig = go.Figure()
fig.add_trace(go.Scatter(y=Y_p_true, name='y_true'))
fig.add_trace(go.Scatter(y=y_p_pred, name='y_pred'))
fig.show()

torch.Size([60, 7, 1])
torch.Size([60, 1, 1])


In [34]:
# 外推20个点
n = 7

list_y_pre = []
x = X[-1].view(1, time_step, 1)

for n in range(1, n + 1):
    y = model.forward(x)
    # y = y.view(-1,1,1)
    list_y_pre.append(y.item())
    x = torch.cat([x, y], dim=1)
    x = x[:, 1:, :]

y_p2_pred = np.concatenate((y_p_pred, np.array(list_y_pre)))
fig = go.Figure()
fig.add_trace(go.Scatter(y=Y_p_true, name='y_true'))
fig.add_trace(go.Scatter(y=y_p2_pred, name='y_pred'))
fig.show()