In [20]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from torch.utils.data import TensorDataset
from tqdm import tqdm
from IPython.core.interactiveshell import InteractiveShell
import copy
InteractiveShell.ast_node_interactivity = 'all'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
plt.ioff() 


<contextlib.ExitStack at 0x1a1835eb670>

In [7]:

'''
高精度、可靠的风速预报是气象学家面临的挑战。由对流风暴引起的强风，
造成相当大的破坏(大规模森林破坏.停电、建筑物/房屋损坏等)。
雷暴、龙卷风以及大冰雹、强风等对流事件是有可能扰乱日常生活的自然灾害，
特别是在有利于对流启动的复杂地形上。即使是普通的对流事件也会产生强风，
造成致命和昂贵的损失。因此，风速预测是一项重要的工作。
===本代码考虑，wind多因素的预测===
7个变量预测7个变量
步长：96
'''


class Config():
    data_path = 'D:/Study/机器学习/Project/ETTh1.csv'   # 数据path
    timestep = 96  # 时间步长，就是利用多少时间窗口
    batch_size = 32  # 批次大小
    feature_size = 7  # 每个步长对应的特征数量，这里使用所有变量
    hidden_size = 256  # 隐层大小
    output_size = 7  # 预测变量的数量
    pred_window = 96
    num_layers = 2  # gru的层数
    epochs = 2  # 迭代轮数
    best_loss = 1008611  # 记录损失
    learning_rate = 1e-3  # 学习率
    model_name = 'lstm'  # 模型名称
    save_path = '{}.pth'.format(model_name)  # 最优模型保存路径


'\n高精度、可靠的风速预报是气象学家面临的挑战。由对流风暴引起的强风，\n造成相当大的破坏(大规模森林破坏.停电、建筑物/房屋损坏等)。\n雷暴、龙卷风以及大冰雹、强风等对流事件是有可能扰乱日常生活的自然灾害，\n特别是在有利于对流启动的复杂地形上。即使是普通的对流事件也会产生强风，\n造成致命和昂贵的损失。因此，风速预测是一项重要的工作。\n===本代码考虑，wind多因素的预测===\n7个变量预测7个变量\n步长：96\n'

In [18]:
# 形成训练数据，例如12345789 12-3456789
def split_data(data, timestep, feature_size, pred_window):
    dataX = []  # 保存X
    dataY = []  # 保存Y

    # 将整个窗口的数据保存到X中，将未来96/336保存到Y中
    for index in range(len(data) - timestep - pred_window):
        dataX.append(data[index: index + timestep][:, :])  #
        dataY.append(data[index + timestep: index + timestep + pred_window][:, ])  #

    dataX = np.array(dataX)
    dataY = np.array(dataY)

    # 获取训练集大小
    train_size = int(np.round(0.6 * dataX.shape[0]))
    validation_size = int(np.round(0.8 * dataX.shape[0]))

    # 划分训练集、测试集、验证集
    x_train = dataX[: train_size, :].reshape(-1, timestep, feature_size)
    y_train = dataY[: train_size, :].reshape(-1, pred_window, feature_size)

    x_test = dataX[train_size:validation_size, :].reshape(-1, timestep, feature_size)
    y_test = dataY[train_size:validation_size, :].reshape(-1, pred_window, feature_size)

    x_val = dataX[validation_size:, :].reshape(-1, timestep, feature_size)
    y_val = dataY[validation_size:, :].reshape(-1, pred_window, feature_size)

    return [x_train, y_train, x_test, y_test, x_val, y_val]

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, pred_window):
        batch_size = x.size(0)
        seq_len = x.size(1)

        # LSTM layer
        h_0 = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(x.device)
        c_0 = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(x.device)
        output, _ = self.lstm(x, (h_0, c_0))
        predictions = []
        for i in range(pred_window):
            if i < seq_len:
                out = self.fc(output[:, i, :])
            else:
                out = self.fc(output[:, -1, :])  # Use the last available time step for prediction
            predictions.append(out.unsqueeze(1))

        predictions = torch.cat(predictions, dim=1)
        return predictions

def plot_prediction(scaler2, y_test_pred, y_test, pred_window, id):
    plt.figure(figsize=(12, 8))
    plt.plot(scaler2.inverse_transform(y_test_pred[0][:, -1].detach().numpy().reshape(-1, 1)), color="b", label='predict')
    plt.plot(scaler2.inverse_transform(y_test[0][:, -1].detach().numpy().reshape(-1, 1)), "r", label='real')
    plt.xlabel('hours', fontsize=12)
    plt.ylabel('oil temperature', fontsize=12)
    plt.legend()
    plt.savefig('./Pic/oil_prediction' + pred_window + id + '-Pic.png', format='png')
    # plt.show()

In [11]:
config = Config()
config.learning_rate = 0.01
config.batch_size = 32
config.hidden_size = 64
config.num_layers = 4
df = pd.read_csv(config.data_path, index_col=0)
# df
scaler = MinMaxScaler()
# np.array(df)
data = scaler.fit_transform(np.array(df))
# data
scaler2 = MinMaxScaler()
scaler2.fit_transform(np.array(df['OT']).reshape(-1, 1))
x_train, y_train, x_test, y_test, x_val, y_val = split_data(data, config.timestep, config.feature_size, config.pred_window)

# 转为tensor
x_train_tensor = torch.from_numpy(x_train).to(torch.float32).to(device)
y_train_tensor = torch.from_numpy(y_train).to(torch.float32).to(device)
x_test_tensor = torch.from_numpy(x_test).to(torch.float32).to(device)
y_test_tensor = torch.from_numpy(y_test).to(torch.float32).to(device)
x_val_tensor = torch.from_numpy(x_val).to(torch.float32).to(device)
y_val_tensor = torch.from_numpy(y_val).to(torch.float32).to(device)

# 形成数据集
train_data = TensorDataset(x_train_tensor, y_train_tensor)
test_data = TensorDataset(x_test_tensor, y_test_tensor)
val_data = TensorDataset(x_val_tensor, y_val_tensor)

# 将数据集加载为迭代器
train_loader = torch.utils.data.DataLoader(train_data, config.batch_size, True)
test_loader = torch.utils.data.DataLoader(test_data, config.batch_size, True)
val_loader = torch.utils.data.DataLoader(val_data, config.batch_size, True)

model = LSTMModel(config.feature_size, config.hidden_size, 
                  config.output_size, config.num_layers)
                  

optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)

array([[0.69101763],
       [0.63623297],
       [0.63623297],
       ...,
       [0.28652145],
       [0.27667858],
       [0.27246592]])

In [21]:
def train(config, train=True):
    loss_mse = nn.MSELoss()  # 定义损失函数
    loss_mae = nn.L1Loss()
    l = []
    l2 = []
    optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)  # 定义优化器
    # 8.模型训练
    if train:
        for epoch in range(config.epochs):
            running_loss = 0
            # train_bar = tqdm(train_loader)  # 形成进度条
            train_bar = tqdm(train_loader)  # 验证集，确定超参数
            for data in train_bar:
                x_train, y_train = data  # 解包迭代器中的X和Y;(32,96,7),(32,96)
                optimizer.zero_grad()
                y_train_pred = model(x_train, config.pred_window)
                loss = loss_mse(y_train_pred, y_train)
                loss2 = loss_mae(y_train_pred, y_train)
                l.append(loss.item())
                l2.append(loss2.item())
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                         config.epochs,
                                                                         loss)
        torch.save(model.state_dict(), config.save_path)
        # 打印loss mse和loss mae
        # 创建图表
        plt.figure(figsize=(8, 5))
        # 绘制数据
        plt.plot(l, label='MSE')
        plt.plot(l2, label='MAE')
        # 添加标签、标题等
        plt.xlabel('epochs')
        plt.ylabel('loss')
        plt.title("MSE's and MAE's loss curve")
        plt.legend()  # 显示图例
        # 显示图表
        plt.grid(True)  # 添加网格线
        plt.savefig('LSTM-MAEandMSE-Pic.png', format='png')
        plt.show()

        return loss.detach().numpy(), loss2.detach().numpy()
    
    else:  # 预测，加载模型
        # 模型验证
        # 加载已保存的模型参数
        model.load_state_dict(torch.load(config.save_path))
        model.eval()  # 如果只是用来预测，而非继续训练，需要调用eval()
        test_loss = 0
        with torch.no_grad():
            test_bar = tqdm(test_loader)
            id = 1
            for data in test_bar:
                if id > 10:
                    break
                x_test, y_test = data
                y_test_pred = model(x_test, config.pred_window)
                # debug_var = y_test_pred[0]
                # debug_var2 = y_test[0]
                plot_prediction(scaler2, y_test_pred, y_test, str(config.pred_window), str(id))
                id += 1
                  
            test_loss = loss_mse(y_test_pred, y_test)

        if test_loss < config.best_loss:
            config.best_loss = test_loss
            torch.save(model.state_dict(), config.save_path)

    print('Finished Training')


if __name__ == "__main__":
    config = Config()
    config.learning_rate = 0.01
    config.batch_size = 16
    config.hidden_size = 64
    config.num_layers = 4
    config.pred_window = 96
    # train(config, True)
    for seed in [2, 23, 98, 1024]:  # , 5467, 20231225
        np.random.seed(seed)
        torch.manual_seed(seed)
        train(config, False)
        # pd.DataFrame({'seed': [seed], 'mse': [mse], 'mae': [mae], 'std': [std]}).to_csv('train_loss.csv', mode='a', header=False, index=True)


<torch._C.Generator at 0x1a1f4bec1f0>

  9%|▉         | 10/108 [00:01<00:16,  6.04it/s]

Finished Training





<torch._C.Generator at 0x1a1f4bec1f0>

  9%|▉         | 10/108 [00:01<00:15,  6.17it/s]

Finished Training





<torch._C.Generator at 0x1a1f4bec1f0>

  plt.figure(figsize=(12, 8))
  9%|▉         | 10/108 [00:01<00:15,  6.49it/s]

Finished Training





<torch._C.Generator at 0x1a1f4bec1f0>

  9%|▉         | 10/108 [00:01<00:14,  6.66it/s]

Finished Training



