In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch.utils.data as Data
import numpy as np
import pandas as pd
from tqdm import tqdm
# import optuna
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import math
import warnings
import random
from torch.backends import cudnn

warnings.filterwarnings('ignore')
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False



In [2]:
SEED = 1234
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
cudnn.benchmark = False
cudnn.deterministic = True

# 用30天的数据预测下一天
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = pd.read_csv('ETTh1.csv')
print(data)
data.dropna(axis=0, how='any')
data_x = data.iloc[:, 1:]


                      date    HUFL   HULL   MUFL   MULL   LUFL   LULL  \
0      2016-07-01 00:00:00   5.827  2.009  1.599  0.462  4.203  1.340   
1      2016-07-01 01:00:00   5.693  2.076  1.492  0.426  4.142  1.371   
2      2016-07-01 02:00:00   5.157  1.741  1.279  0.355  3.777  1.218   
3      2016-07-01 03:00:00   5.090  1.942  1.279  0.391  3.807  1.279   
4      2016-07-01 04:00:00   5.358  1.942  1.492  0.462  3.868  1.279   
...                    ...     ...    ...    ...    ...    ...    ...   
17415  2018-06-26 15:00:00  -1.674  3.550 -5.615  2.132  3.472  1.523   
17416  2018-06-26 16:00:00  -5.492  4.287 -9.132  2.274  3.533  1.675   
17417  2018-06-26 17:00:00   2.813  3.818 -0.817  2.097  3.716  1.523   
17418  2018-06-26 18:00:00   9.243  3.818  5.472  2.097  3.655  1.432   
17419  2018-06-26 19:00:00  10.114  3.550  6.183  1.564  3.716  1.462   

              OT  
0      30.531000  
1      27.787001  
2      27.787001  
3      25.044001  
4      21.948000  
...      

In [14]:

data_96_x = []
data_96_y = []
for i in range(0, len(data_x) - 95, 96):
    data_96_x.append(data_x[i: i+95])
    data_96_y.append(data_x.iloc[i+95, -1])
print(len(data_96_x), len(data_96_y))
x_train, x_test, y_train, y_test = train_test_split(np.array(data_96_x), np.array(data_96_y), test_size=0.2, random_state=1, shuffle=True)

print(len(x_train))
print(len(x_test))


181 181
144
37


In [4]:
class DataSet(Data.Dataset):
    def __init__(self, data_inputs, data_targets):
        self.inputs = torch.FloatTensor(data_inputs)
        self.label = torch.FloatTensor(data_targets)
    
    def __getitem__(self, index):
        return self.inputs[index], self.label[index]

    def __len__(self):
        return len(self.inputs)


In [7]:
Batch_Size = 8
dataSet = DataSet(np.array(x_train), list(y_train))
train_size = int(len(x_train) * 0.7)
test_size = len(y_train) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataSet, [train_size, test_size])
TrainDataLoader = Data.DataLoader(train_dataset, batch_size=Batch_Size, shuffle=True, drop_last=True)
TestDataLoader = Data.DataLoader(test_dataset, batch_size=Batch_Size, shuffle=True, drop_last=True)


In [33]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        pe.requires_grad = False
        self.register_buffer('pe', pe)

    def forward(self, x: torch.Tensor):
        chunk = x.chunk(x.size(-1), dim=2)
        out = torch.Tensor([]).to(x.device)
        for i in range(len(chunk)):
            out = torch.cat((out, chunk[i] + self.pe[:chunk[i].size(0), ...]), dim=2)
        return out
    
def transformer_generate_tgt_mask(length, device):
    mask = torch.tril(torch.ones(length, length, device=device)) == 1
    mask = (
        mask.float()
        .masked_fill(mask == 0, float('-inf'))
        .masked_fill(mask == 1, float(0.0))
    )
    return mask

class Transformer(nn.Module):
    def __init__(self, n_encoder_inputs, n_decoder_inputs, Sequence_length, d_model=512, dropout=0.1, num_layers=8):
        super(Transformer, self).__init__()
        self.input_pos_embedding = torch.nn.Embedding(500, embedding_dim=d_model)
        self.target_pos_embedding = torch.nn.Embedding(500, embedding_dim=d_model)

        encoder_layer = torch.nn.TransformerEncoderLayer(d_model=d_model, nhead=num_layers, dropout=dropout, dim_feedforward=4*d_model)
        decoder_layer = torch.nn.TransformerDecoderLayer(d_model=d_model, nhead=num_layers, dropout=dropout, dim_feedforward=4*d_model)

        self.encoder = torch.nn.TransformerEncoder(encoder_layer, num_layers=2)
        self.decoder = torch.nn.TransformerDecoder(decoder_layer, num_layers=4)

        self.input_projection = torch.nn.Linear(n_encoder_inputs, d_model)
        self.output_projection = torch.nn.Linear(n_decoder_inputs, d_model)

        self.linear = torch.nn.Linear(d_model, 1)
        self.ziji_add_linear = torch.nn.Linear(Sequence_length, 1)

    def encode_in(self, src):
        src_start = self.input_projection(src).permute(1, 0, 2)
        in_sequence_len, batch_size = src_start.size(0), src_start.size(1)
        pos_encoder = (torch.arange(0, in_sequence_len, device=src.device).unsqueeze(0).repeat(batch_size, 1))
        pos_encoder = self.input_pos_embedding(pos_encoder).permute(1, 0, 2)
        src = src_start + pos_encoder
        src = self.encoder(src) + src_start
        return src

    def decode_out(self, tgt, memory):
        tgt_start = self.output_projection(tgt).permute(1, 0, 2)
        out_sequence_len, batch_size = tgt_start.size(0), tgt_start.size(1)
        pos_decoder = (torch.arange(0, out_sequence_len, device=tgt.device).unsqueeze(0).repeat(batch_size, 1))
        pos_decoder = self.target_pos_embedding(pos_decoder).permute(1, 0, 2)
        tgt = tgt_start + pos_decoder
        tgt_mask = transformer_generate_tgt_mask(out_sequence_len, tgt.device)
        out = self.decoder(tgt=tgt, memory=memory, tgt_mask=tgt_mask) + tgt_start
        out = out.permute(1, 0, 2)
        out = self.linear(out)
        return out
    
    def forward(self, src, target_in):
        src = self.encode_in(src)
        out = self.decode_out(tgt=target_in, memory=src)
        # torch.Size([batch, 3, 1])
        out = out.squeeze(2)
        out = self.ziji_add_linear(out)
        return out

model = Transformer(n_encoder_inputs=7, n_decoder_inputs=7, Sequence_length=95).to(device) # 3表示Sequence_length transformer输入数据 序列的长度

def test_main(model):
    val_epoch_loss = []
    with torch.no_grad():
        for index, (inputs, targets) in enumerate(TestDataLoader):
            inputs = torch.tensor(inputs).to(device)
            targets = torch.tensor(targets).to(device)
            inputs = inputs.float()
            targets = targets.float()
            tgt_in = torch.rand((Batch_Size, 95, 7))
            outputs = model(inputs, tgt_in)
            loss = criterion(outputs.float(), targets.float())
            val_epoch_loss.append(loss.item())
    return np.mean(val_epoch_loss)


In [35]:
epochs = 50
optimizer = torch.optim.Adagrad(model.parameters(), lr=0.0001)
criterion = torch.nn.MSELoss().to(device)

val_loss = []
train_loss = []
best_test_loss = 10000000
for epoch in tqdm(range(epochs)):
    train_epoch_loss = []
    for index, (inputs, targets) in enumerate(TrainDataLoader):
        inputs = torch.tensor(inputs).to(device)
        targets = torch.tensor(targets).to(device)
        inputs = inputs.float()
        targets = targets.float()

        tgt_in = torch.rand((Batch_Size, 95, 7))
        outputs = model(inputs, tgt_in)
        loss = criterion(outputs.float(), targets.float())
        print('loss:', loss)
        loss.backward()
        optimizer.step()
        train_epoch_loss.append(loss.item())
    train_loss.append(np.mean(train_epoch_loss))
    val_epoch_loss = test_main(model)
    val_loss.append(val_epoch_loss)
    print('epoch:', epoch, "train_epoch_loss:", np.mean(train_epoch_loss), 'val_epoch_loss:', val_epoch_loss)

    if val_epoch_loss < best_test_loss:
        best_test_loss = val_epoch_loss
        best_model = model
        print('best_test_loss ---------------------', best_test_loss)
        torch.save(best_model.state_dict(), 'best_Transformer_trainModel.pth')
        

  0%|          | 0/50 [00:00<?, ?it/s]

loss: tensor(78.8761, grad_fn=<MseLossBackward0>)
loss: tensor(67.1492, grad_fn=<MseLossBackward0>)
loss: tensor(171.9894, grad_fn=<MseLossBackward0>)
loss: tensor(35.6237, grad_fn=<MseLossBackward0>)
loss: tensor(149.0495, grad_fn=<MseLossBackward0>)
loss: tensor(135.4158, grad_fn=<MseLossBackward0>)
loss: tensor(79.7751, grad_fn=<MseLossBackward0>)
loss: tensor(153.8423, grad_fn=<MseLossBackward0>)
loss: tensor(142.4051, grad_fn=<MseLossBackward0>)
loss: tensor(63.2607, grad_fn=<MseLossBackward0>)
loss: tensor(106.1435, grad_fn=<MseLossBackward0>)
loss: tensor(60.8025, grad_fn=<MseLossBackward0>)


  2%|▏         | 1/50 [00:10<08:40, 10.63s/it]

epoch: 0 train_epoch_loss: 103.69441254933675 val_epoch_loss: 87.50945129394532
best_test_loss --------------------- 87.50945129394532
loss: tensor(92.3335, grad_fn=<MseLossBackward0>)
loss: tensor(43.8874, grad_fn=<MseLossBackward0>)
loss: tensor(91.1822, grad_fn=<MseLossBackward0>)
loss: tensor(203.1676, grad_fn=<MseLossBackward0>)
loss: tensor(48.1998, grad_fn=<MseLossBackward0>)
loss: tensor(103.3121, grad_fn=<MseLossBackward0>)
loss: tensor(119.2066, grad_fn=<MseLossBackward0>)
loss: tensor(27.4668, grad_fn=<MseLossBackward0>)
loss: tensor(132.1879, grad_fn=<MseLossBackward0>)
loss: tensor(73.5120, grad_fn=<MseLossBackward0>)
loss: tensor(53.0347, grad_fn=<MseLossBackward0>)
loss: tensor(36.7185, grad_fn=<MseLossBackward0>)


  4%|▍         | 2/50 [00:21<08:46, 10.97s/it]

epoch: 1 train_epoch_loss: 85.3507661819458 val_epoch_loss: 78.05794677734374
best_test_loss --------------------- 78.05794677734374
loss: tensor(42.7589, grad_fn=<MseLossBackward0>)
loss: tensor(47.5410, grad_fn=<MseLossBackward0>)
loss: tensor(47.7051, grad_fn=<MseLossBackward0>)
loss: tensor(40.2994, grad_fn=<MseLossBackward0>)
loss: tensor(225.5191, grad_fn=<MseLossBackward0>)
loss: tensor(48.6994, grad_fn=<MseLossBackward0>)
loss: tensor(146.1127, grad_fn=<MseLossBackward0>)
loss: tensor(82.9128, grad_fn=<MseLossBackward0>)
loss: tensor(100.7292, grad_fn=<MseLossBackward0>)
loss: tensor(43.5283, grad_fn=<MseLossBackward0>)
loss: tensor(54.7037, grad_fn=<MseLossBackward0>)
loss: tensor(138.5519, grad_fn=<MseLossBackward0>)


  6%|▌         | 3/50 [00:32<08:34, 10.95s/it]

epoch: 2 train_epoch_loss: 84.92179044087727 val_epoch_loss: 67.42816543579102
best_test_loss --------------------- 67.42816543579102
loss: tensor(88.8977, grad_fn=<MseLossBackward0>)
loss: tensor(61.4241, grad_fn=<MseLossBackward0>)
loss: tensor(39.8491, grad_fn=<MseLossBackward0>)
loss: tensor(45.9692, grad_fn=<MseLossBackward0>)
loss: tensor(39.3266, grad_fn=<MseLossBackward0>)
loss: tensor(56.4755, grad_fn=<MseLossBackward0>)
loss: tensor(60.1059, grad_fn=<MseLossBackward0>)
loss: tensor(86.0389, grad_fn=<MseLossBackward0>)
loss: tensor(94.4197, grad_fn=<MseLossBackward0>)
loss: tensor(89.8130, grad_fn=<MseLossBackward0>)
loss: tensor(128.0169, grad_fn=<MseLossBackward0>)
loss: tensor(127.2902, grad_fn=<MseLossBackward0>)


  8%|▊         | 4/50 [00:44<08:36, 11.23s/it]

epoch: 3 train_epoch_loss: 76.46889686584473 val_epoch_loss: 61.933041381835935
best_test_loss --------------------- 61.933041381835935
loss: tensor(111.3793, grad_fn=<MseLossBackward0>)
loss: tensor(75.7264, grad_fn=<MseLossBackward0>)
loss: tensor(23.1671, grad_fn=<MseLossBackward0>)
loss: tensor(68.4831, grad_fn=<MseLossBackward0>)
loss: tensor(144.1374, grad_fn=<MseLossBackward0>)
loss: tensor(62.7972, grad_fn=<MseLossBackward0>)
loss: tensor(43.1568, grad_fn=<MseLossBackward0>)
loss: tensor(47.2831, grad_fn=<MseLossBackward0>)
loss: tensor(31.5872, grad_fn=<MseLossBackward0>)
loss: tensor(79.1731, grad_fn=<MseLossBackward0>)
loss: tensor(100.3916, grad_fn=<MseLossBackward0>)
loss: tensor(59.9950, grad_fn=<MseLossBackward0>)


 10%|█         | 5/50 [00:55<08:19, 11.10s/it]

epoch: 4 train_epoch_loss: 70.6064403851827 val_epoch_loss: 72.6939884185791
loss: tensor(48.4739, grad_fn=<MseLossBackward0>)
loss: tensor(64.7419, grad_fn=<MseLossBackward0>)
loss: tensor(101.7580, grad_fn=<MseLossBackward0>)
loss: tensor(114.5248, grad_fn=<MseLossBackward0>)
loss: tensor(194.6150, grad_fn=<MseLossBackward0>)
loss: tensor(106.1021, grad_fn=<MseLossBackward0>)
loss: tensor(104.3403, grad_fn=<MseLossBackward0>)
loss: tensor(23.0918, grad_fn=<MseLossBackward0>)
loss: tensor(51.6682, grad_fn=<MseLossBackward0>)
loss: tensor(63.2104, grad_fn=<MseLossBackward0>)
loss: tensor(26.2515, grad_fn=<MseLossBackward0>)
loss: tensor(31.4692, grad_fn=<MseLossBackward0>)


 12%|█▏        | 6/50 [01:06<08:08, 11.10s/it]

epoch: 5 train_epoch_loss: 77.52059412002563 val_epoch_loss: 81.92565078735352
loss: tensor(89.8393, grad_fn=<MseLossBackward0>)
loss: tensor(49.6437, grad_fn=<MseLossBackward0>)
loss: tensor(110.0522, grad_fn=<MseLossBackward0>)
loss: tensor(60.2775, grad_fn=<MseLossBackward0>)
loss: tensor(49.1338, grad_fn=<MseLossBackward0>)
loss: tensor(115.5078, grad_fn=<MseLossBackward0>)
loss: tensor(75.5929, grad_fn=<MseLossBackward0>)
loss: tensor(95.2709, grad_fn=<MseLossBackward0>)
loss: tensor(29.3067, grad_fn=<MseLossBackward0>)
loss: tensor(68.4454, grad_fn=<MseLossBackward0>)
loss: tensor(165.3279, grad_fn=<MseLossBackward0>)
loss: tensor(26.7091, grad_fn=<MseLossBackward0>)


 14%|█▍        | 7/50 [01:17<07:56, 11.07s/it]

epoch: 6 train_epoch_loss: 77.92561372121175 val_epoch_loss: 63.552642059326175
loss: tensor(109.7583, grad_fn=<MseLossBackward0>)
loss: tensor(49.9942, grad_fn=<MseLossBackward0>)
loss: tensor(28.7270, grad_fn=<MseLossBackward0>)
loss: tensor(102.0252, grad_fn=<MseLossBackward0>)
loss: tensor(113.8946, grad_fn=<MseLossBackward0>)
loss: tensor(54.9831, grad_fn=<MseLossBackward0>)
loss: tensor(64.2636, grad_fn=<MseLossBackward0>)
loss: tensor(39.9179, grad_fn=<MseLossBackward0>)
loss: tensor(50.1348, grad_fn=<MseLossBackward0>)
loss: tensor(62.8024, grad_fn=<MseLossBackward0>)
loss: tensor(120.7890, grad_fn=<MseLossBackward0>)
loss: tensor(86.6030, grad_fn=<MseLossBackward0>)


 16%|█▌        | 8/50 [01:28<07:45, 11.09s/it]

epoch: 7 train_epoch_loss: 73.65777683258057 val_epoch_loss: 68.6842254638672
loss: tensor(25.5399, grad_fn=<MseLossBackward0>)
loss: tensor(187.2484, grad_fn=<MseLossBackward0>)
loss: tensor(111.5023, grad_fn=<MseLossBackward0>)
loss: tensor(31.8506, grad_fn=<MseLossBackward0>)
loss: tensor(120.8787, grad_fn=<MseLossBackward0>)
loss: tensor(34.8882, grad_fn=<MseLossBackward0>)
loss: tensor(126.6990, grad_fn=<MseLossBackward0>)
loss: tensor(48.7600, grad_fn=<MseLossBackward0>)
loss: tensor(39.1501, grad_fn=<MseLossBackward0>)
loss: tensor(46.4400, grad_fn=<MseLossBackward0>)
loss: tensor(144.0397, grad_fn=<MseLossBackward0>)
loss: tensor(64.7728, grad_fn=<MseLossBackward0>)


 18%|█▊        | 9/50 [01:39<07:36, 11.14s/it]

epoch: 8 train_epoch_loss: 81.81412760416667 val_epoch_loss: 92.97881393432617
loss: tensor(135.4029, grad_fn=<MseLossBackward0>)
loss: tensor(84.3807, grad_fn=<MseLossBackward0>)
loss: tensor(131.0869, grad_fn=<MseLossBackward0>)
loss: tensor(88.4692, grad_fn=<MseLossBackward0>)
loss: tensor(248.0384, grad_fn=<MseLossBackward0>)
loss: tensor(66.4545, grad_fn=<MseLossBackward0>)
loss: tensor(49.5689, grad_fn=<MseLossBackward0>)
loss: tensor(34.7752, grad_fn=<MseLossBackward0>)
loss: tensor(96.8026, grad_fn=<MseLossBackward0>)
loss: tensor(82.2689, grad_fn=<MseLossBackward0>)
loss: tensor(255.7249, grad_fn=<MseLossBackward0>)
loss: tensor(237.8895, grad_fn=<MseLossBackward0>)


 20%|██        | 10/50 [01:51<07:27, 11.18s/it]

epoch: 9 train_epoch_loss: 125.90521335601807 val_epoch_loss: 111.85735778808593
loss: tensor(313.0526, grad_fn=<MseLossBackward0>)
loss: tensor(109.0425, grad_fn=<MseLossBackward0>)
loss: tensor(288.0482, grad_fn=<MseLossBackward0>)
loss: tensor(158.6162, grad_fn=<MseLossBackward0>)
loss: tensor(138.3356, grad_fn=<MseLossBackward0>)
loss: tensor(126.2940, grad_fn=<MseLossBackward0>)
loss: tensor(131.5476, grad_fn=<MseLossBackward0>)
loss: tensor(107.3021, grad_fn=<MseLossBackward0>)
loss: tensor(93.3259, grad_fn=<MseLossBackward0>)
loss: tensor(42.2287, grad_fn=<MseLossBackward0>)
loss: tensor(174.4043, grad_fn=<MseLossBackward0>)
loss: tensor(99.4876, grad_fn=<MseLossBackward0>)


 22%|██▏       | 11/50 [02:02<07:17, 11.22s/it]

epoch: 10 train_epoch_loss: 148.47378253936768 val_epoch_loss: 110.60645294189453
loss: tensor(147.0428, grad_fn=<MseLossBackward0>)
loss: tensor(156.7133, grad_fn=<MseLossBackward0>)
loss: tensor(86.3326, grad_fn=<MseLossBackward0>)
loss: tensor(84.1812, grad_fn=<MseLossBackward0>)
loss: tensor(52.4634, grad_fn=<MseLossBackward0>)
loss: tensor(35.2091, grad_fn=<MseLossBackward0>)
loss: tensor(151.4204, grad_fn=<MseLossBackward0>)
loss: tensor(131.9465, grad_fn=<MseLossBackward0>)
loss: tensor(54.1225, grad_fn=<MseLossBackward0>)
loss: tensor(147.4296, grad_fn=<MseLossBackward0>)
loss: tensor(49.7841, grad_fn=<MseLossBackward0>)
loss: tensor(44.0199, grad_fn=<MseLossBackward0>)


 24%|██▍       | 12/50 [02:13<07:09, 11.29s/it]

epoch: 11 train_epoch_loss: 95.05544185638428 val_epoch_loss: 93.4620864868164
loss: tensor(57.9807, grad_fn=<MseLossBackward0>)
loss: tensor(91.9039, grad_fn=<MseLossBackward0>)
loss: tensor(141.9401, grad_fn=<MseLossBackward0>)
loss: tensor(55.3842, grad_fn=<MseLossBackward0>)
loss: tensor(84.0287, grad_fn=<MseLossBackward0>)
loss: tensor(166.4043, grad_fn=<MseLossBackward0>)
loss: tensor(113.3166, grad_fn=<MseLossBackward0>)
loss: tensor(77.5406, grad_fn=<MseLossBackward0>)
loss: tensor(71.9586, grad_fn=<MseLossBackward0>)
loss: tensor(150.1959, grad_fn=<MseLossBackward0>)
loss: tensor(60.6188, grad_fn=<MseLossBackward0>)
loss: tensor(143.0414, grad_fn=<MseLossBackward0>)


 26%|██▌       | 13/50 [02:25<06:58, 11.31s/it]

epoch: 12 train_epoch_loss: 101.19282213846843 val_epoch_loss: 107.98722839355469
loss: tensor(153.4767, grad_fn=<MseLossBackward0>)
loss: tensor(73.5860, grad_fn=<MseLossBackward0>)
loss: tensor(120.4787, grad_fn=<MseLossBackward0>)
loss: tensor(190.7530, grad_fn=<MseLossBackward0>)
loss: tensor(60.0986, grad_fn=<MseLossBackward0>)
loss: tensor(54.8362, grad_fn=<MseLossBackward0>)
loss: tensor(97.7048, grad_fn=<MseLossBackward0>)
loss: tensor(122.0237, grad_fn=<MseLossBackward0>)
loss: tensor(152.9503, grad_fn=<MseLossBackward0>)
loss: tensor(117.0459, grad_fn=<MseLossBackward0>)
loss: tensor(91.5735, grad_fn=<MseLossBackward0>)
loss: tensor(24.6770, grad_fn=<MseLossBackward0>)


 28%|██▊       | 14/50 [02:36<06:46, 11.29s/it]

epoch: 13 train_epoch_loss: 104.93370215098064 val_epoch_loss: 103.83235931396484
loss: tensor(126.0186, grad_fn=<MseLossBackward0>)
loss: tensor(58.6495, grad_fn=<MseLossBackward0>)
loss: tensor(173.3245, grad_fn=<MseLossBackward0>)
loss: tensor(192.4957, grad_fn=<MseLossBackward0>)
loss: tensor(71.3576, grad_fn=<MseLossBackward0>)
loss: tensor(97.1900, grad_fn=<MseLossBackward0>)
loss: tensor(44.7918, grad_fn=<MseLossBackward0>)
loss: tensor(49.5096, grad_fn=<MseLossBackward0>)
loss: tensor(31.4884, grad_fn=<MseLossBackward0>)
loss: tensor(51.7940, grad_fn=<MseLossBackward0>)
loss: tensor(140.0908, grad_fn=<MseLossBackward0>)
loss: tensor(96.4168, grad_fn=<MseLossBackward0>)


 30%|███       | 15/50 [02:47<06:34, 11.27s/it]

epoch: 14 train_epoch_loss: 94.42727247873943 val_epoch_loss: 93.76574668884277
loss: tensor(71.8709, grad_fn=<MseLossBackward0>)
loss: tensor(110.4937, grad_fn=<MseLossBackward0>)
loss: tensor(91.8921, grad_fn=<MseLossBackward0>)
loss: tensor(171.1233, grad_fn=<MseLossBackward0>)
loss: tensor(96.0188, grad_fn=<MseLossBackward0>)
loss: tensor(120.4688, grad_fn=<MseLossBackward0>)
loss: tensor(32.3019, grad_fn=<MseLossBackward0>)
loss: tensor(44.6542, grad_fn=<MseLossBackward0>)
loss: tensor(60.5491, grad_fn=<MseLossBackward0>)
loss: tensor(166.3173, grad_fn=<MseLossBackward0>)
loss: tensor(152.1923, grad_fn=<MseLossBackward0>)
loss: tensor(88.9620, grad_fn=<MseLossBackward0>)


 32%|███▏      | 16/50 [02:58<06:23, 11.29s/it]

epoch: 15 train_epoch_loss: 100.57035891215007 val_epoch_loss: 89.82353477478027
loss: tensor(193.5141, grad_fn=<MseLossBackward0>)
loss: tensor(156.6026, grad_fn=<MseLossBackward0>)
loss: tensor(47.3736, grad_fn=<MseLossBackward0>)
loss: tensor(62.9751, grad_fn=<MseLossBackward0>)
loss: tensor(36.4040, grad_fn=<MseLossBackward0>)
loss: tensor(97.4312, grad_fn=<MseLossBackward0>)
loss: tensor(176.3574, grad_fn=<MseLossBackward0>)
loss: tensor(143.2830, grad_fn=<MseLossBackward0>)
loss: tensor(34.3684, grad_fn=<MseLossBackward0>)
loss: tensor(53.9805, grad_fn=<MseLossBackward0>)
loss: tensor(202.2743, grad_fn=<MseLossBackward0>)
loss: tensor(78.1180, grad_fn=<MseLossBackward0>)


 34%|███▍      | 17/50 [03:10<06:13, 11.33s/it]

epoch: 16 train_epoch_loss: 106.8901818593343 val_epoch_loss: 92.9478588104248
loss: tensor(48.3803, grad_fn=<MseLossBackward0>)
loss: tensor(219.5474, grad_fn=<MseLossBackward0>)
loss: tensor(55.2928, grad_fn=<MseLossBackward0>)
loss: tensor(104.7837, grad_fn=<MseLossBackward0>)
loss: tensor(75.5156, grad_fn=<MseLossBackward0>)
loss: tensor(77.0692, grad_fn=<MseLossBackward0>)
loss: tensor(73.6440, grad_fn=<MseLossBackward0>)
loss: tensor(76.1889, grad_fn=<MseLossBackward0>)
loss: tensor(43.4464, grad_fn=<MseLossBackward0>)
loss: tensor(192.5202, grad_fn=<MseLossBackward0>)
loss: tensor(174.0771, grad_fn=<MseLossBackward0>)
loss: tensor(84.0004, grad_fn=<MseLossBackward0>)


 36%|███▌      | 18/50 [03:21<06:03, 11.35s/it]

epoch: 17 train_epoch_loss: 102.0388371149699 val_epoch_loss: 78.23210983276367
loss: tensor(248.8095, grad_fn=<MseLossBackward0>)
loss: tensor(51.6316, grad_fn=<MseLossBackward0>)
loss: tensor(82.7410, grad_fn=<MseLossBackward0>)
loss: tensor(28.1390, grad_fn=<MseLossBackward0>)
loss: tensor(143.3573, grad_fn=<MseLossBackward0>)
loss: tensor(94.2375, grad_fn=<MseLossBackward0>)
loss: tensor(47.3512, grad_fn=<MseLossBackward0>)
loss: tensor(80.3314, grad_fn=<MseLossBackward0>)
loss: tensor(74.7673, grad_fn=<MseLossBackward0>)
loss: tensor(71.1330, grad_fn=<MseLossBackward0>)
loss: tensor(112.7748, grad_fn=<MseLossBackward0>)
loss: tensor(90.8650, grad_fn=<MseLossBackward0>)


 38%|███▊      | 19/50 [03:33<05:53, 11.40s/it]

epoch: 18 train_epoch_loss: 93.84488010406494 val_epoch_loss: 71.57976379394532
loss: tensor(102.3899, grad_fn=<MseLossBackward0>)
loss: tensor(95.1603, grad_fn=<MseLossBackward0>)
loss: tensor(165.5496, grad_fn=<MseLossBackward0>)
loss: tensor(118.6211, grad_fn=<MseLossBackward0>)
loss: tensor(75.3300, grad_fn=<MseLossBackward0>)
loss: tensor(35.8252, grad_fn=<MseLossBackward0>)
loss: tensor(79.2430, grad_fn=<MseLossBackward0>)
loss: tensor(35.0339, grad_fn=<MseLossBackward0>)
loss: tensor(96.7191, grad_fn=<MseLossBackward0>)
loss: tensor(64.7756, grad_fn=<MseLossBackward0>)
loss: tensor(64.8220, grad_fn=<MseLossBackward0>)
loss: tensor(234.7266, grad_fn=<MseLossBackward0>)


 40%|████      | 20/50 [03:44<05:43, 11.44s/it]

epoch: 19 train_epoch_loss: 97.34969965616862 val_epoch_loss: 61.85480270385742
best_test_loss --------------------- 61.85480270385742
loss: tensor(161.1234, grad_fn=<MseLossBackward0>)
loss: tensor(33.8485, grad_fn=<MseLossBackward0>)
loss: tensor(57.8728, grad_fn=<MseLossBackward0>)
loss: tensor(44.5895, grad_fn=<MseLossBackward0>)
loss: tensor(96.9597, grad_fn=<MseLossBackward0>)
loss: tensor(97.1109, grad_fn=<MseLossBackward0>)
loss: tensor(135.3037, grad_fn=<MseLossBackward0>)
loss: tensor(129.9238, grad_fn=<MseLossBackward0>)
loss: tensor(170.5620, grad_fn=<MseLossBackward0>)
loss: tensor(43.9189, grad_fn=<MseLossBackward0>)
loss: tensor(19.8659, grad_fn=<MseLossBackward0>)
loss: tensor(151.4019, grad_fn=<MseLossBackward0>)


 42%|████▏     | 21/50 [03:56<05:31, 11.43s/it]

epoch: 20 train_epoch_loss: 95.20674673716228 val_epoch_loss: 78.87069053649903
loss: tensor(98.8904, grad_fn=<MseLossBackward0>)
loss: tensor(47.5101, grad_fn=<MseLossBackward0>)
loss: tensor(29.4890, grad_fn=<MseLossBackward0>)
loss: tensor(105.7051, grad_fn=<MseLossBackward0>)
loss: tensor(29.6874, grad_fn=<MseLossBackward0>)
loss: tensor(82.9318, grad_fn=<MseLossBackward0>)
loss: tensor(40.0410, grad_fn=<MseLossBackward0>)
loss: tensor(96.1311, grad_fn=<MseLossBackward0>)
loss: tensor(143.6057, grad_fn=<MseLossBackward0>)
loss: tensor(75.0522, grad_fn=<MseLossBackward0>)
loss: tensor(191.5769, grad_fn=<MseLossBackward0>)
loss: tensor(123.0143, grad_fn=<MseLossBackward0>)


 44%|████▍     | 22/50 [04:07<05:21, 11.47s/it]

epoch: 21 train_epoch_loss: 88.63625160853069 val_epoch_loss: 80.25759887695312
loss: tensor(89.9603, grad_fn=<MseLossBackward0>)
loss: tensor(62.8926, grad_fn=<MseLossBackward0>)
loss: tensor(241.5457, grad_fn=<MseLossBackward0>)
loss: tensor(30.1335, grad_fn=<MseLossBackward0>)
loss: tensor(52.5999, grad_fn=<MseLossBackward0>)
loss: tensor(77.6076, grad_fn=<MseLossBackward0>)
loss: tensor(203.1398, grad_fn=<MseLossBackward0>)
loss: tensor(25.9454, grad_fn=<MseLossBackward0>)
loss: tensor(51.6568, grad_fn=<MseLossBackward0>)
loss: tensor(40.2264, grad_fn=<MseLossBackward0>)
loss: tensor(133.1456, grad_fn=<MseLossBackward0>)
loss: tensor(46.6297, grad_fn=<MseLossBackward0>)


 46%|████▌     | 23/50 [04:19<05:10, 11.49s/it]

epoch: 22 train_epoch_loss: 87.95693333943684 val_epoch_loss: 61.6938980102539
best_test_loss --------------------- 61.6938980102539
loss: tensor(89.1584, grad_fn=<MseLossBackward0>)
loss: tensor(35.8511, grad_fn=<MseLossBackward0>)
loss: tensor(38.7474, grad_fn=<MseLossBackward0>)
loss: tensor(184.6027, grad_fn=<MseLossBackward0>)
loss: tensor(66.3170, grad_fn=<MseLossBackward0>)
loss: tensor(152.9509, grad_fn=<MseLossBackward0>)
loss: tensor(47.1264, grad_fn=<MseLossBackward0>)
loss: tensor(47.3065, grad_fn=<MseLossBackward0>)
loss: tensor(21.7433, grad_fn=<MseLossBackward0>)
loss: tensor(97.7718, grad_fn=<MseLossBackward0>)
loss: tensor(122.3750, grad_fn=<MseLossBackward0>)
loss: tensor(53.6425, grad_fn=<MseLossBackward0>)


 48%|████▊     | 24/50 [04:30<04:58, 11.48s/it]

epoch: 23 train_epoch_loss: 79.79941082000732 val_epoch_loss: 74.7144889831543
loss: tensor(22.8170, grad_fn=<MseLossBackward0>)
loss: tensor(18.9503, grad_fn=<MseLossBackward0>)
loss: tensor(90.4242, grad_fn=<MseLossBackward0>)
loss: tensor(72.0389, grad_fn=<MseLossBackward0>)
loss: tensor(137.6457, grad_fn=<MseLossBackward0>)
loss: tensor(156.2246, grad_fn=<MseLossBackward0>)
loss: tensor(114.8208, grad_fn=<MseLossBackward0>)
loss: tensor(52.5944, grad_fn=<MseLossBackward0>)
loss: tensor(43.3399, grad_fn=<MseLossBackward0>)
loss: tensor(136.1980, grad_fn=<MseLossBackward0>)
loss: tensor(98.3530, grad_fn=<MseLossBackward0>)
loss: tensor(35.5697, grad_fn=<MseLossBackward0>)


 50%|█████     | 25/50 [04:42<04:50, 11.61s/it]

epoch: 24 train_epoch_loss: 81.58138306935628 val_epoch_loss: 66.35725708007813
loss: tensor(70.2892, grad_fn=<MseLossBackward0>)
loss: tensor(101.4074, grad_fn=<MseLossBackward0>)
loss: tensor(35.5112, grad_fn=<MseLossBackward0>)
loss: tensor(135.1638, grad_fn=<MseLossBackward0>)
loss: tensor(33.9044, grad_fn=<MseLossBackward0>)
loss: tensor(143.9635, grad_fn=<MseLossBackward0>)
loss: tensor(66.9025, grad_fn=<MseLossBackward0>)
loss: tensor(20.6579, grad_fn=<MseLossBackward0>)
loss: tensor(39.0346, grad_fn=<MseLossBackward0>)
loss: tensor(135.5092, grad_fn=<MseLossBackward0>)
loss: tensor(68.9389, grad_fn=<MseLossBackward0>)
loss: tensor(141.8398, grad_fn=<MseLossBackward0>)


 52%|█████▏    | 26/50 [04:54<04:41, 11.74s/it]

epoch: 25 train_epoch_loss: 82.76020431518555 val_epoch_loss: 73.7572566986084
loss: tensor(85.6615, grad_fn=<MseLossBackward0>)
loss: tensor(143.6597, grad_fn=<MseLossBackward0>)
loss: tensor(36.3954, grad_fn=<MseLossBackward0>)
loss: tensor(52.6871, grad_fn=<MseLossBackward0>)
loss: tensor(106.2005, grad_fn=<MseLossBackward0>)
loss: tensor(178.1250, grad_fn=<MseLossBackward0>)
loss: tensor(45.3280, grad_fn=<MseLossBackward0>)
loss: tensor(41.0438, grad_fn=<MseLossBackward0>)
loss: tensor(167.5015, grad_fn=<MseLossBackward0>)
loss: tensor(95.6580, grad_fn=<MseLossBackward0>)
loss: tensor(49.0375, grad_fn=<MseLossBackward0>)
loss: tensor(53.1085, grad_fn=<MseLossBackward0>)


 54%|█████▍    | 27/50 [05:06<04:32, 11.85s/it]

epoch: 26 train_epoch_loss: 87.86721960703532 val_epoch_loss: 44.76946687698364
best_test_loss --------------------- 44.76946687698364
loss: tensor(48.0924, grad_fn=<MseLossBackward0>)
loss: tensor(138.3958, grad_fn=<MseLossBackward0>)
loss: tensor(73.4770, grad_fn=<MseLossBackward0>)
loss: tensor(74.1083, grad_fn=<MseLossBackward0>)
loss: tensor(118.1870, grad_fn=<MseLossBackward0>)
loss: tensor(52.0327, grad_fn=<MseLossBackward0>)
loss: tensor(56.7055, grad_fn=<MseLossBackward0>)
loss: tensor(46.5912, grad_fn=<MseLossBackward0>)
loss: tensor(68.2088, grad_fn=<MseLossBackward0>)
loss: tensor(96.8687, grad_fn=<MseLossBackward0>)
loss: tensor(233.6398, grad_fn=<MseLossBackward0>)
loss: tensor(67.4339, grad_fn=<MseLossBackward0>)


 56%|█████▌    | 28/50 [05:18<04:19, 11.78s/it]

epoch: 27 train_epoch_loss: 89.47841517130534 val_epoch_loss: 76.18366146087646
loss: tensor(65.6453, grad_fn=<MseLossBackward0>)
loss: tensor(104.4063, grad_fn=<MseLossBackward0>)
loss: tensor(40.4683, grad_fn=<MseLossBackward0>)
loss: tensor(137.8952, grad_fn=<MseLossBackward0>)
loss: tensor(72.3743, grad_fn=<MseLossBackward0>)
loss: tensor(96.2898, grad_fn=<MseLossBackward0>)
loss: tensor(32.2316, grad_fn=<MseLossBackward0>)
loss: tensor(48.9000, grad_fn=<MseLossBackward0>)
loss: tensor(116.5714, grad_fn=<MseLossBackward0>)
loss: tensor(145.4237, grad_fn=<MseLossBackward0>)
loss: tensor(59.0024, grad_fn=<MseLossBackward0>)
loss: tensor(116.8821, grad_fn=<MseLossBackward0>)


 58%|█████▊    | 29/50 [05:30<04:08, 11.83s/it]

epoch: 28 train_epoch_loss: 86.3408571879069 val_epoch_loss: 67.9596061706543
loss: tensor(77.1406, grad_fn=<MseLossBackward0>)
loss: tensor(84.5612, grad_fn=<MseLossBackward0>)
loss: tensor(132.2251, grad_fn=<MseLossBackward0>)
loss: tensor(33.7138, grad_fn=<MseLossBackward0>)
loss: tensor(83.8821, grad_fn=<MseLossBackward0>)
loss: tensor(77.0148, grad_fn=<MseLossBackward0>)
loss: tensor(72.1002, grad_fn=<MseLossBackward0>)
loss: tensor(158.4309, grad_fn=<MseLossBackward0>)
loss: tensor(90.0502, grad_fn=<MseLossBackward0>)
loss: tensor(96.6036, grad_fn=<MseLossBackward0>)
loss: tensor(43.5879, grad_fn=<MseLossBackward0>)
loss: tensor(43.2551, grad_fn=<MseLossBackward0>)


 60%|██████    | 30/50 [05:43<04:01, 12.08s/it]

epoch: 29 train_epoch_loss: 82.71378548940022 val_epoch_loss: 72.84137115478515
loss: tensor(58.1436, grad_fn=<MseLossBackward0>)
loss: tensor(63.5162, grad_fn=<MseLossBackward0>)
loss: tensor(72.7519, grad_fn=<MseLossBackward0>)
loss: tensor(92.0136, grad_fn=<MseLossBackward0>)
loss: tensor(52.2473, grad_fn=<MseLossBackward0>)
loss: tensor(91.5323, grad_fn=<MseLossBackward0>)
loss: tensor(59.1394, grad_fn=<MseLossBackward0>)
loss: tensor(144.8649, grad_fn=<MseLossBackward0>)
loss: tensor(52.2996, grad_fn=<MseLossBackward0>)
loss: tensor(80.0626, grad_fn=<MseLossBackward0>)
loss: tensor(58.9987, grad_fn=<MseLossBackward0>)
loss: tensor(139.7459, grad_fn=<MseLossBackward0>)


 62%|██████▏   | 31/50 [05:54<03:45, 11.88s/it]

epoch: 30 train_epoch_loss: 80.4429915746053 val_epoch_loss: 66.84893531799317
loss: tensor(94.0217, grad_fn=<MseLossBackward0>)
loss: tensor(67.2921, grad_fn=<MseLossBackward0>)
loss: tensor(31.0651, grad_fn=<MseLossBackward0>)
loss: tensor(168.3769, grad_fn=<MseLossBackward0>)
loss: tensor(124.0184, grad_fn=<MseLossBackward0>)
loss: tensor(166.0718, grad_fn=<MseLossBackward0>)
loss: tensor(46.0566, grad_fn=<MseLossBackward0>)
loss: tensor(72.0467, grad_fn=<MseLossBackward0>)
loss: tensor(29.9755, grad_fn=<MseLossBackward0>)
loss: tensor(75.1980, grad_fn=<MseLossBackward0>)
loss: tensor(40.0043, grad_fn=<MseLossBackward0>)
loss: tensor(38.0594, grad_fn=<MseLossBackward0>)


 64%|██████▍   | 32/50 [06:06<03:32, 11.81s/it]

epoch: 31 train_epoch_loss: 79.34887981414795 val_epoch_loss: 66.75725173950195
loss: tensor(22.6742, grad_fn=<MseLossBackward0>)
loss: tensor(19.6869, grad_fn=<MseLossBackward0>)
loss: tensor(52.2484, grad_fn=<MseLossBackward0>)
loss: tensor(84.6506, grad_fn=<MseLossBackward0>)
loss: tensor(66.6756, grad_fn=<MseLossBackward0>)
loss: tensor(76.6711, grad_fn=<MseLossBackward0>)
loss: tensor(125.0340, grad_fn=<MseLossBackward0>)
loss: tensor(122.3382, grad_fn=<MseLossBackward0>)
loss: tensor(47.8054, grad_fn=<MseLossBackward0>)
loss: tensor(23.7615, grad_fn=<MseLossBackward0>)
loss: tensor(184.1081, grad_fn=<MseLossBackward0>)
loss: tensor(54.3622, grad_fn=<MseLossBackward0>)


 66%|██████▌   | 33/50 [06:17<03:19, 11.73s/it]

epoch: 32 train_epoch_loss: 73.33469279607137 val_epoch_loss: 69.97620468139648
loss: tensor(165.6153, grad_fn=<MseLossBackward0>)
loss: tensor(83.6747, grad_fn=<MseLossBackward0>)
loss: tensor(126.1014, grad_fn=<MseLossBackward0>)
loss: tensor(32.2388, grad_fn=<MseLossBackward0>)
loss: tensor(38.0277, grad_fn=<MseLossBackward0>)
loss: tensor(49.4742, grad_fn=<MseLossBackward0>)
loss: tensor(34.4292, grad_fn=<MseLossBackward0>)
loss: tensor(77.9901, grad_fn=<MseLossBackward0>)
loss: tensor(47.2105, grad_fn=<MseLossBackward0>)
loss: tensor(46.0902, grad_fn=<MseLossBackward0>)
loss: tensor(99.9169, grad_fn=<MseLossBackward0>)
loss: tensor(116.2289, grad_fn=<MseLossBackward0>)


 68%|██████▊   | 34/50 [06:29<03:07, 11.71s/it]

epoch: 33 train_epoch_loss: 76.41649055480957 val_epoch_loss: 68.03258972167968
loss: tensor(54.0473, grad_fn=<MseLossBackward0>)
loss: tensor(80.8955, grad_fn=<MseLossBackward0>)
loss: tensor(98.9579, grad_fn=<MseLossBackward0>)
loss: tensor(37.8690, grad_fn=<MseLossBackward0>)
loss: tensor(188.7260, grad_fn=<MseLossBackward0>)
loss: tensor(56.7972, grad_fn=<MseLossBackward0>)
loss: tensor(69.2032, grad_fn=<MseLossBackward0>)
loss: tensor(91.2952, grad_fn=<MseLossBackward0>)
loss: tensor(36.7062, grad_fn=<MseLossBackward0>)
loss: tensor(48.9097, grad_fn=<MseLossBackward0>)
loss: tensor(25.7477, grad_fn=<MseLossBackward0>)
loss: tensor(108.4863, grad_fn=<MseLossBackward0>)


 70%|███████   | 35/50 [06:40<02:54, 11.65s/it]

epoch: 34 train_epoch_loss: 74.80343341827393 val_epoch_loss: 69.44662666320801
loss: tensor(26.1387, grad_fn=<MseLossBackward0>)
loss: tensor(62.7836, grad_fn=<MseLossBackward0>)
loss: tensor(85.0905, grad_fn=<MseLossBackward0>)
loss: tensor(98.0892, grad_fn=<MseLossBackward0>)
loss: tensor(59.8799, grad_fn=<MseLossBackward0>)
loss: tensor(42.0459, grad_fn=<MseLossBackward0>)
loss: tensor(62.7658, grad_fn=<MseLossBackward0>)
loss: tensor(177.4330, grad_fn=<MseLossBackward0>)
loss: tensor(39.5522, grad_fn=<MseLossBackward0>)
loss: tensor(124.8115, grad_fn=<MseLossBackward0>)
loss: tensor(44.1851, grad_fn=<MseLossBackward0>)
loss: tensor(33.0414, grad_fn=<MseLossBackward0>)


 72%|███████▏  | 36/50 [06:52<02:42, 11.63s/it]

epoch: 35 train_epoch_loss: 71.31806914011638 val_epoch_loss: 63.262163162231445
loss: tensor(63.0331, grad_fn=<MseLossBackward0>)
loss: tensor(44.1815, grad_fn=<MseLossBackward0>)
loss: tensor(57.4635, grad_fn=<MseLossBackward0>)
loss: tensor(32.3691, grad_fn=<MseLossBackward0>)
loss: tensor(189.5407, grad_fn=<MseLossBackward0>)
loss: tensor(78.8462, grad_fn=<MseLossBackward0>)
loss: tensor(20.3216, grad_fn=<MseLossBackward0>)
loss: tensor(139.0824, grad_fn=<MseLossBackward0>)
loss: tensor(110.4126, grad_fn=<MseLossBackward0>)
loss: tensor(42.4577, grad_fn=<MseLossBackward0>)
loss: tensor(22.1443, grad_fn=<MseLossBackward0>)
loss: tensor(99.5005, grad_fn=<MseLossBackward0>)


 74%|███████▍  | 37/50 [07:04<02:31, 11.67s/it]

epoch: 36 train_epoch_loss: 74.94607893625896 val_epoch_loss: 63.297644805908206
loss: tensor(63.6856, grad_fn=<MseLossBackward0>)
loss: tensor(25.0360, grad_fn=<MseLossBackward0>)
loss: tensor(100.4047, grad_fn=<MseLossBackward0>)
loss: tensor(40.7069, grad_fn=<MseLossBackward0>)
loss: tensor(110.9617, grad_fn=<MseLossBackward0>)
loss: tensor(45.0644, grad_fn=<MseLossBackward0>)
loss: tensor(132.5204, grad_fn=<MseLossBackward0>)
loss: tensor(87.6982, grad_fn=<MseLossBackward0>)
loss: tensor(62.3578, grad_fn=<MseLossBackward0>)
loss: tensor(37.6161, grad_fn=<MseLossBackward0>)
loss: tensor(90.7524, grad_fn=<MseLossBackward0>)
loss: tensor(52.5763, grad_fn=<MseLossBackward0>)


 76%|███████▌  | 38/50 [07:15<02:19, 11.66s/it]

epoch: 37 train_epoch_loss: 70.78173049290974 val_epoch_loss: 64.88898391723633
loss: tensor(49.4835, grad_fn=<MseLossBackward0>)
loss: tensor(33.6951, grad_fn=<MseLossBackward0>)
loss: tensor(70.5900, grad_fn=<MseLossBackward0>)
loss: tensor(14.1834, grad_fn=<MseLossBackward0>)
loss: tensor(46.4403, grad_fn=<MseLossBackward0>)
loss: tensor(79.6446, grad_fn=<MseLossBackward0>)
loss: tensor(214.4605, grad_fn=<MseLossBackward0>)
loss: tensor(125.0045, grad_fn=<MseLossBackward0>)
loss: tensor(39.4321, grad_fn=<MseLossBackward0>)
loss: tensor(55.2622, grad_fn=<MseLossBackward0>)
loss: tensor(91.8158, grad_fn=<MseLossBackward0>)
loss: tensor(65.7115, grad_fn=<MseLossBackward0>)


 78%|███████▊  | 39/50 [07:27<02:08, 11.70s/it]

epoch: 38 train_epoch_loss: 73.81029033660889 val_epoch_loss: 59.58242568969727
loss: tensor(19.5509, grad_fn=<MseLossBackward0>)
loss: tensor(88.8897, grad_fn=<MseLossBackward0>)
loss: tensor(136.2780, grad_fn=<MseLossBackward0>)
loss: tensor(87.2758, grad_fn=<MseLossBackward0>)
loss: tensor(43.0879, grad_fn=<MseLossBackward0>)
loss: tensor(42.4594, grad_fn=<MseLossBackward0>)
loss: tensor(54.0902, grad_fn=<MseLossBackward0>)
loss: tensor(113.6003, grad_fn=<MseLossBackward0>)
loss: tensor(51.4609, grad_fn=<MseLossBackward0>)
loss: tensor(64.0047, grad_fn=<MseLossBackward0>)
loss: tensor(64.1312, grad_fn=<MseLossBackward0>)
loss: tensor(47.0410, grad_fn=<MseLossBackward0>)


 80%|████████  | 40/50 [07:39<01:56, 11.69s/it]

epoch: 39 train_epoch_loss: 67.6558403968811 val_epoch_loss: 70.96195373535156
loss: tensor(21.9244, grad_fn=<MseLossBackward0>)
loss: tensor(73.8236, grad_fn=<MseLossBackward0>)
loss: tensor(66.3585, grad_fn=<MseLossBackward0>)
loss: tensor(54.4526, grad_fn=<MseLossBackward0>)
loss: tensor(113.3181, grad_fn=<MseLossBackward0>)
loss: tensor(119.9569, grad_fn=<MseLossBackward0>)
loss: tensor(35.4378, grad_fn=<MseLossBackward0>)
loss: tensor(69.1581, grad_fn=<MseLossBackward0>)
loss: tensor(26.0031, grad_fn=<MseLossBackward0>)
loss: tensor(157.4937, grad_fn=<MseLossBackward0>)
loss: tensor(63.2567, grad_fn=<MseLossBackward0>)
loss: tensor(58.9891, grad_fn=<MseLossBackward0>)


 82%|████████▏ | 41/50 [07:51<01:45, 11.73s/it]

epoch: 40 train_epoch_loss: 71.68104235331218 val_epoch_loss: 68.52952194213867
loss: tensor(79.7849, grad_fn=<MseLossBackward0>)
loss: tensor(81.6701, grad_fn=<MseLossBackward0>)
loss: tensor(123.9685, grad_fn=<MseLossBackward0>)
loss: tensor(47.5489, grad_fn=<MseLossBackward0>)
loss: tensor(51.5551, grad_fn=<MseLossBackward0>)
loss: tensor(44.9825, grad_fn=<MseLossBackward0>)
loss: tensor(73.5851, grad_fn=<MseLossBackward0>)
loss: tensor(62.5572, grad_fn=<MseLossBackward0>)
loss: tensor(50.1163, grad_fn=<MseLossBackward0>)
loss: tensor(40.9254, grad_fn=<MseLossBackward0>)
loss: tensor(69.3248, grad_fn=<MseLossBackward0>)
loss: tensor(64.2954, grad_fn=<MseLossBackward0>)


 84%|████████▍ | 42/50 [08:02<01:34, 11.76s/it]

epoch: 41 train_epoch_loss: 65.85951073964436 val_epoch_loss: 67.93037185668945
loss: tensor(66.6172, grad_fn=<MseLossBackward0>)
loss: tensor(30.7825, grad_fn=<MseLossBackward0>)
loss: tensor(39.1367, grad_fn=<MseLossBackward0>)
loss: tensor(49.1660, grad_fn=<MseLossBackward0>)
loss: tensor(29.3542, grad_fn=<MseLossBackward0>)
loss: tensor(195.5952, grad_fn=<MseLossBackward0>)
loss: tensor(27.0508, grad_fn=<MseLossBackward0>)
loss: tensor(69.7397, grad_fn=<MseLossBackward0>)
loss: tensor(70.6169, grad_fn=<MseLossBackward0>)
loss: tensor(170.2443, grad_fn=<MseLossBackward0>)
loss: tensor(83.2825, grad_fn=<MseLossBackward0>)
loss: tensor(49.6539, grad_fn=<MseLossBackward0>)


 86%|████████▌ | 43/50 [08:14<01:22, 11.80s/it]

epoch: 42 train_epoch_loss: 73.43666076660156 val_epoch_loss: 70.84398498535157
loss: tensor(23.0042, grad_fn=<MseLossBackward0>)
loss: tensor(30.5661, grad_fn=<MseLossBackward0>)
loss: tensor(61.4727, grad_fn=<MseLossBackward0>)
loss: tensor(101.3197, grad_fn=<MseLossBackward0>)
loss: tensor(97.1071, grad_fn=<MseLossBackward0>)
loss: tensor(81.1108, grad_fn=<MseLossBackward0>)
loss: tensor(136.5038, grad_fn=<MseLossBackward0>)
loss: tensor(59.2030, grad_fn=<MseLossBackward0>)
loss: tensor(35.1549, grad_fn=<MseLossBackward0>)
loss: tensor(90.7103, grad_fn=<MseLossBackward0>)


In [5]:
# 画图
# loss图
fig = plt.figure(facecolor='white', figsize=(10, 7))
plt.xlabel('X')
plt.ylabel('Y')
plt.xlim(xmax=len(val_loss), xmin=0)
plt.ylim(ymax=max(max(train_loss), max(val_loss)), ymin=0)

x1 = [i for i in range(0, len(train_loss), 1)]
y1 = val_loss
x2 = [i for i in range(0, len(train_loss), 1)]
y2 = train_loss
colors1 = '#00CED4'
colors2 = '#DC143C'
area = np.pi * 4 ** 1
# 画散点图
plt.scatter(x1, y1, s=area,c=colors1, alpha=0.4, label='val_loss')
plt.scatter(x2, y2, s=area,c=colors2, alpha=0.4, label='train_loss')
plt.legend()
plt.sho