# LSTM AutoEncoder

LSTM AutoEncoder 是把普通 AutoEncoder 中的全连接层替换为 LSTM 后构造出来的模型；训练时以输入数据本身作为重建目标，即期望输出与输入尽可能一致。


LSTM AutoEncoder的版本通常有以下两种形式：

- encoder与decoder都是：lstm
- encoder是 lstm + fc ; decoder是 fc + lstm

ref: [pytorch 实现 LSTM AutoEncoder 与案例](https://blog.csdn.net/weixin_35757704/article/details/118459850)

In [9]:
import torch
from torch import nn
import torch.utils.data as Data

由 LSTM 组成的网络模型

In [10]:
class LstmAutoEncoder(nn.Module):
    """Autoencoder whose encoder and decoder are each a single LSTM layer.

    ``forward`` reshapes its input to ``(rows, 1, features)``, i.e. every row
    is fed to the LSTMs as a sequence of length 1.

    Args:
        input_layer: Number of features per row. Used as the encoder's input
            size and as the decoder's hidden (output) size.
        hidden_layer: Size of the encoded representation.
        batch_size: Stored on the instance for backward compatibility. The
            initial LSTM states are sized from the actual input, so inputs
            with a different number of rows are accepted.
    """

    def __init__(self, input_layer=300, hidden_layer=100, batch_size=20):
        super(LstmAutoEncoder, self).__init__()

        self.input_layer = input_layer
        self.hidden_layer = hidden_layer
        self.batch_size = batch_size
        self.encoder_lstm = nn.LSTM(self.input_layer, self.hidden_layer, batch_first=True)
        self.decoder_lstm = nn.LSTM(self.hidden_layer, self.input_layer, batch_first=True)

    def forward(self, input_x):
        """Encode and then reconstruct ``input_x``.

        Args:
            input_x: Tensor whose first dimension indexes the rows of the
                batch; the remaining dimensions are flattened into the
                feature axis.

        Returns:
            The decoder LSTM output with all size-1 dimensions removed.
        """
        n_rows = len(input_x)
        input_x = input_x.view(n_rows, 1, -1)

        # Zero initial states sized from the real batch, created with the
        # input's dtype and device. Shape: (n_layers, batch, hidden_size).
        encoder_state = (input_x.new_zeros(1, n_rows, self.hidden_layer),
                         input_x.new_zeros(1, n_rows, self.hidden_layer))
        encoded, _ = self.encoder_lstm(input_x, encoder_state)

        # The decoder's hidden size is the original feature count.
        decoder_state = (input_x.new_zeros(1, n_rows, self.input_layer),
                         input_x.new_zeros(1, n_rows, self.input_layer))
        decoded, _ = self.decoder_lstm(encoded, decoder_state)

        # NOTE: a bare squeeze() also drops the batch axis when n_rows == 1;
        # kept so the returned shape matches the previous behaviour.
        return decoded.squeeze()

全连接+LSTM 网络模型

In [11]:
class LstmFcAutoEncoder(nn.Module):
    """Autoencoder with an LSTM + linear encoder and a linear + LSTM decoder.

    ``forward`` reshapes its input to ``(rows, 1, features)``, i.e. every row
    is fed to the LSTMs as a sequence of length 1.

    Args:
        input_layer: Number of features per row. Used as the encoder LSTM's
            input size and as the decoder LSTM's hidden (output) size.
        hidden_layer: Size of the encoded representation; both linear layers
            map ``hidden_layer -> hidden_layer``.
        batch_size: Stored on the instance for backward compatibility. The
            initial LSTM states are sized from the actual input, so inputs
            with a different number of rows are accepted.
    """

    def __init__(self, input_layer=300, hidden_layer=100, batch_size=20):
        super(LstmFcAutoEncoder, self).__init__()

        self.input_layer = input_layer
        self.hidden_layer = hidden_layer
        self.batch_size = batch_size

        self.encoder_lstm = nn.LSTM(self.input_layer, self.hidden_layer, batch_first=True)
        self.encoder_fc = nn.Linear(self.hidden_layer, self.hidden_layer)
        self.decoder_lstm = nn.LSTM(self.hidden_layer, self.input_layer, batch_first=True)
        self.decoder_fc = nn.Linear(self.hidden_layer, self.hidden_layer)
        self.relu = nn.ReLU()

    def forward(self, input_x):
        """Encode and then reconstruct ``input_x``.

        Args:
            input_x: Tensor whose first dimension indexes the rows of the
                batch; the remaining dimensions are flattened into the
                feature axis.

        Returns:
            The decoder LSTM output with all size-1 dimensions removed.
        """
        n_rows = len(input_x)
        input_x = input_x.view(n_rows, 1, -1)

        # encoder: LSTM -> linear -> ReLU
        # Zero initial states sized from the real batch, created with the
        # input's dtype and device. Shape: (n_layers, batch, hidden_size).
        encoder_state = (input_x.new_zeros(1, n_rows, self.hidden_layer),
                         input_x.new_zeros(1, n_rows, self.hidden_layer))
        encoded_seq, _ = self.encoder_lstm(input_x, encoder_state)
        encoder_out = self.relu(self.encoder_fc(encoded_seq))

        # decoder: linear -> ReLU -> LSTM
        decoder_in = self.relu(self.decoder_fc(encoder_out))
        # Previously hard-coded to a batch of 20, which ignored both the
        # constructor argument and the real input size.
        decoder_state = (input_x.new_zeros(1, n_rows, self.input_layer),
                         input_x.new_zeros(1, n_rows, self.input_layer))
        decoded, _ = self.decoder_lstm(decoder_in, decoder_state)

        # NOTE: a bare squeeze() also drops the batch axis when n_rows == 1;
        # kept so the returned shape matches the previous behaviour.
        return decoded.squeeze()

In [12]:
def get_train_data():
    """Build a random toy dataset for the autoencoder demo.

    The features are random integers, so a model trained on them is not
    expected to reconstruct them well.

    Returns:
        A ``(features, labels)`` pair of float tensors: 2000 rows of 300
        features passed through scikit-learn's ``MinMaxScaler`` (default
        settings), and 2000 labels drawn from ``{0, 1}``.
    """
    import numpy as np
    import pandas as pd
    from sklearn import preprocessing

    def _as_tensor(pandas_obj) -> torch.Tensor:
        # Wrap the underlying numpy values of a DataFrame/Series in a tensor.
        return torch.tensor(data=pandas_obj.values)

    # Features are drawn first and labels second, so the order of the global
    # numpy RNG calls stays the same.
    raw_features = np.random.randint(0, 10, size=(2000, 300))
    scaled_features = preprocessing.MinMaxScaler().fit_transform(raw_features)
    feature_frame = pd.DataFrame(data=scaled_features)
    label_series = pd.Series(np.random.randint(0, 2, 2000))

    features = _as_tensor(feature_frame).float()
    labels = _as_tensor(label_series).float()
    return features, labels

In [13]:
if __name__ == '__main__':
    # Single source of truth for the batch size: the loader, the model and
    # the periodic reconstruction check all have to agree on it.
    batch_size = 20

    # Load the data.
    x, y = get_train_data()
    train_loader = Data.DataLoader(
        dataset=Data.TensorDataset(x, y),  # tensors wrapped in a TensorDataset may have any number of dimensions
        batch_size=batch_size,  # rows per batch
        shuffle=True,  # shuffle the rows each epoch
        num_workers=2,  # load batches in worker processes
        drop_last=True,  # keep every batch at exactly batch_size rows
    )

    # Model, loss, optimizer and epoch count.
    model = LstmAutoEncoder(batch_size=batch_size)  # pure LSTM model
    # model = LstmFcAutoEncoder(batch_size=batch_size)  # LSTM + fully-connected model
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    epochs = 150

    # Training loop.
    model.train()
    for i in range(epochs):
        for seq, _ in train_loader:  # labels are unused: the target is the input itself
            optimizer.zero_grad()
            y_pred = model(seq).squeeze()  # drop size-1 dimensions from the output
            single_loss = loss_function(y_pred, seq)
            # For a binary-classification setup, class labels could be
            # obtained by rounding: print(torch.round(y_pred))
            single_loss.backward()
            optimizer.step()
            print("Train Step:", i, " loss: ", single_loss)

        # Every 20 epochs, reconstruct the first batch_size rows and compare
        # them with the originals.
        if i % 20 == 0:
            test_data = x[:batch_size]
            # Evaluation needs no gradients; switch modes around the check
            # and restore training mode afterwards.
            model.eval()
            with torch.no_grad():
                y_pred = model(test_data).squeeze()  # drop size-1 dimensions from the output
                test_loss = loss_function(y_pred, test_data)
            model.train()
            print("TEST: ", test_data)
            print("PRED: ", y_pred)
            print("LOSS: ", test_loss)

Train Step: 0  loss:  tensor(0.3524, grad_fn=<MseLossBackward0>)
Train Step: 0  loss:  tensor(0.3470, grad_fn=<MseLossBackward0>)
Train Step: 0  loss:  tensor(0.3410, grad_fn=<MseLossBackward0>)
Train Step: 0  loss:  tensor(0.3350, grad_fn=<MseLossBackward0>)
Train Step: 0  loss:  tensor(0.3334, grad_fn=<MseLossBackward0>)
Train Step: 0  loss:  tensor(0.3198, grad_fn=<MseLossBackward0>)
Train Step: 0  loss:  tensor(0.3051, grad_fn=<MseLossBackward0>)
Train Step: 0  loss:  tensor(0.2954, grad_fn=<MseLossBackward0>)
Train Step: 0  loss:  tensor(0.2812, grad_fn=<MseLossBackward0>)
Train Step: 0  loss:  tensor(0.2745, grad_fn=<MseLossBackward0>)
Train Step: 0  loss:  tensor(0.2485, grad_fn=<MseLossBackward0>)
Train Step: 0  loss:  tensor(0.2417, grad_fn=<MseLossBackward0>)
Train Step: 0  loss:  tensor(0.2225, grad_fn=<MseLossBackward0>)
Train Step: 0  loss:  tensor(0.2038, grad_fn=<MseLossBackward0>)
Train Step: 0  loss:  tensor(0.1851, grad_fn=<MseLossBackward0>)
Train Step: 0  loss:  ten

Train Step: 1  loss:  tensor(0.1023, grad_fn=<MseLossBackward0>)
Train Step: 1  loss:  tensor(0.1017, grad_fn=<MseLossBackward0>)
Train Step: 1  loss:  tensor(0.1016, grad_fn=<MseLossBackward0>)
Train Step: 1  loss:  tensor(0.0996, grad_fn=<MseLossBackward0>)
Train Step: 1  loss:  tensor(0.1015, grad_fn=<MseLossBackward0>)
Train Step: 1  loss:  tensor(0.1011, grad_fn=<MseLossBackward0>)
Train Step: 1  loss:  tensor(0.0986, grad_fn=<MseLossBackward0>)
Train Step: 1  loss:  tensor(0.1013, grad_fn=<MseLossBackward0>)
Train Step: 1  loss:  tensor(0.1024, grad_fn=<MseLossBackward0>)
Train Step: 1  loss:  tensor(0.1019, grad_fn=<MseLossBackward0>)
Train Step: 1  loss:  tensor(0.1007, grad_fn=<MseLossBackward0>)
Train Step: 1  loss:  tensor(0.1037, grad_fn=<MseLossBackward0>)
Train Step: 1  loss:  tensor(0.1011, grad_fn=<MseLossBackward0>)
Train Step: 1  loss:  tensor(0.1025, grad_fn=<MseLossBackward0>)
Train Step: 1  loss:  tensor(0.1027, grad_fn=<MseLossBackward0>)
Train Step: 1  loss:  ten

Train Step: 2  loss:  tensor(0.1023, grad_fn=<MseLossBackward0>)
Train Step: 2  loss:  tensor(0.1019, grad_fn=<MseLossBackward0>)
Train Step: 2  loss:  tensor(0.1010, grad_fn=<MseLossBackward0>)
Train Step: 2  loss:  tensor(0.1022, grad_fn=<MseLossBackward0>)
Train Step: 2  loss:  tensor(0.1019, grad_fn=<MseLossBackward0>)
Train Step: 2  loss:  tensor(0.1024, grad_fn=<MseLossBackward0>)
Train Step: 2  loss:  tensor(0.1012, grad_fn=<MseLossBackward0>)
Train Step: 2  loss:  tensor(0.1012, grad_fn=<MseLossBackward0>)
Train Step: 2  loss:  tensor(0.1013, grad_fn=<MseLossBackward0>)
Train Step: 2  loss:  tensor(0.1002, grad_fn=<MseLossBackward0>)
Train Step: 2  loss:  tensor(0.1013, grad_fn=<MseLossBackward0>)
Train Step: 2  loss:  tensor(0.1012, grad_fn=<MseLossBackward0>)
Train Step: 2  loss:  tensor(0.1001, grad_fn=<MseLossBackward0>)
Train Step: 2  loss:  tensor(0.1024, grad_fn=<MseLossBackward0>)
Train Step: 2  loss:  tensor(0.0998, grad_fn=<MseLossBackward0>)
Train Step: 2  loss:  ten

Train Step: 3  loss:  tensor(0.1008, grad_fn=<MseLossBackward0>)
Train Step: 3  loss:  tensor(0.1024, grad_fn=<MseLossBackward0>)
Train Step: 3  loss:  tensor(0.1014, grad_fn=<MseLossBackward0>)
Train Step: 3  loss:  tensor(0.1023, grad_fn=<MseLossBackward0>)
Train Step: 3  loss:  tensor(0.1015, grad_fn=<MseLossBackward0>)
Train Step: 3  loss:  tensor(0.1023, grad_fn=<MseLossBackward0>)
Train Step: 3  loss:  tensor(0.1009, grad_fn=<MseLossBackward0>)
Train Step: 3  loss:  tensor(0.1005, grad_fn=<MseLossBackward0>)
Train Step: 3  loss:  tensor(0.1010, grad_fn=<MseLossBackward0>)
Train Step: 3  loss:  tensor(0.1048, grad_fn=<MseLossBackward0>)
Train Step: 3  loss:  tensor(0.1005, grad_fn=<MseLossBackward0>)
Train Step: 3  loss:  tensor(0.1023, grad_fn=<MseLossBackward0>)
Train Step: 4  loss:  tensor(0.1032, grad_fn=<MseLossBackward0>)
Train Step: 4  loss:  tensor(0.1000, grad_fn=<MseLossBackward0>)
Train Step: 4  loss:  tensor(0.1006, grad_fn=<MseLossBackward0>)
Train Step: 4  loss:  ten

Train Step: 5  loss:  tensor(0.1021, grad_fn=<MseLossBackward0>)
Train Step: 5  loss:  tensor(0.1019, grad_fn=<MseLossBackward0>)
Train Step: 5  loss:  tensor(0.1042, grad_fn=<MseLossBackward0>)
Train Step: 5  loss:  tensor(0.1020, grad_fn=<MseLossBackward0>)
Train Step: 5  loss:  tensor(0.1020, grad_fn=<MseLossBackward0>)
Train Step: 5  loss:  tensor(0.0997, grad_fn=<MseLossBackward0>)
Train Step: 5  loss:  tensor(0.1001, grad_fn=<MseLossBackward0>)
Train Step: 5  loss:  tensor(0.1020, grad_fn=<MseLossBackward0>)
Train Step: 5  loss:  tensor(0.0998, grad_fn=<MseLossBackward0>)
Train Step: 5  loss:  tensor(0.1020, grad_fn=<MseLossBackward0>)
Train Step: 5  loss:  tensor(0.1036, grad_fn=<MseLossBackward0>)
Train Step: 5  loss:  tensor(0.0991, grad_fn=<MseLossBackward0>)
Train Step: 5  loss:  tensor(0.1011, grad_fn=<MseLossBackward0>)
Train Step: 5  loss:  tensor(0.0998, grad_fn=<MseLossBackward0>)
Train Step: 5  loss:  tensor(0.1013, grad_fn=<MseLossBackward0>)
Train Step: 5  loss:  ten

Train Step: 6  loss:  tensor(0.1015, grad_fn=<MseLossBackward0>)
Train Step: 6  loss:  tensor(0.1014, grad_fn=<MseLossBackward0>)
Train Step: 6  loss:  tensor(0.0995, grad_fn=<MseLossBackward0>)
Train Step: 6  loss:  tensor(0.1004, grad_fn=<MseLossBackward0>)
Train Step: 6  loss:  tensor(0.0986, grad_fn=<MseLossBackward0>)
Train Step: 6  loss:  tensor(0.1016, grad_fn=<MseLossBackward0>)
Train Step: 6  loss:  tensor(0.1025, grad_fn=<MseLossBackward0>)
Train Step: 6  loss:  tensor(0.1014, grad_fn=<MseLossBackward0>)
Train Step: 6  loss:  tensor(0.0994, grad_fn=<MseLossBackward0>)
Train Step: 6  loss:  tensor(0.0993, grad_fn=<MseLossBackward0>)
Train Step: 6  loss:  tensor(0.1020, grad_fn=<MseLossBackward0>)
Train Step: 6  loss:  tensor(0.0983, grad_fn=<MseLossBackward0>)
Train Step: 6  loss:  tensor(0.0989, grad_fn=<MseLossBackward0>)
Train Step: 6  loss:  tensor(0.0992, grad_fn=<MseLossBackward0>)
Train Step: 6  loss:  tensor(0.0991, grad_fn=<MseLossBackward0>)
Train Step: 6  loss:  ten

Train Step: 7  loss:  tensor(0.0991, grad_fn=<MseLossBackward0>)
Train Step: 7  loss:  tensor(0.0983, grad_fn=<MseLossBackward0>)
Train Step: 7  loss:  tensor(0.1006, grad_fn=<MseLossBackward0>)
Train Step: 7  loss:  tensor(0.0980, grad_fn=<MseLossBackward0>)
Train Step: 7  loss:  tensor(0.0999, grad_fn=<MseLossBackward0>)
Train Step: 7  loss:  tensor(0.0989, grad_fn=<MseLossBackward0>)
Train Step: 7  loss:  tensor(0.1008, grad_fn=<MseLossBackward0>)
Train Step: 7  loss:  tensor(0.0979, grad_fn=<MseLossBackward0>)
Train Step: 7  loss:  tensor(0.0994, grad_fn=<MseLossBackward0>)
Train Step: 8  loss:  tensor(0.0994, grad_fn=<MseLossBackward0>)
Train Step: 8  loss:  tensor(0.0981, grad_fn=<MseLossBackward0>)
Train Step: 8  loss:  tensor(0.0992, grad_fn=<MseLossBackward0>)
Train Step: 8  loss:  tensor(0.1002, grad_fn=<MseLossBackward0>)
Train Step: 8  loss:  tensor(0.0985, grad_fn=<MseLossBackward0>)
Train Step: 8  loss:  tensor(0.1006, grad_fn=<MseLossBackward0>)
Train Step: 8  loss:  ten

Train Step: 9  loss:  tensor(0.0991, grad_fn=<MseLossBackward0>)
Train Step: 9  loss:  tensor(0.0982, grad_fn=<MseLossBackward0>)
Train Step: 9  loss:  tensor(0.1002, grad_fn=<MseLossBackward0>)
Train Step: 9  loss:  tensor(0.0990, grad_fn=<MseLossBackward0>)
Train Step: 9  loss:  tensor(0.0983, grad_fn=<MseLossBackward0>)
Train Step: 9  loss:  tensor(0.0977, grad_fn=<MseLossBackward0>)
Train Step: 9  loss:  tensor(0.0993, grad_fn=<MseLossBackward0>)
Train Step: 9  loss:  tensor(0.1006, grad_fn=<MseLossBackward0>)
Train Step: 9  loss:  tensor(0.0994, grad_fn=<MseLossBackward0>)
Train Step: 9  loss:  tensor(0.0981, grad_fn=<MseLossBackward0>)
Train Step: 9  loss:  tensor(0.1003, grad_fn=<MseLossBackward0>)
Train Step: 9  loss:  tensor(0.0981, grad_fn=<MseLossBackward0>)
Train Step: 9  loss:  tensor(0.0998, grad_fn=<MseLossBackward0>)
Train Step: 9  loss:  tensor(0.0999, grad_fn=<MseLossBackward0>)
Train Step: 9  loss:  tensor(0.0983, grad_fn=<MseLossBackward0>)
Train Step: 9  loss:  ten

Train Step: 10  loss:  tensor(0.0991, grad_fn=<MseLossBackward0>)
Train Step: 10  loss:  tensor(0.0995, grad_fn=<MseLossBackward0>)
Train Step: 10  loss:  tensor(0.0974, grad_fn=<MseLossBackward0>)
Train Step: 10  loss:  tensor(0.0989, grad_fn=<MseLossBackward0>)
Train Step: 10  loss:  tensor(0.0997, grad_fn=<MseLossBackward0>)
Train Step: 10  loss:  tensor(0.0969, grad_fn=<MseLossBackward0>)
Train Step: 10  loss:  tensor(0.0977, grad_fn=<MseLossBackward0>)
Train Step: 10  loss:  tensor(0.0965, grad_fn=<MseLossBackward0>)
Train Step: 10  loss:  tensor(0.0978, grad_fn=<MseLossBackward0>)
Train Step: 10  loss:  tensor(0.0971, grad_fn=<MseLossBackward0>)
Train Step: 10  loss:  tensor(0.0964, grad_fn=<MseLossBackward0>)
Train Step: 10  loss:  tensor(0.0980, grad_fn=<MseLossBackward0>)
Train Step: 10  loss:  tensor(0.0999, grad_fn=<MseLossBackward0>)
Train Step: 10  loss:  tensor(0.0984, grad_fn=<MseLossBackward0>)
Train Step: 10  loss:  tensor(0.0976, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 12  loss:  tensor(0.0964, grad_fn=<MseLossBackward0>)
Train Step: 12  loss:  tensor(0.0954, grad_fn=<MseLossBackward0>)
Train Step: 12  loss:  tensor(0.0970, grad_fn=<MseLossBackward0>)
Train Step: 12  loss:  tensor(0.0971, grad_fn=<MseLossBackward0>)
Train Step: 12  loss:  tensor(0.0984, grad_fn=<MseLossBackward0>)
Train Step: 12  loss:  tensor(0.1007, grad_fn=<MseLossBackward0>)
Train Step: 12  loss:  tensor(0.0983, grad_fn=<MseLossBackward0>)
Train Step: 12  loss:  tensor(0.0963, grad_fn=<MseLossBackward0>)
Train Step: 12  loss:  tensor(0.0968, grad_fn=<MseLossBackward0>)
Train Step: 12  loss:  tensor(0.0968, grad_fn=<MseLossBackward0>)
Train Step: 12  loss:  tensor(0.0988, grad_fn=<MseLossBackward0>)
Train Step: 12  loss:  tensor(0.0976, grad_fn=<MseLossBackward0>)
Train Step: 12  loss:  tensor(0.0990, grad_fn=<MseLossBackward0>)
Train Step: 12  loss:  tensor(0.0971, grad_fn=<MseLossBackward0>)
Train Step: 12  loss:  tensor(0.0972, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 13  loss:  tensor(0.0961, grad_fn=<MseLossBackward0>)
Train Step: 13  loss:  tensor(0.0956, grad_fn=<MseLossBackward0>)
Train Step: 13  loss:  tensor(0.0963, grad_fn=<MseLossBackward0>)
Train Step: 13  loss:  tensor(0.0932, grad_fn=<MseLossBackward0>)
Train Step: 13  loss:  tensor(0.0966, grad_fn=<MseLossBackward0>)
Train Step: 13  loss:  tensor(0.0952, grad_fn=<MseLossBackward0>)
Train Step: 13  loss:  tensor(0.0958, grad_fn=<MseLossBackward0>)
Train Step: 13  loss:  tensor(0.0936, grad_fn=<MseLossBackward0>)
Train Step: 13  loss:  tensor(0.0945, grad_fn=<MseLossBackward0>)
Train Step: 13  loss:  tensor(0.0965, grad_fn=<MseLossBackward0>)
Train Step: 13  loss:  tensor(0.0926, grad_fn=<MseLossBackward0>)
Train Step: 13  loss:  tensor(0.0954, grad_fn=<MseLossBackward0>)
Train Step: 13  loss:  tensor(0.0952, grad_fn=<MseLossBackward0>)
Train Step: 13  loss:  tensor(0.0952, grad_fn=<MseLossBackward0>)
Train Step: 13  loss:  tensor(0.0965, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 14  loss:  tensor(0.0914, grad_fn=<MseLossBackward0>)
Train Step: 14  loss:  tensor(0.0928, grad_fn=<MseLossBackward0>)
Train Step: 14  loss:  tensor(0.0918, grad_fn=<MseLossBackward0>)
Train Step: 14  loss:  tensor(0.0938, grad_fn=<MseLossBackward0>)
Train Step: 14  loss:  tensor(0.0927, grad_fn=<MseLossBackward0>)
Train Step: 14  loss:  tensor(0.0923, grad_fn=<MseLossBackward0>)
Train Step: 14  loss:  tensor(0.0933, grad_fn=<MseLossBackward0>)
Train Step: 14  loss:  tensor(0.0923, grad_fn=<MseLossBackward0>)
Train Step: 14  loss:  tensor(0.0929, grad_fn=<MseLossBackward0>)
Train Step: 14  loss:  tensor(0.0934, grad_fn=<MseLossBackward0>)
Train Step: 14  loss:  tensor(0.0936, grad_fn=<MseLossBackward0>)
Train Step: 14  loss:  tensor(0.0925, grad_fn=<MseLossBackward0>)
Train Step: 14  loss:  tensor(0.0906, grad_fn=<MseLossBackward0>)
Train Step: 14  loss:  tensor(0.0930, grad_fn=<MseLossBackward0>)
Train Step: 14  loss:  tensor(0.0928, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 16  loss:  tensor(0.0885, grad_fn=<MseLossBackward0>)
Train Step: 16  loss:  tensor(0.0894, grad_fn=<MseLossBackward0>)
Train Step: 16  loss:  tensor(0.0870, grad_fn=<MseLossBackward0>)
Train Step: 16  loss:  tensor(0.0887, grad_fn=<MseLossBackward0>)
Train Step: 16  loss:  tensor(0.0900, grad_fn=<MseLossBackward0>)
Train Step: 16  loss:  tensor(0.0916, grad_fn=<MseLossBackward0>)
Train Step: 16  loss:  tensor(0.0899, grad_fn=<MseLossBackward0>)
Train Step: 16  loss:  tensor(0.0889, grad_fn=<MseLossBackward0>)
Train Step: 16  loss:  tensor(0.0901, grad_fn=<MseLossBackward0>)
Train Step: 16  loss:  tensor(0.0893, grad_fn=<MseLossBackward0>)
Train Step: 16  loss:  tensor(0.0891, grad_fn=<MseLossBackward0>)
Train Step: 16  loss:  tensor(0.0888, grad_fn=<MseLossBackward0>)
Train Step: 16  loss:  tensor(0.0900, grad_fn=<MseLossBackward0>)
Train Step: 16  loss:  tensor(0.0893, grad_fn=<MseLossBackward0>)
Train Step: 16  loss:  tensor(0.0901, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 17  loss:  tensor(0.0851, grad_fn=<MseLossBackward0>)
Train Step: 17  loss:  tensor(0.0840, grad_fn=<MseLossBackward0>)
Train Step: 17  loss:  tensor(0.0875, grad_fn=<MseLossBackward0>)
Train Step: 17  loss:  tensor(0.0857, grad_fn=<MseLossBackward0>)
Train Step: 17  loss:  tensor(0.0868, grad_fn=<MseLossBackward0>)
Train Step: 17  loss:  tensor(0.0836, grad_fn=<MseLossBackward0>)
Train Step: 17  loss:  tensor(0.0829, grad_fn=<MseLossBackward0>)
Train Step: 17  loss:  tensor(0.0853, grad_fn=<MseLossBackward0>)
Train Step: 17  loss:  tensor(0.0842, grad_fn=<MseLossBackward0>)
Train Step: 17  loss:  tensor(0.0863, grad_fn=<MseLossBackward0>)
Train Step: 17  loss:  tensor(0.0826, grad_fn=<MseLossBackward0>)
Train Step: 17  loss:  tensor(0.0838, grad_fn=<MseLossBackward0>)
Train Step: 17  loss:  tensor(0.0828, grad_fn=<MseLossBackward0>)
Train Step: 17  loss:  tensor(0.0841, grad_fn=<MseLossBackward0>)
Train Step: 17  loss:  tensor(0.0846, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 18  loss:  tensor(0.0818, grad_fn=<MseLossBackward0>)
Train Step: 18  loss:  tensor(0.0816, grad_fn=<MseLossBackward0>)
Train Step: 18  loss:  tensor(0.0796, grad_fn=<MseLossBackward0>)
Train Step: 18  loss:  tensor(0.0827, grad_fn=<MseLossBackward0>)
Train Step: 18  loss:  tensor(0.0817, grad_fn=<MseLossBackward0>)
Train Step: 18  loss:  tensor(0.0821, grad_fn=<MseLossBackward0>)
Train Step: 18  loss:  tensor(0.0797, grad_fn=<MseLossBackward0>)
Train Step: 18  loss:  tensor(0.0788, grad_fn=<MseLossBackward0>)
Train Step: 18  loss:  tensor(0.0810, grad_fn=<MseLossBackward0>)
Train Step: 18  loss:  tensor(0.0818, grad_fn=<MseLossBackward0>)
Train Step: 18  loss:  tensor(0.0810, grad_fn=<MseLossBackward0>)
Train Step: 18  loss:  tensor(0.0817, grad_fn=<MseLossBackward0>)
Train Step: 18  loss:  tensor(0.0836, grad_fn=<MseLossBackward0>)
Train Step: 18  loss:  tensor(0.0804, grad_fn=<MseLossBackward0>)
Train Step: 18  loss:  tensor(0.0804, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 20  loss:  tensor(0.0774, grad_fn=<MseLossBackward0>)
Train Step: 20  loss:  tensor(0.0769, grad_fn=<MseLossBackward0>)
Train Step: 20  loss:  tensor(0.0755, grad_fn=<MseLossBackward0>)
Train Step: 20  loss:  tensor(0.0793, grad_fn=<MseLossBackward0>)
Train Step: 20  loss:  tensor(0.0769, grad_fn=<MseLossBackward0>)
Train Step: 20  loss:  tensor(0.0757, grad_fn=<MseLossBackward0>)
Train Step: 20  loss:  tensor(0.0754, grad_fn=<MseLossBackward0>)
Train Step: 20  loss:  tensor(0.0789, grad_fn=<MseLossBackward0>)
Train Step: 20  loss:  tensor(0.0761, grad_fn=<MseLossBackward0>)
Train Step: 20  loss:  tensor(0.0751, grad_fn=<MseLossBackward0>)
Train Step: 20  loss:  tensor(0.0780, grad_fn=<MseLossBackward0>)
Train Step: 20  loss:  tensor(0.0763, grad_fn=<MseLossBackward0>)
Train Step: 20  loss:  tensor(0.0751, grad_fn=<MseLossBackward0>)
Train Step: 20  loss:  tensor(0.0766, grad_fn=<MseLossBackward0>)
Train Step: 20  loss:  tensor(0.0753, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 21  loss:  tensor(0.0752, grad_fn=<MseLossBackward0>)
Train Step: 21  loss:  tensor(0.0743, grad_fn=<MseLossBackward0>)
Train Step: 21  loss:  tensor(0.0753, grad_fn=<MseLossBackward0>)
Train Step: 21  loss:  tensor(0.0741, grad_fn=<MseLossBackward0>)
Train Step: 21  loss:  tensor(0.0731, grad_fn=<MseLossBackward0>)
Train Step: 21  loss:  tensor(0.0746, grad_fn=<MseLossBackward0>)
Train Step: 21  loss:  tensor(0.0744, grad_fn=<MseLossBackward0>)
Train Step: 21  loss:  tensor(0.0741, grad_fn=<MseLossBackward0>)
Train Step: 21  loss:  tensor(0.0744, grad_fn=<MseLossBackward0>)
Train Step: 21  loss:  tensor(0.0741, grad_fn=<MseLossBackward0>)
Train Step: 21  loss:  tensor(0.0749, grad_fn=<MseLossBackward0>)
Train Step: 21  loss:  tensor(0.0744, grad_fn=<MseLossBackward0>)
Train Step: 21  loss:  tensor(0.0734, grad_fn=<MseLossBackward0>)
Train Step: 21  loss:  tensor(0.0737, grad_fn=<MseLossBackward0>)
Train Step: 21  loss:  tensor(0.0737, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 22  loss:  tensor(0.0695, grad_fn=<MseLossBackward0>)
Train Step: 22  loss:  tensor(0.0720, grad_fn=<MseLossBackward0>)
Train Step: 22  loss:  tensor(0.0717, grad_fn=<MseLossBackward0>)
Train Step: 22  loss:  tensor(0.0726, grad_fn=<MseLossBackward0>)
Train Step: 22  loss:  tensor(0.0724, grad_fn=<MseLossBackward0>)
Train Step: 22  loss:  tensor(0.0719, grad_fn=<MseLossBackward0>)
Train Step: 22  loss:  tensor(0.0726, grad_fn=<MseLossBackward0>)
Train Step: 22  loss:  tensor(0.0710, grad_fn=<MseLossBackward0>)
Train Step: 22  loss:  tensor(0.0728, grad_fn=<MseLossBackward0>)
Train Step: 22  loss:  tensor(0.0707, grad_fn=<MseLossBackward0>)
Train Step: 22  loss:  tensor(0.0730, grad_fn=<MseLossBackward0>)
Train Step: 22  loss:  tensor(0.0722, grad_fn=<MseLossBackward0>)
Train Step: 22  loss:  tensor(0.0723, grad_fn=<MseLossBackward0>)
Train Step: 22  loss:  tensor(0.0724, grad_fn=<MseLossBackward0>)
Train Step: 22  loss:  tensor(0.0718, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 23  loss:  tensor(0.0686, grad_fn=<MseLossBackward0>)
Train Step: 23  loss:  tensor(0.0683, grad_fn=<MseLossBackward0>)
Train Step: 23  loss:  tensor(0.0711, grad_fn=<MseLossBackward0>)
Train Step: 23  loss:  tensor(0.0705, grad_fn=<MseLossBackward0>)
Train Step: 23  loss:  tensor(0.0721, grad_fn=<MseLossBackward0>)
Train Step: 23  loss:  tensor(0.0697, grad_fn=<MseLossBackward0>)
Train Step: 23  loss:  tensor(0.0693, grad_fn=<MseLossBackward0>)
Train Step: 23  loss:  tensor(0.0700, grad_fn=<MseLossBackward0>)
Train Step: 23  loss:  tensor(0.0713, grad_fn=<MseLossBackward0>)
Train Step: 24  loss:  tensor(0.0706, grad_fn=<MseLossBackward0>)
Train Step: 24  loss:  tensor(0.0713, grad_fn=<MseLossBackward0>)
Train Step: 24  loss:  tensor(0.0694, grad_fn=<MseLossBackward0>)
Train Step: 24  loss:  tensor(0.0689, grad_fn=<MseLossBackward0>)
Train Step: 24  loss:  tensor(0.0713, grad_fn=<MseLossBackward0>)
Train Step: 24  loss:  tensor(0.0684, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 25  loss:  tensor(0.0694, grad_fn=<MseLossBackward0>)
Train Step: 25  loss:  tensor(0.0672, grad_fn=<MseLossBackward0>)
Train Step: 25  loss:  tensor(0.0688, grad_fn=<MseLossBackward0>)
Train Step: 25  loss:  tensor(0.0652, grad_fn=<MseLossBackward0>)
Train Step: 25  loss:  tensor(0.0678, grad_fn=<MseLossBackward0>)
Train Step: 25  loss:  tensor(0.0698, grad_fn=<MseLossBackward0>)
Train Step: 25  loss:  tensor(0.0695, grad_fn=<MseLossBackward0>)
Train Step: 25  loss:  tensor(0.0679, grad_fn=<MseLossBackward0>)
Train Step: 25  loss:  tensor(0.0665, grad_fn=<MseLossBackward0>)
Train Step: 25  loss:  tensor(0.0684, grad_fn=<MseLossBackward0>)
Train Step: 25  loss:  tensor(0.0677, grad_fn=<MseLossBackward0>)
Train Step: 25  loss:  tensor(0.0692, grad_fn=<MseLossBackward0>)
Train Step: 25  loss:  tensor(0.0693, grad_fn=<MseLossBackward0>)
Train Step: 25  loss:  tensor(0.0662, grad_fn=<MseLossBackward0>)
Train Step: 25  loss:  tensor(0.0669, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 26  loss:  tensor(0.0668, grad_fn=<MseLossBackward0>)
Train Step: 26  loss:  tensor(0.0658, grad_fn=<MseLossBackward0>)
Train Step: 26  loss:  tensor(0.0695, grad_fn=<MseLossBackward0>)
Train Step: 26  loss:  tensor(0.0661, grad_fn=<MseLossBackward0>)
Train Step: 26  loss:  tensor(0.0684, grad_fn=<MseLossBackward0>)
Train Step: 26  loss:  tensor(0.0649, grad_fn=<MseLossBackward0>)
Train Step: 26  loss:  tensor(0.0666, grad_fn=<MseLossBackward0>)
Train Step: 26  loss:  tensor(0.0671, grad_fn=<MseLossBackward0>)
Train Step: 26  loss:  tensor(0.0675, grad_fn=<MseLossBackward0>)
Train Step: 26  loss:  tensor(0.0672, grad_fn=<MseLossBackward0>)
Train Step: 26  loss:  tensor(0.0677, grad_fn=<MseLossBackward0>)
Train Step: 26  loss:  tensor(0.0670, grad_fn=<MseLossBackward0>)
Train Step: 26  loss:  tensor(0.0663, grad_fn=<MseLossBackward0>)
Train Step: 26  loss:  tensor(0.0661, grad_fn=<MseLossBackward0>)
Train Step: 26  loss:  tensor(0.0666, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 27  loss:  tensor(0.0661, grad_fn=<MseLossBackward0>)
Train Step: 27  loss:  tensor(0.0666, grad_fn=<MseLossBackward0>)
Train Step: 27  loss:  tensor(0.0686, grad_fn=<MseLossBackward0>)
Train Step: 27  loss:  tensor(0.0689, grad_fn=<MseLossBackward0>)
Train Step: 27  loss:  tensor(0.0637, grad_fn=<MseLossBackward0>)
Train Step: 27  loss:  tensor(0.0658, grad_fn=<MseLossBackward0>)
Train Step: 27  loss:  tensor(0.0658, grad_fn=<MseLossBackward0>)
Train Step: 27  loss:  tensor(0.0653, grad_fn=<MseLossBackward0>)
Train Step: 27  loss:  tensor(0.0665, grad_fn=<MseLossBackward0>)
Train Step: 27  loss:  tensor(0.0647, grad_fn=<MseLossBackward0>)
Train Step: 28  loss:  tensor(0.0663, grad_fn=<MseLossBackward0>)
Train Step: 28  loss:  tensor(0.0671, grad_fn=<MseLossBackward0>)
Train Step: 28  loss:  tensor(0.0652, grad_fn=<MseLossBackward0>)
Train Step: 28  loss:  tensor(0.0652, grad_fn=<MseLossBackward0>)
Train Step: 28  loss:  tensor(0.0667, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 29  loss:  tensor(0.0648, grad_fn=<MseLossBackward0>)
Train Step: 29  loss:  tensor(0.0648, grad_fn=<MseLossBackward0>)
Train Step: 29  loss:  tensor(0.0650, grad_fn=<MseLossBackward0>)
Train Step: 29  loss:  tensor(0.0646, grad_fn=<MseLossBackward0>)
Train Step: 29  loss:  tensor(0.0652, grad_fn=<MseLossBackward0>)
Train Step: 29  loss:  tensor(0.0637, grad_fn=<MseLossBackward0>)
Train Step: 29  loss:  tensor(0.0656, grad_fn=<MseLossBackward0>)
Train Step: 29  loss:  tensor(0.0646, grad_fn=<MseLossBackward0>)
Train Step: 29  loss:  tensor(0.0628, grad_fn=<MseLossBackward0>)
Train Step: 29  loss:  tensor(0.0645, grad_fn=<MseLossBackward0>)
Train Step: 29  loss:  tensor(0.0646, grad_fn=<MseLossBackward0>)
Train Step: 29  loss:  tensor(0.0631, grad_fn=<MseLossBackward0>)
Train Step: 29  loss:  tensor(0.0663, grad_fn=<MseLossBackward0>)
Train Step: 29  loss:  tensor(0.0647, grad_fn=<MseLossBackward0>)
Train Step: 29  loss:  tensor(0.0637, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 30  loss:  tensor(0.0635, grad_fn=<MseLossBackward0>)
Train Step: 30  loss:  tensor(0.0650, grad_fn=<MseLossBackward0>)
Train Step: 30  loss:  tensor(0.0650, grad_fn=<MseLossBackward0>)
Train Step: 30  loss:  tensor(0.0656, grad_fn=<MseLossBackward0>)
Train Step: 30  loss:  tensor(0.0630, grad_fn=<MseLossBackward0>)
Train Step: 30  loss:  tensor(0.0647, grad_fn=<MseLossBackward0>)
Train Step: 30  loss:  tensor(0.0640, grad_fn=<MseLossBackward0>)
Train Step: 30  loss:  tensor(0.0650, grad_fn=<MseLossBackward0>)
Train Step: 30  loss:  tensor(0.0647, grad_fn=<MseLossBackward0>)
Train Step: 30  loss:  tensor(0.0645, grad_fn=<MseLossBackward0>)
Train Step: 30  loss:  tensor(0.0634, grad_fn=<MseLossBackward0>)
Train Step: 30  loss:  tensor(0.0644, grad_fn=<MseLossBackward0>)
Train Step: 30  loss:  tensor(0.0638, grad_fn=<MseLossBackward0>)
Train Step: 30  loss:  tensor(0.0646, grad_fn=<MseLossBackward0>)
Train Step: 30  loss:  tensor(0.0653, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 31  loss:  tensor(0.0641, grad_fn=<MseLossBackward0>)
Train Step: 31  loss:  tensor(0.0657, grad_fn=<MseLossBackward0>)
Train Step: 31  loss:  tensor(0.0665, grad_fn=<MseLossBackward0>)
Train Step: 31  loss:  tensor(0.0656, grad_fn=<MseLossBackward0>)
Train Step: 31  loss:  tensor(0.0639, grad_fn=<MseLossBackward0>)
Train Step: 31  loss:  tensor(0.0658, grad_fn=<MseLossBackward0>)
Train Step: 31  loss:  tensor(0.0638, grad_fn=<MseLossBackward0>)
Train Step: 31  loss:  tensor(0.0641, grad_fn=<MseLossBackward0>)
Train Step: 31  loss:  tensor(0.0636, grad_fn=<MseLossBackward0>)
Train Step: 32  loss:  tensor(0.0640, grad_fn=<MseLossBackward0>)
Train Step: 32  loss:  tensor(0.0654, grad_fn=<MseLossBackward0>)
Train Step: 32  loss:  tensor(0.0634, grad_fn=<MseLossBackward0>)
Train Step: 32  loss:  tensor(0.0629, grad_fn=<MseLossBackward0>)
Train Step: 32  loss:  tensor(0.0640, grad_fn=<MseLossBackward0>)
Train Step: 32  loss:  tensor(0.0625, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 33  loss:  tensor(0.0614, grad_fn=<MseLossBackward0>)
Train Step: 33  loss:  tensor(0.0622, grad_fn=<MseLossBackward0>)
Train Step: 33  loss:  tensor(0.0635, grad_fn=<MseLossBackward0>)
Train Step: 33  loss:  tensor(0.0643, grad_fn=<MseLossBackward0>)
Train Step: 33  loss:  tensor(0.0618, grad_fn=<MseLossBackward0>)
Train Step: 33  loss:  tensor(0.0626, grad_fn=<MseLossBackward0>)
Train Step: 33  loss:  tensor(0.0631, grad_fn=<MseLossBackward0>)
Train Step: 33  loss:  tensor(0.0622, grad_fn=<MseLossBackward0>)
Train Step: 33  loss:  tensor(0.0631, grad_fn=<MseLossBackward0>)
Train Step: 33  loss:  tensor(0.0638, grad_fn=<MseLossBackward0>)
Train Step: 33  loss:  tensor(0.0643, grad_fn=<MseLossBackward0>)
Train Step: 33  loss:  tensor(0.0642, grad_fn=<MseLossBackward0>)
Train Step: 33  loss:  tensor(0.0626, grad_fn=<MseLossBackward0>)
Train Step: 33  loss:  tensor(0.0647, grad_fn=<MseLossBackward0>)
Train Step: 33  loss:  tensor(0.0643, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 34  loss:  tensor(0.0646, grad_fn=<MseLossBackward0>)
Train Step: 34  loss:  tensor(0.0625, grad_fn=<MseLossBackward0>)
Train Step: 34  loss:  tensor(0.0625, grad_fn=<MseLossBackward0>)
Train Step: 34  loss:  tensor(0.0640, grad_fn=<MseLossBackward0>)
Train Step: 34  loss:  tensor(0.0631, grad_fn=<MseLossBackward0>)
Train Step: 34  loss:  tensor(0.0623, grad_fn=<MseLossBackward0>)
Train Step: 34  loss:  tensor(0.0638, grad_fn=<MseLossBackward0>)
Train Step: 34  loss:  tensor(0.0631, grad_fn=<MseLossBackward0>)
Train Step: 34  loss:  tensor(0.0660, grad_fn=<MseLossBackward0>)
Train Step: 34  loss:  tensor(0.0634, grad_fn=<MseLossBackward0>)
Train Step: 34  loss:  tensor(0.0622, grad_fn=<MseLossBackward0>)
Train Step: 34  loss:  tensor(0.0650, grad_fn=<MseLossBackward0>)
Train Step: 34  loss:  tensor(0.0625, grad_fn=<MseLossBackward0>)
Train Step: 34  loss:  tensor(0.0641, grad_fn=<MseLossBackward0>)
Train Step: 34  loss:  tensor(0.0644, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 35  loss:  tensor(0.0616, grad_fn=<MseLossBackward0>)
Train Step: 35  loss:  tensor(0.0635, grad_fn=<MseLossBackward0>)
Train Step: 35  loss:  tensor(0.0629, grad_fn=<MseLossBackward0>)
Train Step: 35  loss:  tensor(0.0630, grad_fn=<MseLossBackward0>)
Train Step: 35  loss:  tensor(0.0628, grad_fn=<MseLossBackward0>)
Train Step: 35  loss:  tensor(0.0638, grad_fn=<MseLossBackward0>)
Train Step: 35  loss:  tensor(0.0647, grad_fn=<MseLossBackward0>)
Train Step: 35  loss:  tensor(0.0635, grad_fn=<MseLossBackward0>)
Train Step: 35  loss:  tensor(0.0638, grad_fn=<MseLossBackward0>)
Train Step: 36  loss:  tensor(0.0620, grad_fn=<MseLossBackward0>)
Train Step: 36  loss:  tensor(0.0611, grad_fn=<MseLossBackward0>)
Train Step: 36  loss:  tensor(0.0634, grad_fn=<MseLossBackward0>)
Train Step: 36  loss:  tensor(0.0629, grad_fn=<MseLossBackward0>)
Train Step: 36  loss:  tensor(0.0622, grad_fn=<MseLossBackward0>)
Train Step: 36  loss:  tensor(0.0619, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 37  loss:  tensor(0.0616, grad_fn=<MseLossBackward0>)
Train Step: 37  loss:  tensor(0.0606, grad_fn=<MseLossBackward0>)
Train Step: 37  loss:  tensor(0.0636, grad_fn=<MseLossBackward0>)
Train Step: 37  loss:  tensor(0.0624, grad_fn=<MseLossBackward0>)
Train Step: 37  loss:  tensor(0.0624, grad_fn=<MseLossBackward0>)
Train Step: 37  loss:  tensor(0.0613, grad_fn=<MseLossBackward0>)
Train Step: 37  loss:  tensor(0.0631, grad_fn=<MseLossBackward0>)
Train Step: 37  loss:  tensor(0.0610, grad_fn=<MseLossBackward0>)
Train Step: 37  loss:  tensor(0.0616, grad_fn=<MseLossBackward0>)
Train Step: 37  loss:  tensor(0.0635, grad_fn=<MseLossBackward0>)
Train Step: 37  loss:  tensor(0.0633, grad_fn=<MseLossBackward0>)
Train Step: 37  loss:  tensor(0.0609, grad_fn=<MseLossBackward0>)
Train Step: 37  loss:  tensor(0.0657, grad_fn=<MseLossBackward0>)
Train Step: 37  loss:  tensor(0.0622, grad_fn=<MseLossBackward0>)
Train Step: 37  loss:  tensor(0.0613, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 38  loss:  tensor(0.0633, grad_fn=<MseLossBackward0>)
Train Step: 38  loss:  tensor(0.0644, grad_fn=<MseLossBackward0>)
Train Step: 38  loss:  tensor(0.0617, grad_fn=<MseLossBackward0>)
Train Step: 38  loss:  tensor(0.0615, grad_fn=<MseLossBackward0>)
Train Step: 38  loss:  tensor(0.0625, grad_fn=<MseLossBackward0>)
Train Step: 38  loss:  tensor(0.0637, grad_fn=<MseLossBackward0>)
Train Step: 38  loss:  tensor(0.0632, grad_fn=<MseLossBackward0>)
Train Step: 38  loss:  tensor(0.0622, grad_fn=<MseLossBackward0>)
Train Step: 38  loss:  tensor(0.0636, grad_fn=<MseLossBackward0>)
Train Step: 38  loss:  tensor(0.0612, grad_fn=<MseLossBackward0>)
Train Step: 38  loss:  tensor(0.0630, grad_fn=<MseLossBackward0>)
Train Step: 38  loss:  tensor(0.0622, grad_fn=<MseLossBackward0>)
Train Step: 38  loss:  tensor(0.0630, grad_fn=<MseLossBackward0>)
Train Step: 38  loss:  tensor(0.0622, grad_fn=<MseLossBackward0>)
Train Step: 38  loss:  tensor(0.0637, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 39  loss:  tensor(0.0620, grad_fn=<MseLossBackward0>)
Train Step: 39  loss:  tensor(0.0650, grad_fn=<MseLossBackward0>)
Train Step: 39  loss:  tensor(0.0637, grad_fn=<MseLossBackward0>)
Train Step: 39  loss:  tensor(0.0624, grad_fn=<MseLossBackward0>)
Train Step: 39  loss:  tensor(0.0612, grad_fn=<MseLossBackward0>)
Train Step: 39  loss:  tensor(0.0643, grad_fn=<MseLossBackward0>)
Train Step: 40  loss:  tensor(0.0625, grad_fn=<MseLossBackward0>)
Train Step: 40  loss:  tensor(0.0624, grad_fn=<MseLossBackward0>)
Train Step: 40  loss:  tensor(0.0599, grad_fn=<MseLossBackward0>)
Train Step: 40  loss:  tensor(0.0619, grad_fn=<MseLossBackward0>)
Train Step: 40  loss:  tensor(0.0609, grad_fn=<MseLossBackward0>)
Train Step: 40  loss:  tensor(0.0613, grad_fn=<MseLossBackward0>)
Train Step: 40  loss:  tensor(0.0616, grad_fn=<MseLossBackward0>)
Train Step: 40  loss:  tensor(0.0632, grad_fn=<MseLossBackward0>)
Train Step: 40  loss:  tensor(0.0623, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 41  loss:  tensor(0.0617, grad_fn=<MseLossBackward0>)
Train Step: 41  loss:  tensor(0.0623, grad_fn=<MseLossBackward0>)
Train Step: 41  loss:  tensor(0.0619, grad_fn=<MseLossBackward0>)
Train Step: 41  loss:  tensor(0.0608, grad_fn=<MseLossBackward0>)
Train Step: 41  loss:  tensor(0.0608, grad_fn=<MseLossBackward0>)
Train Step: 41  loss:  tensor(0.0607, grad_fn=<MseLossBackward0>)
Train Step: 41  loss:  tensor(0.0631, grad_fn=<MseLossBackward0>)
Train Step: 41  loss:  tensor(0.0625, grad_fn=<MseLossBackward0>)
Train Step: 41  loss:  tensor(0.0628, grad_fn=<MseLossBackward0>)
Train Step: 41  loss:  tensor(0.0628, grad_fn=<MseLossBackward0>)
Train Step: 41  loss:  tensor(0.0610, grad_fn=<MseLossBackward0>)
Train Step: 41  loss:  tensor(0.0628, grad_fn=<MseLossBackward0>)
Train Step: 41  loss:  tensor(0.0620, grad_fn=<MseLossBackward0>)
Train Step: 41  loss:  tensor(0.0614, grad_fn=<MseLossBackward0>)
Train Step: 41  loss:  tensor(0.0632, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 42  loss:  tensor(0.0615, grad_fn=<MseLossBackward0>)
Train Step: 42  loss:  tensor(0.0616, grad_fn=<MseLossBackward0>)
Train Step: 42  loss:  tensor(0.0610, grad_fn=<MseLossBackward0>)
Train Step: 42  loss:  tensor(0.0628, grad_fn=<MseLossBackward0>)
Train Step: 42  loss:  tensor(0.0621, grad_fn=<MseLossBackward0>)
Train Step: 42  loss:  tensor(0.0619, grad_fn=<MseLossBackward0>)
Train Step: 42  loss:  tensor(0.0612, grad_fn=<MseLossBackward0>)
Train Step: 42  loss:  tensor(0.0625, grad_fn=<MseLossBackward0>)
Train Step: 42  loss:  tensor(0.0617, grad_fn=<MseLossBackward0>)
Train Step: 42  loss:  tensor(0.0631, grad_fn=<MseLossBackward0>)
Train Step: 42  loss:  tensor(0.0620, grad_fn=<MseLossBackward0>)
Train Step: 42  loss:  tensor(0.0625, grad_fn=<MseLossBackward0>)
Train Step: 42  loss:  tensor(0.0629, grad_fn=<MseLossBackward0>)
Train Step: 42  loss:  tensor(0.0637, grad_fn=<MseLossBackward0>)
Train Step: 42  loss:  tensor(0.0615, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 43  loss:  tensor(0.0623, grad_fn=<MseLossBackward0>)
Train Step: 43  loss:  tensor(0.0616, grad_fn=<MseLossBackward0>)
Train Step: 43  loss:  tensor(0.0642, grad_fn=<MseLossBackward0>)
Train Step: 43  loss:  tensor(0.0615, grad_fn=<MseLossBackward0>)
Train Step: 44  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 44  loss:  tensor(0.0614, grad_fn=<MseLossBackward0>)
Train Step: 44  loss:  tensor(0.0609, grad_fn=<MseLossBackward0>)
Train Step: 44  loss:  tensor(0.0618, grad_fn=<MseLossBackward0>)
Train Step: 44  loss:  tensor(0.0602, grad_fn=<MseLossBackward0>)
Train Step: 44  loss:  tensor(0.0611, grad_fn=<MseLossBackward0>)
Train Step: 44  loss:  tensor(0.0615, grad_fn=<MseLossBackward0>)
Train Step: 44  loss:  tensor(0.0604, grad_fn=<MseLossBackward0>)
Train Step: 44  loss:  tensor(0.0607, grad_fn=<MseLossBackward0>)
Train Step: 44  loss:  tensor(0.0614, grad_fn=<MseLossBackward0>)
Train Step: 44  loss:  tensor(0.0627, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 45  loss:  tensor(0.0618, grad_fn=<MseLossBackward0>)
Train Step: 45  loss:  tensor(0.0615, grad_fn=<MseLossBackward0>)
Train Step: 45  loss:  tensor(0.0614, grad_fn=<MseLossBackward0>)
Train Step: 45  loss:  tensor(0.0629, grad_fn=<MseLossBackward0>)
Train Step: 45  loss:  tensor(0.0618, grad_fn=<MseLossBackward0>)
Train Step: 45  loss:  tensor(0.0609, grad_fn=<MseLossBackward0>)
Train Step: 45  loss:  tensor(0.0627, grad_fn=<MseLossBackward0>)
Train Step: 45  loss:  tensor(0.0620, grad_fn=<MseLossBackward0>)
Train Step: 45  loss:  tensor(0.0610, grad_fn=<MseLossBackward0>)
Train Step: 45  loss:  tensor(0.0613, grad_fn=<MseLossBackward0>)
Train Step: 45  loss:  tensor(0.0615, grad_fn=<MseLossBackward0>)
Train Step: 45  loss:  tensor(0.0612, grad_fn=<MseLossBackward0>)
Train Step: 45  loss:  tensor(0.0625, grad_fn=<MseLossBackward0>)
Train Step: 45  loss:  tensor(0.0611, grad_fn=<MseLossBackward0>)
Train Step: 45  loss:  tensor(0.0616, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 46  loss:  tensor(0.0619, grad_fn=<MseLossBackward0>)
Train Step: 46  loss:  tensor(0.0620, grad_fn=<MseLossBackward0>)
Train Step: 46  loss:  tensor(0.0627, grad_fn=<MseLossBackward0>)
Train Step: 46  loss:  tensor(0.0621, grad_fn=<MseLossBackward0>)
Train Step: 46  loss:  tensor(0.0621, grad_fn=<MseLossBackward0>)
Train Step: 46  loss:  tensor(0.0629, grad_fn=<MseLossBackward0>)
Train Step: 46  loss:  tensor(0.0613, grad_fn=<MseLossBackward0>)
Train Step: 46  loss:  tensor(0.0629, grad_fn=<MseLossBackward0>)
Train Step: 46  loss:  tensor(0.0615, grad_fn=<MseLossBackward0>)
Train Step: 46  loss:  tensor(0.0629, grad_fn=<MseLossBackward0>)
Train Step: 46  loss:  tensor(0.0611, grad_fn=<MseLossBackward0>)
Train Step: 46  loss:  tensor(0.0613, grad_fn=<MseLossBackward0>)
Train Step: 46  loss:  tensor(0.0607, grad_fn=<MseLossBackward0>)
Train Step: 46  loss:  tensor(0.0614, grad_fn=<MseLossBackward0>)
Train Step: 46  loss:  tensor(0.0616, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 48  loss:  tensor(0.0607, grad_fn=<MseLossBackward0>)
Train Step: 48  loss:  tensor(0.0608, grad_fn=<MseLossBackward0>)
Train Step: 48  loss:  tensor(0.0592, grad_fn=<MseLossBackward0>)
Train Step: 48  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 48  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 48  loss:  tensor(0.0628, grad_fn=<MseLossBackward0>)
Train Step: 48  loss:  tensor(0.0612, grad_fn=<MseLossBackward0>)
Train Step: 48  loss:  tensor(0.0612, grad_fn=<MseLossBackward0>)
Train Step: 48  loss:  tensor(0.0622, grad_fn=<MseLossBackward0>)
Train Step: 48  loss:  tensor(0.0619, grad_fn=<MseLossBackward0>)
Train Step: 48  loss:  tensor(0.0609, grad_fn=<MseLossBackward0>)
Train Step: 48  loss:  tensor(0.0617, grad_fn=<MseLossBackward0>)
Train Step: 48  loss:  tensor(0.0604, grad_fn=<MseLossBackward0>)
Train Step: 48  loss:  tensor(0.0615, grad_fn=<MseLossBackward0>)
Train Step: 48  loss:  tensor(0.0621, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 49  loss:  tensor(0.0613, grad_fn=<MseLossBackward0>)
Train Step: 49  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 49  loss:  tensor(0.0606, grad_fn=<MseLossBackward0>)
Train Step: 49  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 49  loss:  tensor(0.0630, grad_fn=<MseLossBackward0>)
Train Step: 49  loss:  tensor(0.0610, grad_fn=<MseLossBackward0>)
Train Step: 49  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 49  loss:  tensor(0.0614, grad_fn=<MseLossBackward0>)
Train Step: 49  loss:  tensor(0.0610, grad_fn=<MseLossBackward0>)
Train Step: 49  loss:  tensor(0.0623, grad_fn=<MseLossBackward0>)
Train Step: 49  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 49  loss:  tensor(0.0611, grad_fn=<MseLossBackward0>)
Train Step: 49  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 49  loss:  tensor(0.0619, grad_fn=<MseLossBackward0>)
Train Step: 49  loss:  tensor(0.0608, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 50  loss:  tensor(0.0611, grad_fn=<MseLossBackward0>)
Train Step: 50  loss:  tensor(0.0613, grad_fn=<MseLossBackward0>)
Train Step: 50  loss:  tensor(0.0608, grad_fn=<MseLossBackward0>)
Train Step: 50  loss:  tensor(0.0619, grad_fn=<MseLossBackward0>)
Train Step: 50  loss:  tensor(0.0618, grad_fn=<MseLossBackward0>)
Train Step: 50  loss:  tensor(0.0620, grad_fn=<MseLossBackward0>)
Train Step: 50  loss:  tensor(0.0613, grad_fn=<MseLossBackward0>)
Train Step: 50  loss:  tensor(0.0599, grad_fn=<MseLossBackward0>)
Train Step: 50  loss:  tensor(0.0624, grad_fn=<MseLossBackward0>)
Train Step: 50  loss:  tensor(0.0619, grad_fn=<MseLossBackward0>)
Train Step: 50  loss:  tensor(0.0604, grad_fn=<MseLossBackward0>)
Train Step: 50  loss:  tensor(0.0636, grad_fn=<MseLossBackward0>)
Train Step: 50  loss:  tensor(0.0625, grad_fn=<MseLossBackward0>)
Train Step: 50  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 50  loss:  tensor(0.0607, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 52  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 52  loss:  tensor(0.0622, grad_fn=<MseLossBackward0>)
Train Step: 52  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 52  loss:  tensor(0.0602, grad_fn=<MseLossBackward0>)
Train Step: 52  loss:  tensor(0.0612, grad_fn=<MseLossBackward0>)
Train Step: 52  loss:  tensor(0.0608, grad_fn=<MseLossBackward0>)
Train Step: 52  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 52  loss:  tensor(0.0617, grad_fn=<MseLossBackward0>)
Train Step: 52  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 52  loss:  tensor(0.0619, grad_fn=<MseLossBackward0>)
Train Step: 52  loss:  tensor(0.0602, grad_fn=<MseLossBackward0>)
Train Step: 52  loss:  tensor(0.0603, grad_fn=<MseLossBackward0>)
Train Step: 52  loss:  tensor(0.0611, grad_fn=<MseLossBackward0>)
Train Step: 52  loss:  tensor(0.0610, grad_fn=<MseLossBackward0>)
Train Step: 52  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 53  loss:  tensor(0.0605, grad_fn=<MseLossBackward0>)
Train Step: 53  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 53  loss:  tensor(0.0615, grad_fn=<MseLossBackward0>)
Train Step: 53  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 53  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 53  loss:  tensor(0.0602, grad_fn=<MseLossBackward0>)
Train Step: 53  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 53  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 53  loss:  tensor(0.0600, grad_fn=<MseLossBackward0>)
Train Step: 53  loss:  tensor(0.0628, grad_fn=<MseLossBackward0>)
Train Step: 53  loss:  tensor(0.0614, grad_fn=<MseLossBackward0>)
Train Step: 53  loss:  tensor(0.0616, grad_fn=<MseLossBackward0>)
Train Step: 53  loss:  tensor(0.0604, grad_fn=<MseLossBackward0>)
Train Step: 53  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 53  loss:  tensor(0.0614, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 54  loss:  tensor(0.0625, grad_fn=<MseLossBackward0>)
Train Step: 54  loss:  tensor(0.0605, grad_fn=<MseLossBackward0>)
Train Step: 54  loss:  tensor(0.0624, grad_fn=<MseLossBackward0>)
Train Step: 54  loss:  tensor(0.0630, grad_fn=<MseLossBackward0>)
Train Step: 54  loss:  tensor(0.0622, grad_fn=<MseLossBackward0>)
Train Step: 54  loss:  tensor(0.0600, grad_fn=<MseLossBackward0>)
Train Step: 54  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 54  loss:  tensor(0.0600, grad_fn=<MseLossBackward0>)
Train Step: 54  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 54  loss:  tensor(0.0608, grad_fn=<MseLossBackward0>)
Train Step: 54  loss:  tensor(0.0613, grad_fn=<MseLossBackward0>)
Train Step: 54  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 54  loss:  tensor(0.0615, grad_fn=<MseLossBackward0>)
Train Step: 54  loss:  tensor(0.0611, grad_fn=<MseLossBackward0>)
Train Step: 54  loss:  tensor(0.0627, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 56  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 56  loss:  tensor(0.0603, grad_fn=<MseLossBackward0>)
Train Step: 56  loss:  tensor(0.0616, grad_fn=<MseLossBackward0>)
Train Step: 56  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 56  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 56  loss:  tensor(0.0616, grad_fn=<MseLossBackward0>)
Train Step: 56  loss:  tensor(0.0618, grad_fn=<MseLossBackward0>)
Train Step: 56  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 56  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 56  loss:  tensor(0.0607, grad_fn=<MseLossBackward0>)
Train Step: 56  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 56  loss:  tensor(0.0623, grad_fn=<MseLossBackward0>)
Train Step: 56  loss:  tensor(0.0600, grad_fn=<MseLossBackward0>)
Train Step: 56  loss:  tensor(0.0613, grad_fn=<MseLossBackward0>)
Train Step: 56  loss:  tensor(0.0604, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 57  loss:  tensor(0.0615, grad_fn=<MseLossBackward0>)
Train Step: 57  loss:  tensor(0.0610, grad_fn=<MseLossBackward0>)
Train Step: 57  loss:  tensor(0.0609, grad_fn=<MseLossBackward0>)
Train Step: 57  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 57  loss:  tensor(0.0611, grad_fn=<MseLossBackward0>)
Train Step: 57  loss:  tensor(0.0615, grad_fn=<MseLossBackward0>)
Train Step: 57  loss:  tensor(0.0602, grad_fn=<MseLossBackward0>)
Train Step: 57  loss:  tensor(0.0603, grad_fn=<MseLossBackward0>)
Train Step: 57  loss:  tensor(0.0603, grad_fn=<MseLossBackward0>)
Train Step: 57  loss:  tensor(0.0613, grad_fn=<MseLossBackward0>)
Train Step: 57  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 57  loss:  tensor(0.0606, grad_fn=<MseLossBackward0>)
Train Step: 57  loss:  tensor(0.0600, grad_fn=<MseLossBackward0>)
Train Step: 57  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 57  loss:  tensor(0.0608, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 58  loss:  tensor(0.0605, grad_fn=<MseLossBackward0>)
Train Step: 58  loss:  tensor(0.0607, grad_fn=<MseLossBackward0>)
Train Step: 58  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 58  loss:  tensor(0.0611, grad_fn=<MseLossBackward0>)
Train Step: 58  loss:  tensor(0.0613, grad_fn=<MseLossBackward0>)
Train Step: 58  loss:  tensor(0.0607, grad_fn=<MseLossBackward0>)
Train Step: 58  loss:  tensor(0.0607, grad_fn=<MseLossBackward0>)
Train Step: 58  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 58  loss:  tensor(0.0609, grad_fn=<MseLossBackward0>)
Train Step: 58  loss:  tensor(0.0603, grad_fn=<MseLossBackward0>)
Train Step: 58  loss:  tensor(0.0604, grad_fn=<MseLossBackward0>)
Train Step: 58  loss:  tensor(0.0595, grad_fn=<MseLossBackward0>)
Train Step: 58  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 58  loss:  tensor(0.0620, grad_fn=<MseLossBackward0>)
Train Step: 58  loss:  tensor(0.0604, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 60  loss:  tensor(0.0592, grad_fn=<MseLossBackward0>)
Train Step: 60  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 60  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 60  loss:  tensor(0.0599, grad_fn=<MseLossBackward0>)
Train Step: 60  loss:  tensor(0.0613, grad_fn=<MseLossBackward0>)
Train Step: 60  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 60  loss:  tensor(0.0608, grad_fn=<MseLossBackward0>)
Train Step: 60  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 60  loss:  tensor(0.0599, grad_fn=<MseLossBackward0>)
Train Step: 60  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 60  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 60  loss:  tensor(0.0609, grad_fn=<MseLossBackward0>)
Train Step: 60  loss:  tensor(0.0609, grad_fn=<MseLossBackward0>)
Train Step: 60  loss:  tensor(0.0615, grad_fn=<MseLossBackward0>)
Train Step: 60  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 61  loss:  tensor(0.0600, grad_fn=<MseLossBackward0>)
Train Step: 61  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 61  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 61  loss:  tensor(0.0602, grad_fn=<MseLossBackward0>)
Train Step: 61  loss:  tensor(0.0604, grad_fn=<MseLossBackward0>)
Train Step: 61  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 61  loss:  tensor(0.0608, grad_fn=<MseLossBackward0>)
Train Step: 61  loss:  tensor(0.0618, grad_fn=<MseLossBackward0>)
Train Step: 61  loss:  tensor(0.0599, grad_fn=<MseLossBackward0>)
Train Step: 61  loss:  tensor(0.0606, grad_fn=<MseLossBackward0>)
Train Step: 61  loss:  tensor(0.0609, grad_fn=<MseLossBackward0>)
Train Step: 61  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 61  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 61  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 61  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 62  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 62  loss:  tensor(0.0612, grad_fn=<MseLossBackward0>)
Train Step: 62  loss:  tensor(0.0608, grad_fn=<MseLossBackward0>)
Train Step: 62  loss:  tensor(0.0606, grad_fn=<MseLossBackward0>)
Train Step: 62  loss:  tensor(0.0608, grad_fn=<MseLossBackward0>)
Train Step: 62  loss:  tensor(0.0602, grad_fn=<MseLossBackward0>)
Train Step: 62  loss:  tensor(0.0605, grad_fn=<MseLossBackward0>)
Train Step: 62  loss:  tensor(0.0615, grad_fn=<MseLossBackward0>)
Train Step: 62  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 62  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 62  loss:  tensor(0.0606, grad_fn=<MseLossBackward0>)
Train Step: 62  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 62  loss:  tensor(0.0624, grad_fn=<MseLossBackward0>)
Train Step: 62  loss:  tensor(0.0603, grad_fn=<MseLossBackward0>)
Train Step: 62  loss:  tensor(0.0606, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 64  loss:  tensor(0.0620, grad_fn=<MseLossBackward0>)
Train Step: 64  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 64  loss:  tensor(0.0616, grad_fn=<MseLossBackward0>)
Train Step: 64  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 64  loss:  tensor(0.0608, grad_fn=<MseLossBackward0>)
Train Step: 64  loss:  tensor(0.0612, grad_fn=<MseLossBackward0>)
Train Step: 64  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 64  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 64  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 64  loss:  tensor(0.0595, grad_fn=<MseLossBackward0>)
Train Step: 64  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 64  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 64  loss:  tensor(0.0595, grad_fn=<MseLossBackward0>)
Train Step: 64  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 64  loss:  tensor(0.0614, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 65  loss:  tensor(0.0621, grad_fn=<MseLossBackward0>)
Train Step: 65  loss:  tensor(0.0604, grad_fn=<MseLossBackward0>)
Train Step: 65  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 65  loss:  tensor(0.0592, grad_fn=<MseLossBackward0>)
Train Step: 65  loss:  tensor(0.0610, grad_fn=<MseLossBackward0>)
Train Step: 65  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 65  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 65  loss:  tensor(0.0611, grad_fn=<MseLossBackward0>)
Train Step: 65  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 65  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 65  loss:  tensor(0.0618, grad_fn=<MseLossBackward0>)
Train Step: 65  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 65  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 65  loss:  tensor(0.0599, grad_fn=<MseLossBackward0>)
Train Step: 65  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 66  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 66  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 66  loss:  tensor(0.0604, grad_fn=<MseLossBackward0>)
Train Step: 66  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 66  loss:  tensor(0.0610, grad_fn=<MseLossBackward0>)
Train Step: 66  loss:  tensor(0.0599, grad_fn=<MseLossBackward0>)
Train Step: 66  loss:  tensor(0.0616, grad_fn=<MseLossBackward0>)
Train Step: 66  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 66  loss:  tensor(0.0595, grad_fn=<MseLossBackward0>)
Train Step: 66  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 66  loss:  tensor(0.0609, grad_fn=<MseLossBackward0>)
Train Step: 66  loss:  tensor(0.0605, grad_fn=<MseLossBackward0>)
Train Step: 66  loss:  tensor(0.0605, grad_fn=<MseLossBackward0>)
Train Step: 66  loss:  tensor(0.0609, grad_fn=<MseLossBackward0>)
Train Step: 66  loss:  tensor(0.0612, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 68  loss:  tensor(0.0599, grad_fn=<MseLossBackward0>)
Train Step: 68  loss:  tensor(0.0600, grad_fn=<MseLossBackward0>)
Train Step: 68  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 68  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 68  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 68  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 68  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 68  loss:  tensor(0.0603, grad_fn=<MseLossBackward0>)
Train Step: 68  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 68  loss:  tensor(0.0608, grad_fn=<MseLossBackward0>)
Train Step: 68  loss:  tensor(0.0613, grad_fn=<MseLossBackward0>)
Train Step: 68  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 68  loss:  tensor(0.0608, grad_fn=<MseLossBackward0>)
Train Step: 68  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 68  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 69  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 69  loss:  tensor(0.0595, grad_fn=<MseLossBackward0>)
Train Step: 69  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 69  loss:  tensor(0.0609, grad_fn=<MseLossBackward0>)
Train Step: 69  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 69  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 69  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 69  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 69  loss:  tensor(0.0617, grad_fn=<MseLossBackward0>)
Train Step: 69  loss:  tensor(0.0602, grad_fn=<MseLossBackward0>)
Train Step: 69  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 69  loss:  tensor(0.0607, grad_fn=<MseLossBackward0>)
Train Step: 69  loss:  tensor(0.0620, grad_fn=<MseLossBackward0>)
Train Step: 69  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 69  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 71  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 71  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 71  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 71  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 71  loss:  tensor(0.0602, grad_fn=<MseLossBackward0>)
Train Step: 71  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 71  loss:  tensor(0.0602, grad_fn=<MseLossBackward0>)
Train Step: 71  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 71  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 71  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 71  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 71  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 71  loss:  tensor(0.0618, grad_fn=<MseLossBackward0>)
Train Step: 71  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 71  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 72  loss:  tensor(0.0622, grad_fn=<MseLossBackward0>)
Train Step: 72  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 72  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 72  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 72  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 72  loss:  tensor(0.0599, grad_fn=<MseLossBackward0>)
Train Step: 72  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 72  loss:  tensor(0.0610, grad_fn=<MseLossBackward0>)
Train Step: 72  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 72  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 72  loss:  tensor(0.0604, grad_fn=<MseLossBackward0>)
Train Step: 72  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 72  loss:  tensor(0.0600, grad_fn=<MseLossBackward0>)
Train Step: 72  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 72  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 73  loss:  tensor(0.0600, grad_fn=<MseLossBackward0>)
Train Step: 73  loss:  tensor(0.0599, grad_fn=<MseLossBackward0>)
Train Step: 73  loss:  tensor(0.0610, grad_fn=<MseLossBackward0>)
Train Step: 73  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 73  loss:  tensor(0.0602, grad_fn=<MseLossBackward0>)
Train Step: 73  loss:  tensor(0.0608, grad_fn=<MseLossBackward0>)
Train Step: 73  loss:  tensor(0.0608, grad_fn=<MseLossBackward0>)
Train Step: 73  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 73  loss:  tensor(0.0609, grad_fn=<MseLossBackward0>)
Train Step: 73  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 73  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 73  loss:  tensor(0.0602, grad_fn=<MseLossBackward0>)
Train Step: 73  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 73  loss:  tensor(0.0610, grad_fn=<MseLossBackward0>)
Train Step: 73  loss:  tensor(0.0606, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 75  loss:  tensor(0.0599, grad_fn=<MseLossBackward0>)
Train Step: 75  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 75  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 75  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 75  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 75  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 75  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 75  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 75  loss:  tensor(0.0602, grad_fn=<MseLossBackward0>)
Train Step: 75  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 75  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 75  loss:  tensor(0.0600, grad_fn=<MseLossBackward0>)
Train Step: 75  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 75  loss:  tensor(0.0600, grad_fn=<MseLossBackward0>)
Train Step: 75  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 76  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 76  loss:  tensor(0.0613, grad_fn=<MseLossBackward0>)
Train Step: 76  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 76  loss:  tensor(0.0595, grad_fn=<MseLossBackward0>)
Train Step: 76  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 76  loss:  tensor(0.0613, grad_fn=<MseLossBackward0>)
Train Step: 76  loss:  tensor(0.0603, grad_fn=<MseLossBackward0>)
Train Step: 76  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 76  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 76  loss:  tensor(0.0614, grad_fn=<MseLossBackward0>)
Train Step: 76  loss:  tensor(0.0613, grad_fn=<MseLossBackward0>)
Train Step: 76  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 76  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 76  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 76  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 77  loss:  tensor(0.0592, grad_fn=<MseLossBackward0>)
Train Step: 77  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 77  loss:  tensor(0.0618, grad_fn=<MseLossBackward0>)
Train Step: 77  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 77  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 77  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 77  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 77  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 77  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 77  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 77  loss:  tensor(0.0602, grad_fn=<MseLossBackward0>)
Train Step: 77  loss:  tensor(0.0610, grad_fn=<MseLossBackward0>)
Train Step: 77  loss:  tensor(0.0611, grad_fn=<MseLossBackward0>)
Train Step: 77  loss:  tensor(0.0599, grad_fn=<MseLossBackward0>)
Train Step: 77  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 79  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 79  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 79  loss:  tensor(0.0595, grad_fn=<MseLossBackward0>)
Train Step: 79  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 79  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 79  loss:  tensor(0.0610, grad_fn=<MseLossBackward0>)
Train Step: 79  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 79  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 79  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 79  loss:  tensor(0.0595, grad_fn=<MseLossBackward0>)
Train Step: 79  loss:  tensor(0.0592, grad_fn=<MseLossBackward0>)
Train Step: 79  loss:  tensor(0.0582, grad_fn=<MseLossBackward0>)
Train Step: 79  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 79  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 79  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 80  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 80  loss:  tensor(0.0612, grad_fn=<MseLossBackward0>)
Train Step: 80  loss:  tensor(0.0619, grad_fn=<MseLossBackward0>)
Train Step: 80  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 80  loss:  tensor(0.0595, grad_fn=<MseLossBackward0>)
Train Step: 80  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 80  loss:  tensor(0.0592, grad_fn=<MseLossBackward0>)
Train Step: 80  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 80  loss:  tensor(0.0607, grad_fn=<MseLossBackward0>)
Train Step: 80  loss:  tensor(0.0607, grad_fn=<MseLossBackward0>)
Train Step: 80  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 80  loss:  tensor(0.0606, grad_fn=<MseLossBackward0>)
Train Step: 80  loss:  tensor(0.0614, grad_fn=<MseLossBackward0>)
Train Step: 80  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 80  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 81  loss:  tensor(0.0620, grad_fn=<MseLossBackward0>)
Train Step: 81  loss:  tensor(0.0610, grad_fn=<MseLossBackward0>)
Train Step: 81  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 81  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 81  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 81  loss:  tensor(0.0563, grad_fn=<MseLossBackward0>)
Train Step: 81  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 81  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 81  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 81  loss:  tensor(0.0599, grad_fn=<MseLossBackward0>)
Train Step: 81  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 81  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 81  loss:  tensor(0.0600, grad_fn=<MseLossBackward0>)
Train Step: 81  loss:  tensor(0.0610, grad_fn=<MseLossBackward0>)
Train Step: 81  loss:  tensor(0.0617, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 83  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 83  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 83  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 83  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 83  loss:  tensor(0.0603, grad_fn=<MseLossBackward0>)
Train Step: 83  loss:  tensor(0.0592, grad_fn=<MseLossBackward0>)
Train Step: 83  loss:  tensor(0.0592, grad_fn=<MseLossBackward0>)
Train Step: 83  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 83  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 83  loss:  tensor(0.0595, grad_fn=<MseLossBackward0>)
Train Step: 83  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 83  loss:  tensor(0.0582, grad_fn=<MseLossBackward0>)
Train Step: 83  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 83  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 83  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 84  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 84  loss:  tensor(0.0592, grad_fn=<MseLossBackward0>)
Train Step: 84  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 84  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 84  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 84  loss:  tensor(0.0600, grad_fn=<MseLossBackward0>)
Train Step: 84  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 84  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 84  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 84  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 84  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 84  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 84  loss:  tensor(0.0592, grad_fn=<MseLossBackward0>)
Train Step: 84  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 84  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 85  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 85  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 85  loss:  tensor(0.0595, grad_fn=<MseLossBackward0>)
Train Step: 85  loss:  tensor(0.0592, grad_fn=<MseLossBackward0>)
Train Step: 85  loss:  tensor(0.0599, grad_fn=<MseLossBackward0>)
Train Step: 85  loss:  tensor(0.0610, grad_fn=<MseLossBackward0>)
Train Step: 85  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 85  loss:  tensor(0.0569, grad_fn=<MseLossBackward0>)
Train Step: 85  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 85  loss:  tensor(0.0602, grad_fn=<MseLossBackward0>)
Train Step: 85  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 85  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 85  loss:  tensor(0.0600, grad_fn=<MseLossBackward0>)
Train Step: 85  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 85  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 87  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 87  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 87  loss:  tensor(0.0606, grad_fn=<MseLossBackward0>)
Train Step: 87  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 87  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 87  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 87  loss:  tensor(0.0567, grad_fn=<MseLossBackward0>)
Train Step: 87  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 87  loss:  tensor(0.0582, grad_fn=<MseLossBackward0>)
Train Step: 87  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 87  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 87  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 87  loss:  tensor(0.0606, grad_fn=<MseLossBackward0>)
Train Step: 87  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 87  loss:  tensor(0.0602, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 88  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 88  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 88  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 88  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 88  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 88  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 88  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 88  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 88  loss:  tensor(0.0556, grad_fn=<MseLossBackward0>)
Train Step: 88  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 88  loss:  tensor(0.0603, grad_fn=<MseLossBackward0>)
Train Step: 88  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 88  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 88  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 88  loss:  tensor(0.0595, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 89  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 90  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 90  loss:  tensor(0.0568, grad_fn=<MseLossBackward0>)
Train Step: 90  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 90  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 90  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 90  loss:  tensor(0.0568, grad_fn=<MseLossBackward0>)
Train Step: 90  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 90  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 90  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 90  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 90  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 90  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 90  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 90  loss:  tensor(0.0582, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 91  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 91  loss:  tensor(0.0599, grad_fn=<MseLossBackward0>)
Train Step: 91  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 91  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 91  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 91  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 91  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 91  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 91  loss:  tensor(0.0599, grad_fn=<MseLossBackward0>)
Train Step: 91  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 91  loss:  tensor(0.0570, grad_fn=<MseLossBackward0>)
Train Step: 91  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 91  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 91  loss:  tensor(0.0604, grad_fn=<MseLossBackward0>)
Train Step: 91  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 92  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 92  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 92  loss:  tensor(0.0564, grad_fn=<MseLossBackward0>)
Train Step: 92  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 92  loss:  tensor(0.0569, grad_fn=<MseLossBackward0>)
Train Step: 92  loss:  tensor(0.0603, grad_fn=<MseLossBackward0>)
Train Step: 92  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 92  loss:  tensor(0.0592, grad_fn=<MseLossBackward0>)
Train Step: 92  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 92  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 92  loss:  tensor(0.0600, grad_fn=<MseLossBackward0>)
Train Step: 92  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 92  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 92  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 92  loss:  tensor(0.0592, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 93  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 93  loss:  tensor(0.0553, grad_fn=<MseLossBackward0>)
Train Step: 93  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 94  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 94  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 94  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 94  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 94  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 94  loss:  tensor(0.0608, grad_fn=<MseLossBackward0>)
Train Step: 94  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 94  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 94  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 94  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 94  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 94  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 95  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 95  loss:  tensor(0.0568, grad_fn=<MseLossBackward0>)
Train Step: 95  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 95  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 95  loss:  tensor(0.0568, grad_fn=<MseLossBackward0>)
Train Step: 95  loss:  tensor(0.0599, grad_fn=<MseLossBackward0>)
Train Step: 95  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 95  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 95  loss:  tensor(0.0595, grad_fn=<MseLossBackward0>)
Train Step: 95  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 95  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 95  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 95  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 95  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 95  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 96  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 96  loss:  tensor(0.0565, grad_fn=<MseLossBackward0>)
Train Step: 96  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 96  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 96  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 96  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 96  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 96  loss:  tensor(0.0603, grad_fn=<MseLossBackward0>)
Train Step: 96  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 96  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 96  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 96  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 96  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 96  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 96  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 97  loss:  tensor(0.0592, grad_fn=<MseLossBackward0>)
Train Step: 97  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 97  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 97  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 98  loss:  tensor(0.0570, grad_fn=<MseLossBackward0>)
Train Step: 98  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 98  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 98  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 98  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 98  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 98  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 98  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 98  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 98  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 98  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 99  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 99  loss:  tensor(0.0595, grad_fn=<MseLossBackward0>)
Train Step: 99  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 99  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 99  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 99  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 99  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 99  loss:  tensor(0.0600, grad_fn=<MseLossBackward0>)
Train Step: 99  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 99  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 99  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 99  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 99  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 99  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 99  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step

Train Step: 100  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 100  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 100  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 100  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 100  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 100  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 100  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 100  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 100  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 100  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 100  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 100  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 100  loss:  tensor(0.0607, grad_fn=<MseLossBackward0>)
Train Step: 100  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 100  loss:  tensor(0.0594, grad_fn=<MseLossBackwar

Train Step: 101  loss:  tensor(0.0596, grad_fn=<MseLossBackward0>)
Train Step: 101  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 101  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 101  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 101  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 101  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 101  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 101  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 101  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 101  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 101  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 101  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 101  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 101  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 101  loss:  tensor(0.0593, grad_fn=<MseLossBackwar

Train Step: 103  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 103  loss:  tensor(0.0567, grad_fn=<MseLossBackward0>)
Train Step: 103  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 103  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 103  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 103  loss:  tensor(0.0568, grad_fn=<MseLossBackward0>)
Train Step: 103  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 103  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 103  loss:  tensor(0.0561, grad_fn=<MseLossBackward0>)
Train Step: 103  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 103  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 103  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 103  loss:  tensor(0.0564, grad_fn=<MseLossBackward0>)
Train Step: 103  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 103  loss:  tensor(0.0583, grad_fn=<MseLossBackwar

Train Step: 104  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 104  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 104  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 104  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 104  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 104  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 104  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 104  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 104  loss:  tensor(0.0595, grad_fn=<MseLossBackward0>)
Train Step: 104  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 104  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 104  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 104  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 104  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 104  loss:  tensor(0.0576, grad_fn=<MseLossBackwar

Train Step: 105  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 105  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 105  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 105  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 105  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 105  loss:  tensor(0.0567, grad_fn=<MseLossBackward0>)
Train Step: 105  loss:  tensor(0.0602, grad_fn=<MseLossBackward0>)
Train Step: 105  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 105  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 105  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 105  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 105  loss:  tensor(0.0568, grad_fn=<MseLossBackward0>)
Train Step: 105  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 105  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 105  loss:  tensor(0.0587, grad_fn=<MseLossBackwar

Train Step: 107  loss:  tensor(0.0569, grad_fn=<MseLossBackward0>)
Train Step: 107  loss:  tensor(0.0560, grad_fn=<MseLossBackward0>)
Train Step: 107  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 107  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 107  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 107  loss:  tensor(0.0595, grad_fn=<MseLossBackward0>)
Train Step: 107  loss:  tensor(0.0582, grad_fn=<MseLossBackward0>)
Train Step: 107  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 107  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 107  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 107  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 107  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 107  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 107  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 107  loss:  tensor(0.0574, grad_fn=<MseLossBackwar

Train Step: 108  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 108  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 108  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 108  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 108  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 108  loss:  tensor(0.0603, grad_fn=<MseLossBackward0>)
Train Step: 108  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 108  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 108  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 108  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 108  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 108  loss:  tensor(0.0592, grad_fn=<MseLossBackward0>)
Train Step: 108  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 108  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 108  loss:  tensor(0.0596, grad_fn=<MseLossBackwar

Train Step: 109  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 109  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 109  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 109  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 109  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 109  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 109  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 109  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 109  loss:  tensor(0.0597, grad_fn=<MseLossBackward0>)
Train Step: 109  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 109  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 109  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 109  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 109  loss:  tensor(0.0565, grad_fn=<MseLossBackward0>)
Train Step: 109  loss:  tensor(0.0581, grad_fn=<MseLossBackwar

Train Step: 111  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 111  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 111  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 111  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 111  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 111  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 111  loss:  tensor(0.0561, grad_fn=<MseLossBackward0>)
Train Step: 111  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 111  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 111  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 111  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 111  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 111  loss:  tensor(0.0570, grad_fn=<MseLossBackward0>)
Train Step: 111  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 111  loss:  tensor(0.0582, grad_fn=<MseLossBackwar

Train Step: 112  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 112  loss:  tensor(0.0569, grad_fn=<MseLossBackward0>)
Train Step: 112  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 112  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 112  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 112  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 112  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 112  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 112  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 112  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 112  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 112  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 112  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 112  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 112  loss:  tensor(0.0564, grad_fn=<MseLossBackwar

Train Step: 113  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 113  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 113  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 113  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 113  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 113  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 113  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 113  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 113  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 113  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 113  loss:  tensor(0.0582, grad_fn=<MseLossBackward0>)
Train Step: 113  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 113  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 113  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 113  loss:  tensor(0.0598, grad_fn=<MseLossBackwar

Train Step: 115  loss:  tensor(0.0556, grad_fn=<MseLossBackward0>)
Train Step: 115  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 115  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 115  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 115  loss:  tensor(0.0560, grad_fn=<MseLossBackward0>)
Train Step: 115  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 115  loss:  tensor(0.0563, grad_fn=<MseLossBackward0>)
Train Step: 115  loss:  tensor(0.0606, grad_fn=<MseLossBackward0>)
Train Step: 115  loss:  tensor(0.0565, grad_fn=<MseLossBackward0>)
Train Step: 115  loss:  tensor(0.0599, grad_fn=<MseLossBackward0>)
Train Step: 115  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 115  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 115  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 115  loss:  tensor(0.0564, grad_fn=<MseLossBackward0>)
Train Step: 115  loss:  tensor(0.0591, grad_fn=<MseLossBackwar

Train Step: 116  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 116  loss:  tensor(0.0561, grad_fn=<MseLossBackward0>)
Train Step: 116  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 116  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 116  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 116  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 116  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 116  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 116  loss:  tensor(0.0592, grad_fn=<MseLossBackward0>)
Train Step: 116  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 116  loss:  tensor(0.0565, grad_fn=<MseLossBackward0>)
Train Step: 116  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 116  loss:  tensor(0.0568, grad_fn=<MseLossBackward0>)
Train Step: 116  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 116  loss:  tensor(0.0569, grad_fn=<MseLossBackwar

Train Step: 117  loss:  tensor(0.0569, grad_fn=<MseLossBackward0>)
Train Step: 117  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 117  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 117  loss:  tensor(0.0570, grad_fn=<MseLossBackward0>)
Train Step: 117  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 117  loss:  tensor(0.0570, grad_fn=<MseLossBackward0>)
Train Step: 118  loss:  tensor(0.0567, grad_fn=<MseLossBackward0>)
Train Step: 118  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 118  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 118  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 118  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 118  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 118  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 118  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 118  loss:  tensor(0.0561, grad_fn=<MseLossBackwar

Train Step: 119  loss:  tensor(0.0550, grad_fn=<MseLossBackward0>)
Train Step: 119  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 119  loss:  tensor(0.0569, grad_fn=<MseLossBackward0>)
Train Step: 119  loss:  tensor(0.0566, grad_fn=<MseLossBackward0>)
Train Step: 119  loss:  tensor(0.0569, grad_fn=<MseLossBackward0>)
Train Step: 119  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 119  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 119  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 119  loss:  tensor(0.0559, grad_fn=<MseLossBackward0>)
Train Step: 119  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 119  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 119  loss:  tensor(0.0582, grad_fn=<MseLossBackward0>)
Train Step: 119  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 119  loss:  tensor(0.0569, grad_fn=<MseLossBackward0>)
Train Step: 119  loss:  tensor(0.0583, grad_fn=<MseLossBackwar

Train Step: 120  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 120  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 120  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 120  loss:  tensor(0.0560, grad_fn=<MseLossBackward0>)
Train Step: 120  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 120  loss:  tensor(0.0564, grad_fn=<MseLossBackward0>)
Train Step: 120  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 120  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 120  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 120  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 120  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 120  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 120  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 120  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 120  loss:  tensor(0.0567, grad_fn=<MseLossBackwar

Train Step: 121  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 121  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 121  loss:  tensor(0.0567, grad_fn=<MseLossBackward0>)
Train Step: 121  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 121  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 121  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 121  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 121  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 121  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 121  loss:  tensor(0.0563, grad_fn=<MseLossBackward0>)
Train Step: 121  loss:  tensor(0.0601, grad_fn=<MseLossBackward0>)
Train Step: 121  loss:  tensor(0.0563, grad_fn=<MseLossBackward0>)
Train Step: 121  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 121  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 121  loss:  tensor(0.0583, grad_fn=<MseLossBackwar

Train Step: 123  loss:  tensor(0.0561, grad_fn=<MseLossBackward0>)
Train Step: 123  loss:  tensor(0.0565, grad_fn=<MseLossBackward0>)
Train Step: 123  loss:  tensor(0.0559, grad_fn=<MseLossBackward0>)
Train Step: 123  loss:  tensor(0.0563, grad_fn=<MseLossBackward0>)
Train Step: 123  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 123  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 123  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 123  loss:  tensor(0.0569, grad_fn=<MseLossBackward0>)
Train Step: 123  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 123  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 123  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 123  loss:  tensor(0.0570, grad_fn=<MseLossBackward0>)
Train Step: 123  loss:  tensor(0.0565, grad_fn=<MseLossBackward0>)
Train Step: 123  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 123  loss:  tensor(0.0580, grad_fn=<MseLossBackwar

Train Step: 124  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 124  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 124  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 124  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 124  loss:  tensor(0.0563, grad_fn=<MseLossBackward0>)
Train Step: 124  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 124  loss:  tensor(0.0565, grad_fn=<MseLossBackward0>)
Train Step: 124  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 124  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 124  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 124  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 124  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 124  loss:  tensor(0.0565, grad_fn=<MseLossBackward0>)
Train Step: 124  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 124  loss:  tensor(0.0565, grad_fn=<MseLossBackwar

Train Step: 125  loss:  tensor(0.0559, grad_fn=<MseLossBackward0>)
Train Step: 125  loss:  tensor(0.0598, grad_fn=<MseLossBackward0>)
Train Step: 125  loss:  tensor(0.0557, grad_fn=<MseLossBackward0>)
Train Step: 125  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 125  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 125  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 125  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 125  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 125  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 125  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 125  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 125  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 125  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 125  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 125  loss:  tensor(0.0581, grad_fn=<MseLossBackwar

Train Step: 127  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 127  loss:  tensor(0.0562, grad_fn=<MseLossBackward0>)
Train Step: 127  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 127  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 127  loss:  tensor(0.0567, grad_fn=<MseLossBackward0>)
Train Step: 127  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 127  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 127  loss:  tensor(0.0582, grad_fn=<MseLossBackward0>)
Train Step: 127  loss:  tensor(0.0568, grad_fn=<MseLossBackward0>)
Train Step: 127  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 127  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 127  loss:  tensor(0.0564, grad_fn=<MseLossBackward0>)
Train Step: 127  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 127  loss:  tensor(0.0565, grad_fn=<MseLossBackward0>)
Train Step: 127  loss:  tensor(0.0575, grad_fn=<MseLossBackwar

Train Step: 128  loss:  tensor(0.0564, grad_fn=<MseLossBackward0>)
Train Step: 128  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 128  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 128  loss:  tensor(0.0560, grad_fn=<MseLossBackward0>)
Train Step: 128  loss:  tensor(0.0567, grad_fn=<MseLossBackward0>)
Train Step: 128  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 128  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 128  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 128  loss:  tensor(0.0568, grad_fn=<MseLossBackward0>)
Train Step: 128  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 128  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 128  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 128  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 128  loss:  tensor(0.0563, grad_fn=<MseLossBackward0>)
Train Step: 128  loss:  tensor(0.0581, grad_fn=<MseLossBackwar

Train Step: 129  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 129  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 129  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 129  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 129  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 129  loss:  tensor(0.0570, grad_fn=<MseLossBackward0>)
Train Step: 129  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 129  loss:  tensor(0.0567, grad_fn=<MseLossBackward0>)
Train Step: 129  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 130  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 130  loss:  tensor(0.0568, grad_fn=<MseLossBackward0>)
Train Step: 130  loss:  tensor(0.0591, grad_fn=<MseLossBackward0>)
Train Step: 130  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 130  loss:  tensor(0.0569, grad_fn=<MseLossBackward0>)
Train Step: 130  loss:  tensor(0.0577, grad_fn=<MseLossBackwar

Train Step: 131  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 131  loss:  tensor(0.0582, grad_fn=<MseLossBackward0>)
Train Step: 131  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 131  loss:  tensor(0.0567, grad_fn=<MseLossBackward0>)
Train Step: 131  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 131  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 131  loss:  tensor(0.0564, grad_fn=<MseLossBackward0>)
Train Step: 131  loss:  tensor(0.0564, grad_fn=<MseLossBackward0>)
Train Step: 131  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 131  loss:  tensor(0.0592, grad_fn=<MseLossBackward0>)
Train Step: 131  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 131  loss:  tensor(0.0550, grad_fn=<MseLossBackward0>)
Train Step: 131  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 131  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 131  loss:  tensor(0.0579, grad_fn=<MseLossBackwar

Train Step: 132  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 132  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 132  loss:  tensor(0.0566, grad_fn=<MseLossBackward0>)
Train Step: 132  loss:  tensor(0.0557, grad_fn=<MseLossBackward0>)
Train Step: 132  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 132  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 132  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 132  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 132  loss:  tensor(0.0560, grad_fn=<MseLossBackward0>)
Train Step: 132  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 132  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 132  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 132  loss:  tensor(0.0594, grad_fn=<MseLossBackward0>)
Train Step: 132  loss:  tensor(0.0593, grad_fn=<MseLossBackward0>)
Train Step: 132  loss:  tensor(0.0569, grad_fn=<MseLossBackwar

Train Step: 133  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 133  loss:  tensor(0.0570, grad_fn=<MseLossBackward0>)
Train Step: 133  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 133  loss:  tensor(0.0568, grad_fn=<MseLossBackward0>)
Train Step: 133  loss:  tensor(0.0568, grad_fn=<MseLossBackward0>)
Train Step: 133  loss:  tensor(0.0563, grad_fn=<MseLossBackward0>)
Train Step: 133  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 133  loss:  tensor(0.0609, grad_fn=<MseLossBackward0>)
Train Step: 134  loss:  tensor(0.0553, grad_fn=<MseLossBackward0>)
Train Step: 134  loss:  tensor(0.0552, grad_fn=<MseLossBackward0>)
Train Step: 134  loss:  tensor(0.0563, grad_fn=<MseLossBackward0>)
Train Step: 134  loss:  tensor(0.0564, grad_fn=<MseLossBackward0>)
Train Step: 134  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 134  loss:  tensor(0.0557, grad_fn=<MseLossBackward0>)
Train Step: 134  loss:  tensor(0.0581, grad_fn=<MseLossBackwar

Train Step: 135  loss:  tensor(0.0551, grad_fn=<MseLossBackward0>)
Train Step: 135  loss:  tensor(0.0544, grad_fn=<MseLossBackward0>)
Train Step: 135  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 135  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 135  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 135  loss:  tensor(0.0590, grad_fn=<MseLossBackward0>)
Train Step: 135  loss:  tensor(0.0565, grad_fn=<MseLossBackward0>)
Train Step: 135  loss:  tensor(0.0565, grad_fn=<MseLossBackward0>)
Train Step: 135  loss:  tensor(0.0565, grad_fn=<MseLossBackward0>)
Train Step: 135  loss:  tensor(0.0551, grad_fn=<MseLossBackward0>)
Train Step: 135  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 135  loss:  tensor(0.0568, grad_fn=<MseLossBackward0>)
Train Step: 135  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 135  loss:  tensor(0.0550, grad_fn=<MseLossBackward0>)
Train Step: 135  loss:  tensor(0.0578, grad_fn=<MseLossBackwar

Train Step: 136  loss:  tensor(0.0570, grad_fn=<MseLossBackward0>)
Train Step: 136  loss:  tensor(0.0567, grad_fn=<MseLossBackward0>)
Train Step: 136  loss:  tensor(0.0569, grad_fn=<MseLossBackward0>)
Train Step: 136  loss:  tensor(0.0563, grad_fn=<MseLossBackward0>)
Train Step: 136  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 136  loss:  tensor(0.0569, grad_fn=<MseLossBackward0>)
Train Step: 136  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 136  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 136  loss:  tensor(0.0564, grad_fn=<MseLossBackward0>)
Train Step: 136  loss:  tensor(0.0569, grad_fn=<MseLossBackward0>)
Train Step: 136  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 136  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 136  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 136  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 136  loss:  tensor(0.0586, grad_fn=<MseLossBackwar

Train Step: 137  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 137  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 137  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 137  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 137  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 137  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 137  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 137  loss:  tensor(0.0562, grad_fn=<MseLossBackward0>)
Train Step: 137  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 137  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 137  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 137  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 137  loss:  tensor(0.0548, grad_fn=<MseLossBackward0>)
Train Step: 137  loss:  tensor(0.0567, grad_fn=<MseLossBackward0>)
Train Step: 137  loss:  tensor(0.0573, grad_fn=<MseLossBackwar

Train Step: 138  loss:  tensor(0.0567, grad_fn=<MseLossBackward0>)
Train Step: 138  loss:  tensor(0.0588, grad_fn=<MseLossBackward0>)
Train Step: 139  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 139  loss:  tensor(0.0546, grad_fn=<MseLossBackward0>)
Train Step: 139  loss:  tensor(0.0544, grad_fn=<MseLossBackward0>)
Train Step: 139  loss:  tensor(0.0569, grad_fn=<MseLossBackward0>)
Train Step: 139  loss:  tensor(0.0564, grad_fn=<MseLossBackward0>)
Train Step: 139  loss:  tensor(0.0570, grad_fn=<MseLossBackward0>)
Train Step: 139  loss:  tensor(0.0554, grad_fn=<MseLossBackward0>)
Train Step: 139  loss:  tensor(0.0567, grad_fn=<MseLossBackward0>)
Train Step: 139  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 139  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 139  loss:  tensor(0.0570, grad_fn=<MseLossBackward0>)
Train Step: 139  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 139  loss:  tensor(0.0577, grad_fn=<MseLossBackwar

Train Step: 140  loss:  tensor(0.0570, grad_fn=<MseLossBackward0>)
Train Step: 140  loss:  tensor(0.0563, grad_fn=<MseLossBackward0>)
Train Step: 140  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 140  loss:  tensor(0.0582, grad_fn=<MseLossBackward0>)
Train Step: 140  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 140  loss:  tensor(0.0561, grad_fn=<MseLossBackward0>)
Train Step: 140  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 140  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 140  loss:  tensor(0.0560, grad_fn=<MseLossBackward0>)
Train Step: 140  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 140  loss:  tensor(0.0563, grad_fn=<MseLossBackward0>)
Train Step: 140  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 140  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 140  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 140  loss:  tensor(0.0569, grad_fn=<MseLossBackwar

Train Step: 141  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 141  loss:  tensor(0.0567, grad_fn=<MseLossBackward0>)
Train Step: 141  loss:  tensor(0.0570, grad_fn=<MseLossBackward0>)
Train Step: 141  loss:  tensor(0.0582, grad_fn=<MseLossBackward0>)
Train Step: 141  loss:  tensor(0.0566, grad_fn=<MseLossBackward0>)
Train Step: 141  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 141  loss:  tensor(0.0560, grad_fn=<MseLossBackward0>)
Train Step: 141  loss:  tensor(0.0583, grad_fn=<MseLossBackward0>)
Train Step: 141  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 141  loss:  tensor(0.0568, grad_fn=<MseLossBackward0>)
Train Step: 141  loss:  tensor(0.0569, grad_fn=<MseLossBackward0>)
Train Step: 141  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 141  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 141  loss:  tensor(0.0561, grad_fn=<MseLossBackward0>)
Train Step: 141  loss:  tensor(0.0559, grad_fn=<MseLossBackwar

Train Step: 142  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 142  loss:  tensor(0.0562, grad_fn=<MseLossBackward0>)
Train Step: 142  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 142  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 142  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 142  loss:  tensor(0.0565, grad_fn=<MseLossBackward0>)
Train Step: 142  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 142  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 142  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 142  loss:  tensor(0.0570, grad_fn=<MseLossBackward0>)
Train Step: 142  loss:  tensor(0.0555, grad_fn=<MseLossBackward0>)
Train Step: 142  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 142  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 142  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 142  loss:  tensor(0.0579, grad_fn=<MseLossBackwar

Train Step: 144  loss:  tensor(0.0563, grad_fn=<MseLossBackward0>)
Train Step: 144  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 144  loss:  tensor(0.0561, grad_fn=<MseLossBackward0>)
Train Step: 144  loss:  tensor(0.0565, grad_fn=<MseLossBackward0>)
Train Step: 144  loss:  tensor(0.0558, grad_fn=<MseLossBackward0>)
Train Step: 144  loss:  tensor(0.0551, grad_fn=<MseLossBackward0>)
Train Step: 144  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 144  loss:  tensor(0.0564, grad_fn=<MseLossBackward0>)
Train Step: 144  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 144  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 144  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 144  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 144  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 144  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 144  loss:  tensor(0.0558, grad_fn=<MseLossBackwar

Train Step: 145  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 145  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 145  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 145  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 145  loss:  tensor(0.0562, grad_fn=<MseLossBackward0>)
Train Step: 145  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 145  loss:  tensor(0.0563, grad_fn=<MseLossBackward0>)
Train Step: 145  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 145  loss:  tensor(0.0589, grad_fn=<MseLossBackward0>)
Train Step: 145  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 145  loss:  tensor(0.0563, grad_fn=<MseLossBackward0>)
Train Step: 145  loss:  tensor(0.0560, grad_fn=<MseLossBackward0>)
Train Step: 145  loss:  tensor(0.0565, grad_fn=<MseLossBackward0>)
Train Step: 145  loss:  tensor(0.0585, grad_fn=<MseLossBackward0>)
Train Step: 145  loss:  tensor(0.0568, grad_fn=<MseLossBackwar

Train Step: 146  loss:  tensor(0.0558, grad_fn=<MseLossBackward0>)
Train Step: 146  loss:  tensor(0.0565, grad_fn=<MseLossBackward0>)
Train Step: 146  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 146  loss:  tensor(0.0569, grad_fn=<MseLossBackward0>)
Train Step: 146  loss:  tensor(0.0572, grad_fn=<MseLossBackward0>)
Train Step: 146  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 146  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 147  loss:  tensor(0.0567, grad_fn=<MseLossBackward0>)
Train Step: 147  loss:  tensor(0.0566, grad_fn=<MseLossBackward0>)
Train Step: 147  loss:  tensor(0.0555, grad_fn=<MseLossBackward0>)
Train Step: 147  loss:  tensor(0.0571, grad_fn=<MseLossBackward0>)
Train Step: 147  loss:  tensor(0.0567, grad_fn=<MseLossBackward0>)
Train Step: 147  loss:  tensor(0.0559, grad_fn=<MseLossBackward0>)
Train Step: 147  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 147  loss:  tensor(0.0557, grad_fn=<MseLossBackwar

Train Step: 148  loss:  tensor(0.0569, grad_fn=<MseLossBackward0>)
Train Step: 148  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 148  loss:  tensor(0.0581, grad_fn=<MseLossBackward0>)
Train Step: 148  loss:  tensor(0.0567, grad_fn=<MseLossBackward0>)
Train Step: 148  loss:  tensor(0.0584, grad_fn=<MseLossBackward0>)
Train Step: 148  loss:  tensor(0.0564, grad_fn=<MseLossBackward0>)
Train Step: 148  loss:  tensor(0.0561, grad_fn=<MseLossBackward0>)
Train Step: 148  loss:  tensor(0.0577, grad_fn=<MseLossBackward0>)
Train Step: 148  loss:  tensor(0.0574, grad_fn=<MseLossBackward0>)
Train Step: 148  loss:  tensor(0.0576, grad_fn=<MseLossBackward0>)
Train Step: 148  loss:  tensor(0.0566, grad_fn=<MseLossBackward0>)
Train Step: 148  loss:  tensor(0.0578, grad_fn=<MseLossBackward0>)
Train Step: 148  loss:  tensor(0.0586, grad_fn=<MseLossBackward0>)
Train Step: 148  loss:  tensor(0.0580, grad_fn=<MseLossBackward0>)
Train Step: 148  loss:  tensor(0.0574, grad_fn=<MseLossBackwar

Train Step: 149  loss:  tensor(0.0568, grad_fn=<MseLossBackward0>)
Train Step: 149  loss:  tensor(0.0553, grad_fn=<MseLossBackward0>)
Train Step: 149  loss:  tensor(0.0567, grad_fn=<MseLossBackward0>)
Train Step: 149  loss:  tensor(0.0556, grad_fn=<MseLossBackward0>)
Train Step: 149  loss:  tensor(0.0587, grad_fn=<MseLossBackward0>)
Train Step: 149  loss:  tensor(0.0575, grad_fn=<MseLossBackward0>)
Train Step: 149  loss:  tensor(0.0564, grad_fn=<MseLossBackward0>)
Train Step: 149  loss:  tensor(0.0566, grad_fn=<MseLossBackward0>)
Train Step: 149  loss:  tensor(0.0560, grad_fn=<MseLossBackward0>)
Train Step: 149  loss:  tensor(0.0565, grad_fn=<MseLossBackward0>)
Train Step: 149  loss:  tensor(0.0579, grad_fn=<MseLossBackward0>)
Train Step: 149  loss:  tensor(0.0573, grad_fn=<MseLossBackward0>)
Train Step: 149  loss:  tensor(0.0565, grad_fn=<MseLossBackward0>)
Train Step: 149  loss:  tensor(0.0569, grad_fn=<MseLossBackward0>)
Train Step: 149  loss:  tensor(0.0556, grad_fn=<MseLossBackwar