In [2]:
import os 
import numpy as np 

# Files written by the pre-processing step and the folder that holds them.
save_file_name = ['fea_seq.npy', 'last_observation_seq.npy', 'label_seq.npy', 'masking_seq.npy',
                   'delta_seq.npy', 'train_valid_test_split.npy']
save_folder = 'data/raw/predict-one-day-diff/pol-met-search'

# All paths used here are relative to the parent of the start-up directory
# (presumably the notebook sits one level below the project root -- confirm).
# Step up only while the data folder is not reachable yet: an unconditional
# chdir would climb one more level every time this cell is re-executed.
if not os.path.isdir(save_folder):
    os.chdir(os.path.pardir)

# load data from file 
saved_arrays = [np.load(os.path.join(save_folder, file_name)) for file_name in save_file_name]
[fea_seq, last_observation_seq, label_seq, masking_seq, delta_seq, train_valid_test_split] = saved_arrays


In [3]:
# train-test-split 
# train_valid_test_split is used as [n_train, n_dev, n_test]: the three sets are
# consecutive index ranges, in that order.
train_index = list(range(train_valid_test_split[0]))
dev_index = list(range(train_valid_test_split[0],
                       train_valid_test_split[0] + train_valid_test_split[1]))
test_index = list(range(train_valid_test_split[0] + train_valid_test_split[1],
                        train_valid_test_split[0] + train_valid_test_split[1] + train_valid_test_split[2]))

In [4]:
def get_array_by_index_range(nparray_list, label_array, index_range):
    '''
    Select the entries at the given indices whose label is not NaN.

    nparray_list: list of nparrays to select according to index range 
    label_array: array the labels are taken from
    index_range: iterable of candidate indices along the first axis

    Returns a pair (selected_arrays, selected_labels): every array of
    nparray_list restricted to the kept indices, and the matching labels
    flattened to one dimension.
    '''
    # keep only the positions that actually carry a label
    kept = [idx for idx in index_range if not np.isnan(label_array[idx])]

    selected_arrays = [arr[kept] for arr in nparray_list]
    selected_labels = label_array[kept].reshape(-1)
    return selected_arrays, selected_labels

In [5]:
# split set to train, test and dev sets 
# (features and last-observation sequences are filtered with the same labels,
#  so the three outputs of each call stay aligned)
# train set
(fea_train, last_train), label_train = get_array_by_index_range(
    [fea_seq, last_observation_seq], label_seq, train_index)
# dev set 
(fea_dev, last_dev), label_dev = get_array_by_index_range(
    [fea_seq, last_observation_seq], label_seq, dev_index)
# test set 
(fea_test, last_test), label_test = get_array_by_index_range(
    [fea_seq, last_observation_seq], label_seq, test_index)

In [6]:
def normalize_feature(fea_train, array_list):
    """
    Z-score arrays using statistics estimated on the training features only.

    fea_train: training features; mean and std are taken over axis 0, ignoring NaNs
    array_list: [fea_dev, fea_test, last_train, last_dev, last_test] to normalize 

    Returns (normalized fea_train, list of normalized arrays in the order of
    array_list). Entries whose training std is 0 are only centered (divided by 1)
    instead of being divided by zero, so they come out finite.
    """
    train_mean = np.nanmean(fea_train, axis=0)
    train_std = np.nanstd(fea_train, axis=0)
    # A feature that is constant over the training set has std 0; dividing by it
    # would produce inf/NaN that then propagates through the model.
    safe_std = np.where(train_std == 0, 1.0, train_std)

    def norm_arr(nparr):
        return (nparr - train_mean) / safe_std

    return (norm_arr(fea_train), [norm_arr(k) for k in array_list])

In [7]:
# Normalize every split with the training statistics.
# NOTE: the inputs are overwritten with their normalized versions, so running
# this cell a second time would normalize already-normalized data.
(fea_train,
 (fea_dev, fea_test, last_train, last_dev, last_test)) = normalize_feature(
    fea_train,
    [fea_dev, fea_test, last_train, last_dev, last_test],
)

In [8]:
# record mean after normalization
# (NaN-ignoring mean over axis 0 of the already-normalized training features)
x_mean_aft_nor = np.nanmean(fea_train, axis=0)

In [9]:
# control experiment using last observed value for missing data imputation 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
from torch.autograd import Variable, grad
from torch.optim.lr_scheduler import ReduceLROnPlateau
import math

In [10]:
fea_train.shape

(664, 7, 52)

In [11]:
def train_mfn(X_train, y_train, X_valid, y_valid, X_test, y_test, configs):
    """Train an MFN regressor, keep the best-on-validation weights, report test error.

    Parameters
    ----------
    X_train, X_valid, X_test : np.ndarray
        Feature arrays. Axes 0 and 1 are swapped before use because the model
        iterates over the first axis; the calling cells pass
        (samples, time, features) slices.
    y_train, y_valid, y_test : np.ndarray
        One target value per sample, aligned with the second axis of the swapped
        features.
    configs : list
        [config, NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig],
        forwarded unchanged to MFN. ``config`` must also provide "lr",
        "batchsize" and "num_epochs".

    Returns
    -------
    None. Per-epoch train/validation MSE and the final test MAE / MSE are printed.

    Notes
    -----
    * Training data is consumed in its original order (no shuffling).
    * The best weights are written to ``models/temp_models/mfn_<random>.pt`` as a
      ``state_dict`` (the folder is created when missing) and restored before the
      test prediction. If no epoch ever improves (e.g. NaN losses or zero epochs)
      the weights of the last epoch are used instead of failing on a missing file.
    """
    # swap axes for back propagation: the model expects time on the first axis
    def swap_axes(nparr):
        return nparr.swapaxes(0, 1)
    X_train = swap_axes(X_train)
    X_valid = swap_axes(X_valid)
    X_test = swap_axes(X_test)

    [config, NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig] = configs

    model = MFN(config, NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig)
    optimizer = optim.Adam(model.parameters(), lr=config["lr"])

    criterion = nn.MSELoss()
    device = torch.device('cpu')
    model = model.to(device)
    criterion = criterion.to(device)
    # halve the learning rate after 10 epochs without validation improvement
    scheduler = ReduceLROnPlateau(optimizer, mode="min", patience=10, factor=0.5, verbose=True)

    def train(model, batchsize, X_train, y_train, optimizer, criterion):
        """Run one epoch of sequential mini-batch updates; return the mean batch loss."""
        epoch_loss = 0
        model.train()
        total_n = X_train.shape[1]
        num_batches = math.ceil(total_n / batchsize)
        for batch in range(num_batches):
            start = batch * batchsize
            end = (batch + 1) * batchsize  # slicing clips the last, shorter batch
            optimizer.zero_grad()
            batch_X = torch.Tensor(X_train[:, start:end])
            batch_y = torch.Tensor(y_train[start:end])
            predictions = model(batch_X).squeeze(1)
            loss = criterion(predictions, batch_y)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        return epoch_loss / num_batches

    def evaluate(model, X_valid, y_valid, criterion):
        """Return the loss on the whole validation set, computed in a single pass."""
        model.eval()
        with torch.no_grad():
            batch_X = torch.Tensor(X_valid)
            batch_y = torch.Tensor(y_valid)
            predictions = model(batch_X).squeeze(1)
            epoch_loss = criterion(predictions, batch_y).item()
        return epoch_loss

    def predict(model, X_test):
        """Return the model outputs for X_test as a 1-D numpy array."""
        model.eval()
        with torch.no_grad():
            batch_X = torch.Tensor(X_test)
            predictions = model(batch_X).squeeze(1)
            predictions = predictions.cpu().data.numpy()
        return predictions

    best_valid = float('inf')
    rand = random.randint(0, 100000)
    checkpoint_path = 'models/temp_models/mfn_%d.pt' % rand
    # torch.save does not create missing folders
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
    checkpoint_saved = False

    print('epoch train_loss valid_loss')
    for epoch in range(config["num_epochs"]):
        train_loss = train(model, config["batchsize"], X_train, y_train, optimizer, criterion)
        valid_loss = evaluate(model, X_valid, y_valid, criterion)
        scheduler.step(valid_loss)
        if valid_loss <= best_valid:
            # save model
            best_valid = valid_loss
            print(epoch, train_loss, valid_loss, 'saving model')
            # Only the weights are stored: pickling the whole module ties the file
            # to the class source and is what triggers the serialization warning.
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
        else:
            print(epoch, train_loss, valid_loss)

    # restore the best validation weights (if any epoch produced a checkpoint)
    if checkpoint_saved:
        model.load_state_dict(torch.load(checkpoint_path))

    predictions = predict(model, X_test)
    mae = np.mean(np.absolute(predictions - y_test))
    print("mae: ", mae)
    mse = np.mean((predictions - y_test) ** 2)
    print("mse: ", mse)

In [12]:
class MFN(nn.Module):
    """MFN regressor (presumably "Memory Fusion Network") built on two GRU cells.

    The input features are split into two streams (the first ``d_l`` features and
    the next ``d_a`` features); each stream has its own GRU cell. At every time
    step the previous and new hidden states of both streams are concatenated,
    re-weighted by a softmax attention, and used to update a gated memory. The
    final hidden states and memory feed a two-layer head with one output.

    NOTE: this notebook defines ``MFN`` twice (this GRU-cell variant and an
    LSTM-cell variant); whichever definition was executed last is the one in use.

    Config keys read here:
      config: "input_dims" [d_l, d_a], "h_dims" [dh_l, dh_a], "memsize",
              "windowsize" (must be 2: forward concatenates exactly the previous
              and the new state)
      NN1Config / NN2Config / gamma1Config / gamma2Config / outConfig:
              "shapes" (hidden width) and "drop" (dropout probability)
    """

    def __init__(self,config,NN1Config,NN2Config,gamma1Config,gamma2Config,outConfig):
        super(MFN, self).__init__()
        [self.d_l,self.d_a] = config["input_dims"]
        [self.dh_l,self.dh_a] = config["h_dims"]
        total_h_dim = self.dh_l+self.dh_a

        self.mem_dim = config["memsize"]
        window_dim = config["windowsize"]
        output_dim = 1
        attInShape = total_h_dim*window_dim
        gammaInShape = attInShape+self.mem_dim
        final_out = total_h_dim+self.mem_dim
        h_att1 = NN1Config["shapes"]
        h_att2 = NN2Config["shapes"]
        h_gamma1 = gamma1Config["shapes"]
        h_gamma2 = gamma2Config["shapes"]
        h_out = outConfig["shapes"]
        att1_dropout = NN1Config["drop"]
        att2_dropout = NN2Config["drop"]
        gamma1_dropout = gamma1Config["drop"]
        gamma2_dropout = gamma2Config["drop"]
        out_dropout = outConfig["drop"]

        # Layers are created in a fixed order so that seeded initialisation is
        # reproducible; the attribute names are also the state_dict keys.
        # (attributes keep the "lstm_" prefix although they are GRU cells here)
        self.lstm_l = nn.GRUCell(self.d_l, self.dh_l)
        self.lstm_a = nn.GRUCell(self.d_a, self.dh_a)

        # attention over the concatenated [previous, new] states
        self.att1_fc1 = nn.Linear(attInShape, h_att1)
        self.att1_fc2 = nn.Linear(h_att1, attInShape)
        self.att1_dropout = nn.Dropout(att1_dropout)

        # candidate memory content
        self.att2_fc1 = nn.Linear(attInShape, h_att2)
        self.att2_fc2 = nn.Linear(h_att2, self.mem_dim)
        self.att2_dropout = nn.Dropout(att2_dropout)

        # retain gate of the memory
        self.gamma1_fc1 = nn.Linear(gammaInShape, h_gamma1)
        self.gamma1_fc2 = nn.Linear(h_gamma1, self.mem_dim)
        self.gamma1_dropout = nn.Dropout(gamma1_dropout)

        # update gate of the memory
        self.gamma2_fc1 = nn.Linear(gammaInShape, h_gamma2)
        self.gamma2_fc2 = nn.Linear(h_gamma2, self.mem_dim)
        self.gamma2_dropout = nn.Dropout(gamma2_dropout)

        # output head
        self.out_fc1 = nn.Linear(final_out, h_out)
        self.out_fc2 = nn.Linear(h_out, output_dim)
        self.out_dropout = nn.Dropout(out_dropout)

    def forward(self,x):
        """Map x (t x n x d) to an (n x 1) prediction taken after the last time step."""
        x_l = x[:,:,:self.d_l]
        x_a = x[:,:,self.d_l:self.d_l+self.d_a]
        # x is t x n x d
        n = x.shape[1]
        t = x.shape[0]
        # Initial states follow the dtype and device of the input, so the module
        # keeps working after .double() / .to(device).
        self.h_l = x.new_zeros(n, self.dh_l)
        self.h_a = x.new_zeros(n, self.dh_a)
        self.mem = x.new_zeros(n, self.mem_dim)

        for i in range(t):
            # prev time step
            prev_h_l = self.h_l
            prev_h_a = self.h_a

            # curr time step
            new_h_l = self.lstm_l(x_l[i], prev_h_l)
            new_h_a = self.lstm_a(x_a[i], prev_h_a)

            # concatenate previous and new hidden states of both streams
            prev_cs = torch.cat([prev_h_l,prev_h_a], dim=1)
            new_cs = torch.cat([new_h_l,new_h_a], dim=1)
            cStar = torch.cat([prev_cs,new_cs], dim=1)

            attention = F.softmax(self.att1_fc2(self.att1_dropout(F.relu(self.att1_fc1(cStar)))),dim=1)
            attended = attention*cStar

            cHat = torch.tanh(self.att2_fc2(self.att2_dropout(F.relu(self.att2_fc1(attended)))))

            both = torch.cat([attended,self.mem], dim=1)
            gamma1 = torch.sigmoid(self.gamma1_fc2(self.gamma1_dropout(F.relu(self.gamma1_fc1(both)))))
            gamma2 = torch.sigmoid(self.gamma2_fc2(self.gamma2_dropout(F.relu(self.gamma2_fc1(both)))))

            # gated memory update
            self.mem = gamma1*self.mem + gamma2*cHat
            # update
            self.h_l = new_h_l
            self.h_a = new_h_a

        # last hidden layer last_hs is n x h
        last_hs = torch.cat([self.h_l,self.h_a,self.mem], dim=1)
        output = self.out_fc2(self.out_dropout(F.relu(self.out_fc1(last_hs))))
        return output

In [16]:
last_train.shape

(664, 7, 52)

In [13]:
# Hyper-parameters of this run.
hl = 128
ha = 128
drop = 0.7
config = {
    "input_dims": [1, 4],
    "h_dims": [hl, ha],
    "memsize": hl,
    "windowsize": 2,
    "batchsize": 32,
    "num_epochs": 50,
    "lr": 0.0005,
}
# hidden width / dropout of each sub-network
NN1Config = {"shapes": hl, "drop": drop}
NN2Config = {"shapes": 32, "drop": drop}
gamma1Config = {"shapes": hl, "drop": drop}
gamma2Config = {"shapes": hl, "drop": drop}
outConfig = {"shapes": hl, "drop": drop}
configs = [config, NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig]

# Seed numpy and torch, then train on the first five features of the
# last-observation arrays.
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
train_mfn(
    last_train[:, :, 0:5], label_train,
    last_dev[:, :, 0:5], label_dev,
    last_test[:, :, 0:5], label_test,
    configs,
)

epoch train_loss valid_loss




0 40.64848395756313 32.172306060791016 saving model


  "type " + obj.__name__ + ". It won't be checked "


1 38.42827447255453 28.981014251708984 saving model
2 34.58909429822649 23.673786163330078 saving model
3 29.38978844597226 19.39164161682129 saving model
4 26.0859858194987 19.015167236328125 saving model
5 25.692779041471937 18.97467613220215 saving model
6 25.395569438026065 19.07463836669922
7 25.333875429062616 18.591068267822266 saving model
8 24.80284818013509 18.90890884399414
9 23.839193571181525 18.6917724609375
10 23.45577398935954 18.540889739990234 saving model
11 24.918754804702033 18.686447143554688
12 23.612905820210774 18.473556518554688 saving model
13 22.95072927929106 18.91614532470703
14 22.703480448041642 18.783994674682617
15 23.82614703405471 19.088760375976562
16 22.4011386235555 18.70659637451172
17 23.4459475562686 18.734678268432617
18 22.72279911949521 19.22016143798828
19 22.593629428318568 18.57927703857422
20 22.630849792843772 18.906837463378906
21 21.589904467264812 18.87853240966797
22 22.329895564488 18.934772491455078
Epoch    24: reducing learning 

In [14]:
# Hyper-parameters of this run.
hl = 128
ha = 128
drop = 0.7
config = {
    "input_dims": [1, 4],
    "h_dims": [hl, ha],
    "memsize": hl,
    "windowsize": 2,
    "batchsize": 32,
    "num_epochs": 50,
    "lr": 0.0005,
}
# hidden width / dropout of each sub-network
NN1Config = {"shapes": hl, "drop": drop}
NN2Config = {"shapes": 32, "drop": drop}
gamma1Config = {"shapes": hl, "drop": drop}
gamma2Config = {"shapes": hl, "drop": drop}
outConfig = {"shapes": hl, "drop": drop}
configs = [config, NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig]

# Seed numpy and torch, then train on the first five features of the
# last-observation arrays.
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
train_mfn(
    last_train[:, :, 0:5], label_train,
    last_dev[:, :, 0:5], label_dev,
    last_test[:, :, 0:5], label_test,
    configs,
)

epoch train_loss valid_loss
0 40.64848395756313 32.172306060791016 saving model
1 38.42827447255453 28.981014251708984 saving model
2 34.58909429822649 23.673786163330078 saving model
3 29.38978844597226 19.39164161682129 saving model
4 26.0859858194987 19.015167236328125 saving model
5 25.692779041471937 18.97467613220215 saving model
6 25.395569438026065 19.07463836669922
7 25.333875429062616 18.591068267822266 saving model
8 24.80284818013509 18.90890884399414
9 23.839193571181525 18.6917724609375
10 23.45577398935954 18.540889739990234 saving model
11 24.918754804702033 18.686447143554688
12 23.612905820210774 18.473556518554688 saving model
13 22.95072927929106 18.91614532470703
14 22.703480448041642 18.783994674682617
15 23.82614703405471 19.088760375976562
16 22.4011386235555 18.70659637451172
17 23.4459475562686 18.734678268432617
18 22.72279911949521 19.22016143798828
19 22.593629428318568 18.57927703857422
20 22.630849792843772 18.906837463378906
21 21.589904467264812 18.8785

In [12]:
# Hyper-parameters of this run (wider cells and a larger learning rate).
hl = 256
ha = 256
drop = 0.7
config = {
    "input_dims": [1, 4],
    "h_dims": [hl, ha],
    "memsize": hl,
    "windowsize": 2,
    "batchsize": 32,
    "num_epochs": 50,
    "lr": 0.001,
}
# hidden width / dropout of each sub-network
NN1Config = {"shapes": hl, "drop": drop}
NN2Config = {"shapes": 32, "drop": drop}
gamma1Config = {"shapes": hl, "drop": drop}
gamma2Config = {"shapes": hl, "drop": drop}
outConfig = {"shapes": hl, "drop": drop}
configs = [config, NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig]

# Seed numpy and torch, then train on the first five features of the
# last-observation arrays.
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
train_mfn(
    last_train[:, :, 0:5], label_train,
    last_dev[:, :, 0:5], label_dev,
    last_test[:, :, 0:5], label_test,
    configs,
)

epoch train_loss valid_loss




0 36.76633980160668 22.68921661376953 saving model


  "type " + obj.__name__ + ". It won't be checked "


1 26.650658289591473 18.734664916992188 saving model
2 27.01508240472703 19.434616088867188
3 24.296658470517112 18.99413299560547
4 23.259517942156112 18.831459045410156
5 23.14715812319801 19.12861442565918
6 23.037480127243768 18.802019119262695
7 22.624505678812664 19.084245681762695
8 20.873754728408088 19.046112060546875
9 21.695666131519136 19.361303329467773
10 20.86329105922154 18.83840560913086
11 20.559358733040945 19.44209098815918
12 20.419234048752557 17.910724639892578 saving model
13 20.6493467603411 20.808439254760742
14 19.174366860162642 19.30905532836914
15 20.435554686046782 18.339069366455078
16 19.844137146359397 19.49654769897461
17 18.848063968476794 21.234594345092773
18 19.203963188897994 19.71392059326172
19 18.338015011378697 19.369653701782227
20 17.59056804293678 20.757963180541992
21 18.275400706699916 20.173341751098633
22 17.420775231860933 20.804492950439453
Epoch    24: reducing learning rate of group 0 to 5.0000e-04.
23 17.2085835366022 21.121580123

In [9]:
class MFN(nn.Module):
    """MFN regressor (presumably "Memory Fusion Network") built on two LSTM cells.

    The input features are split into two streams (the first ``d_l`` features and
    the next ``d_a`` features); each stream has its own LSTM cell. At every time
    step the previous and new cell states of both streams are concatenated,
    re-weighted by a softmax attention, and used to update a gated memory. The
    final hidden states and memory feed a two-layer head with one output.

    NOTE: this notebook defines ``MFN`` twice (a GRU-cell variant and this
    LSTM-cell variant); whichever definition was executed last is the one in use.

    Config keys read here:
      config: "input_dims" [d_l, d_a], "h_dims" [dh_l, dh_a], "memsize",
              "windowsize" (must be 2: forward concatenates exactly the previous
              and the new state)
      NN1Config / NN2Config / gamma1Config / gamma2Config / outConfig:
              "shapes" (hidden width) and "drop" (dropout probability)
    """

    def __init__(self,config,NN1Config,NN2Config,gamma1Config,gamma2Config,outConfig):
        super(MFN, self).__init__()
        [self.d_l,self.d_a] = config["input_dims"]
        [self.dh_l,self.dh_a] = config["h_dims"]
        total_h_dim = self.dh_l+self.dh_a

        self.mem_dim = config["memsize"]
        window_dim = config["windowsize"]
        output_dim = 1
        attInShape = total_h_dim*window_dim
        gammaInShape = attInShape+self.mem_dim
        final_out = total_h_dim+self.mem_dim
        h_att1 = NN1Config["shapes"]
        h_att2 = NN2Config["shapes"]
        h_gamma1 = gamma1Config["shapes"]
        h_gamma2 = gamma2Config["shapes"]
        h_out = outConfig["shapes"]
        att1_dropout = NN1Config["drop"]
        att2_dropout = NN2Config["drop"]
        gamma1_dropout = gamma1Config["drop"]
        gamma2_dropout = gamma2Config["drop"]
        out_dropout = outConfig["drop"]

        # Layers are created in a fixed order so that seeded initialisation is
        # reproducible; the attribute names are also the state_dict keys.
        self.lstm_l = nn.LSTMCell(self.d_l, self.dh_l)
        self.lstm_a = nn.LSTMCell(self.d_a, self.dh_a)

        # attention over the concatenated [previous, new] cell states
        self.att1_fc1 = nn.Linear(attInShape, h_att1)
        self.att1_fc2 = nn.Linear(h_att1, attInShape)
        self.att1_dropout = nn.Dropout(att1_dropout)

        # candidate memory content
        self.att2_fc1 = nn.Linear(attInShape, h_att2)
        self.att2_fc2 = nn.Linear(h_att2, self.mem_dim)
        self.att2_dropout = nn.Dropout(att2_dropout)

        # retain gate of the memory
        self.gamma1_fc1 = nn.Linear(gammaInShape, h_gamma1)
        self.gamma1_fc2 = nn.Linear(h_gamma1, self.mem_dim)
        self.gamma1_dropout = nn.Dropout(gamma1_dropout)

        # update gate of the memory
        self.gamma2_fc1 = nn.Linear(gammaInShape, h_gamma2)
        self.gamma2_fc2 = nn.Linear(h_gamma2, self.mem_dim)
        self.gamma2_dropout = nn.Dropout(gamma2_dropout)

        # output head
        self.out_fc1 = nn.Linear(final_out, h_out)
        self.out_fc2 = nn.Linear(h_out, output_dim)
        self.out_dropout = nn.Dropout(out_dropout)

    def forward(self,x):
        """Map x (t x n x d) to an (n x 1) prediction taken after the last time step."""
        x_l = x[:,:,:self.d_l]
        x_a = x[:,:,self.d_l:self.d_l+self.d_a]
        # x is t x n x d
        n = x.shape[1]
        t = x.shape[0]
        # Initial states follow the dtype and device of the input, so the module
        # keeps working after .double() / .to(device).
        self.h_l = x.new_zeros(n, self.dh_l)
        self.h_a = x.new_zeros(n, self.dh_a)

        self.c_l = x.new_zeros(n, self.dh_l)
        self.c_a = x.new_zeros(n, self.dh_a)

        self.mem = x.new_zeros(n, self.mem_dim)

        for i in range(t):
            # prev time step
            prev_c_l = self.c_l
            prev_c_a = self.c_a

            # curr time step
            new_h_l, new_c_l = self.lstm_l(x_l[i], (self.h_l, self.c_l))
            new_h_a, new_c_a = self.lstm_a(x_a[i], (self.h_a, self.c_a))

            # concatenate previous and new cell states of both streams
            prev_cs = torch.cat([prev_c_l,prev_c_a], dim=1)
            new_cs = torch.cat([new_c_l,new_c_a], dim=1)
            cStar = torch.cat([prev_cs,new_cs], dim=1)

            attention = F.softmax(self.att1_fc2(self.att1_dropout(F.relu(self.att1_fc1(cStar)))),dim=1)
            attended = attention*cStar

            cHat = torch.tanh(self.att2_fc2(self.att2_dropout(F.relu(self.att2_fc1(attended)))))

            both = torch.cat([attended,self.mem], dim=1)
            gamma1 = torch.sigmoid(self.gamma1_fc2(self.gamma1_dropout(F.relu(self.gamma1_fc1(both)))))
            gamma2 = torch.sigmoid(self.gamma2_fc2(self.gamma2_dropout(F.relu(self.gamma2_fc1(both)))))

            # gated memory update
            self.mem = gamma1*self.mem + gamma2*cHat
            # update
            self.h_l, self.c_l = new_h_l, new_c_l
            self.h_a, self.c_a = new_h_a, new_c_a

        # last hidden layer last_hs is n x h
        last_hs = torch.cat([self.h_l,self.h_a,self.mem], dim=1)
        output = self.out_fc2(self.out_dropout(F.relu(self.out_fc1(last_hs))))
        return output

In [18]:
# Hyper-parameters of this run (wider cells and a larger learning rate).
hl = 256
ha = 256
drop = 0.7
config = {
    "input_dims": [1, 4],
    "h_dims": [hl, ha],
    "memsize": hl,
    "windowsize": 2,
    "batchsize": 32,
    "num_epochs": 50,
    "lr": 0.001,
}
# hidden width / dropout of each sub-network
NN1Config = {"shapes": hl, "drop": drop}
NN2Config = {"shapes": 32, "drop": drop}
gamma1Config = {"shapes": hl, "drop": drop}
gamma2Config = {"shapes": hl, "drop": drop}
outConfig = {"shapes": hl, "drop": drop}
configs = [config, NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig]

# Seed numpy and torch, then train on the first five features of the
# last-observation arrays.
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
train_mfn(
    last_train[:, :, 0:5], label_train,
    last_dev[:, :, 0:5], label_dev,
    last_test[:, :, 0:5], label_test,
    configs,
)

epoch train_loss valid_loss
0 40.40646107991537 33.01768493652344 saving model
1 39.760721842447914 32.0798225402832 saving model
2 38.700338999430336 30.67528533935547 saving model
3 37.22062428792318 28.808597564697266 saving model
4 35.17368825276693 26.693241119384766 saving model
5 33.8869997660319 24.551103591918945 saving model
6 30.90236536661784 22.54726791381836 saving model
7 28.18731625874837 20.56934928894043 saving model
8 25.728047688802082 19.704343795776367 saving model
9 24.77847671508789 20.08586311340332
10 24.603047053019207 20.113313674926758
11 22.889197667439777 19.138946533203125 saving model
12 24.11378796895345 18.722599029541016 saving model
13 23.237943013509113 18.890501022338867
14 23.032106399536133 19.177263259887695
15 22.720221837361652 19.596290588378906
16 22.287209192911785 19.857763290405273
17 22.313144048055012 19.48477554321289
18 22.282973607381184 18.978906631469727
19 22.483434677124023 18.614791870117188 saving model
20 22.58549690246582 18

In [13]:
last_train.shape

(664, 7, 52)