In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import joblib
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
#         print(os.path.join(dirname, filename))
        continue
import torch
import torch.nn as nn
import numpy as np
import time
import math
from matplotlib import pyplot
import joblib
from sklearn.preprocessing import MinMaxScaler

torch.manual_seed(0)
np.random.seed(0)
calculate_loss_over_all_values = False

In [2]:
# # for TPU
# device = xm.xla_device()
# torch.set_default_tensor_type('torch.FloatTensor')
# import torch_xla
# import torch_xla.core.xla_model as xm

# # FOR GPU AND CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [3]:
# S is the source sequence length
# T is the target sequence length
# N is the batch size
# E is the feature number
#src = torch.rand((10, 32, 512)) # (S,N,E) 
#tgt = torch.rand((20, 32, 512)) # (T,N,E)
#out = transformer_model(src, tgt)
#print(out)

input_window = 300
output_window = 5
batch_size = 20 # batch size

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()       
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        #pe.requires_grad = False
        self.register_buffer('pe', pe)
    def forward(self, x):
        return x + self.pe[:x.size(0), :]

class TransAm(nn.Module):
    def __init__(self,feature_size=30,num_layers=2,dropout=0.2):
        super(TransAm, self).__init__()
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(feature_size)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=feature_size, nhead=10, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)        
        self.decoder = nn.Linear(feature_size,1)
        self.init_weights()
    def init_weights(self):
        initrange = 0.1    
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)
    def forward(self,src):
        if self.src_mask is None or self.src_mask.size(0) != len(src):
            device = src.device
            mask = self._generate_square_subsequent_mask(len(src)).to(device)
            self.src_mask = mask
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src,self.src_mask)#, self.src_mask)
        output = self.decoder(output)
        return output
    def _generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

# if window is 100 and prediction step is 1
# in -> [0..99]
# target -> [1..100]
def create_inout_sequences(input_data, tw):
    inout_seq = []
    L = len(input_data)
    for i in range(L-tw):
        train_seq = np.append(input_data[i:i+tw][:-output_window] , output_window * [0])
        train_label = input_data[i:i+tw]
        #train_label = input_data[i+output_window:i+tw+output_window]
        inout_seq.append((train_seq ,train_label))
    return torch.FloatTensor(inout_seq)

def get_data2(inst):
    this_inst_df = joblib.load(f'./dt11112222/{inst}.p')
    amplitude = this_inst_df['close'].to_numpy()
    amplitude = amplitude.reshape(-1)
    print(amplitude.shape)
    
    scaler = MinMaxScaler(feature_range=(-15, 15)) 
    amplitude = scaler.fit_transform(amplitude.reshape(-1, 1)).reshape(-1)
    
    sampels = int(amplitude.shape[0]*0.95)
    train_data = amplitude[:sampels]
    test_data = amplitude[sampels:]

    train_sequence = create_inout_sequences(train_data,input_window)
    train_sequence = train_sequence[:-output_window]
    print(train_sequence.shape)
    
    test_data = create_inout_sequences(test_data,input_window)
    test_data = test_data[:-output_window]
    print(test_data.shape)
    
    return train_sequence.to(device),test_data.to(device)

def get_batch(source, i,batch_size):
    seq_len = min(batch_size, len(source) - 1 - i)
    data = source[i:i+seq_len]    
    input = torch.stack(torch.stack([item[0] for item in data]).chunk(input_window,1)) # 1 is feature size
    target = torch.stack(torch.stack([item[1] for item in data]).chunk(input_window,1))
    return input, target


def train(train_data):
    model.train() # Turn on the train mode
    total_loss = 0.
    start_time = time.time()
    for batch, i in enumerate(range(0, len(train_data) - 1, batch_size)):
        data, targets = get_batch(train_data, i,batch_size)
        optimizer.zero_grad()
        output = model(data)        
        if calculate_loss_over_all_values:
            loss = criterion(output, targets)
        else:
            loss = criterion(output[-output_window:], targets[-output_window:])
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()
        total_loss += loss.item()
        log_interval = int(len(train_data) / batch_size / 5)
        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | '
                  'lr {:02.6f} | {:5.2f} ms | '
                  'loss {:5.5f} | ppl {:8.2f}'.format(
                    epoch, batch, len(train_data) // batch_size, scheduler.get_lr()[0],
                    elapsed * 1000 / log_interval,
                    cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
plot_counter = 0
def plot_and_loss(eval_model, data_source,epoch,tknip):
    global plot_counter
    eval_model.eval() 
    total_loss = 0.
    test_result = torch.Tensor(0)    
    truth = torch.Tensor(0)
    with torch.no_grad():
        for i in range(0, len(data_source) - 1):
            data, target = get_batch(data_source, i,1)
            # look like the model returns static values for the output window
            output = eval_model(data)    
            if calculate_loss_over_all_values:                                
                total_loss += criterion(output, target).item()
            else:
                total_loss += criterion(output[-output_window:], target[-output_window:]).item()

            test_result = torch.cat((test_result.to(device), output[-1].view(-1).to(device)), 0) #todo: check this. -> looks good to me
            truth = torch.cat((truth.to(device), target[-1].view(-1).to(device)), 0)
            
#             test_result = torch.cat((test_result, output[-1].view(-1).cpu()), 0) #todo: check this. -> looks good to me
#             truth = torch.cat((truth, target[-1].view(-1).cpu()), 0)
            
    test_result = test_result.cpu().numpy()
    truth = truth.cpu().numpy()
    len(test_result)
    plot_counter +=1
    pyplot.plot(test_result,color="red")
    pyplot.plot(truth[:500],color="blue")
    pyplot.plot(test_result-truth,color="green")
    pyplot.grid(True, which='both')
    pyplot.axhline(y=0, color='k')
    pyplot.savefig(f'./plots_5x/transformer-epoch_{plot_counter}_{epoch}_{tknip}.png')
   
    pyplot.close()
    
    return total_loss / i


def predict_future(eval_model, data_source,steps,tknip):
    eval_model.eval() 
    total_loss = 0.
    test_result = torch.Tensor(0)    
    truth = torch.Tensor(0)
    _ , data = get_batch(data_source, 0,1)
    with torch.no_grad():
        for i in range(0, steps,1):
            input = torch.clone(data[-input_window:])
            input[-output_window:] = 0     
            output = eval_model(data[-input_window:])                        
            data = torch.cat((data, output[-1:]))
            
#     data = data.to(device).view(-1)
    data = data.cpu().view(-1)
    pyplot.plot(data,color="red")       
    pyplot.plot(data[:input_window],color="blue")
    pyplot.grid(True, which='both')
    pyplot.axhline(y=0, color='k')
    pyplot.savefig(f'./plots_5x/transformer-future_{plot_counter}_{steps}_{tknip}.png')
    pyplot.close()
        
# entweder ist hier ein fehler im loss oder in der train methode, aber die ergebnisse sind unterschiedlich 
# auch zu denen der predict_future
def evaluate(eval_model, data_source):
    eval_model.eval() # Turn on the evaluation mode
    total_loss = 0.
    eval_batch_size = 1000
    with torch.no_grad():
        for i in range(0, len(data_source) - 1, eval_batch_size):
            data, targets = get_batch(data_source, i,eval_batch_size)
            output = eval_model(data)            
            if calculate_loss_over_all_values:
                total_loss += len(data[0])* criterion(output, targets).to(device).item()
#                 total_loss += len(data[0])* criterion(output, targets).cpu().item()
            else:                                
                total_loss += len(data[0])* criterion(output[-output_window:], targets[-output_window:]).to(device).item()            
#                 total_loss += len(data[0])* criterion(output[-output_window:], targets[-output_window:]).cpu().item()            
    return total_loss / len(data_source)

In [4]:
# model = TransAm().to(device)
model= torch.load('./best_model_multi11.pt')

valid_loss_list = []

criterion = nn.MSELoss()
lr = 0.000001
#optimizer = torch.optim.SGD(model.parameters(), lr=lr)
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.98)

best_val_loss = float("inf")
epochs = 1 # The number of epochs
best_model = None

In [None]:
counter = 0
files_to_consider = 15

load_counter = 0

while True:
    for dirname, _, filenames in os.walk('./dt11112222/'):
        for filename in filenames:
            path_str = os.path.join(dirname, filename)
            split_lst = path_str.split('.')
            contains = 'p' in split_lst
            if(contains): ## and counter<files_to_consider

                one_plty = filename.split('/')[0].split('.')[0]

                train_data, val_data = get_data2(one_plty)

                for epoch in range(1, epochs + 1):
                    epoch_start_time = time.time()
                    train(train_data)

                    if(epoch % 1 == 0):
                        val_loss = plot_and_loss(model, val_data,epoch,one_plty)
                        predict_future(model, val_data,200,one_plty)
                    else:
                        val_loss = evaluate(model, val_data)

                    print('-' * 89)
                    print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.5f} | valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                                     val_loss, math.exp(val_loss)))
                    print('-' * 89)
                    
                    torch.save(model,'./best_model_multi10.pt')
                    torch.save(model,f'./inst_wise_model_5x/{one_plty}.pt')
                    
                    load_counter+=1
                    valid_loss_list.append({'inst':one_plty,
                                            'loss':val_loss})
                    valid_loss_map_df = pd.DataFrame(valid_loss_list)
                    valid_loss_map_df.to_excel('./valid_loss_map_df_5x.xlsx')
                    
                    
                    if val_loss < best_val_loss:
                        best_val_loss = val_loss
                        best_model = model
                        torch.save(best_model,'./best_model_multi10_best.pt')
                        model= torch.load('./best_model_multi11_best.pt')

    

                #src = torch.rand(input_window, batch_size, 1) # (source sequence length,batch size,feature number) 
                #out = model(src)
                #
                #print(out)
                #print(out.shape)

                print(path_str)
                counter+=1
                
    scheduler.step() 

(115880,)
torch.Size([109781, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  1097/ 5489 batches | lr 0.000001 | 61.29 ms | loss 0.53168 | ppl     1.70
| epoch   1 |  2194/ 5489 batches | lr 0.000001 | 61.15 ms | loss 0.04943 | ppl     1.05
| epoch   1 |  3291/ 5489 batches | lr 0.000001 | 61.09 ms | loss 0.04382 | ppl     1.04
| epoch   1 |  4388/ 5489 batches | lr 0.000001 | 61.11 ms | loss 0.09877 | ppl     1.10
| epoch   1 |  5485/ 5489 batches | lr 0.000001 | 61.25 ms | loss 0.11499 | ppl     1.12
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 353.26s | valid loss 10.87611 | valid ppl 52897.26
-----------------------------------------------------------------------------------------
./dt11112222/2952193.p
(115877,)
torch.Size([109778, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  1097/ 5488 batches | lr 0.000001 | 61.24 ms | loss 2.63398 | ppl    13.93
| epoch   1 |  2194/ 5488 batches | lr 0.000001 | 57.01 ms | loss 0.35737 | ppl     1.43
| epoch   1 |  3291/ 5488 batches | lr 0.000001 | 53.35 ms | loss 0.07555 | ppl     1.08
| epoch   1 |  4388/ 5488 batches | lr 0.000001 | 61.31 ms | loss 0.18780 | ppl     1.21
| epoch   1 |  5485/ 5488 batches | lr 0.000001 | 61.25 ms | loss 0.65155 | ppl     1.92
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 340.90s | valid loss 2.02218 | valid ppl     7.55
-----------------------------------------------------------------------------------------
./dt11112222/895745.p
(115858,)
torch.Size([109760, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  1097/ 5488 batches | lr 0.000001 | 61.28 ms | loss 5.09454 | ppl   163.13
| epoch   1 |  2194/ 5488 batches | lr 0.000001 | 61.28 ms | loss 0.71411 | ppl     2.04
| epoch   1 |  3291/ 5488 batches | lr 0.000001 | 61.37 ms | loss 0.81275 | ppl     2.25
| epoch   1 |  4388/ 5488 batches | lr 0.000001 | 61.23 ms | loss 0.61250 | ppl     1.85
| epoch   1 |  5485/ 5488 batches | lr 0.000001 | 51.95 ms | loss 0.29729 | ppl     1.35
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 343.67s | valid loss 0.15922 | valid ppl     1.17
-----------------------------------------------------------------------------------------
./dt11112222/2585345.p
(115880,)
torch.Size([109781, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  1097/ 5489 batches | lr 0.000001 | 61.31 ms | loss 0.81536 | ppl     2.26
| epoch   1 |  2194/ 5489 batches | lr 0.000001 | 61.27 ms | loss 2.72213 | ppl    15.21
| epoch   1 |  3291/ 5489 batches | lr 0.000001 | 61.16 ms | loss 0.82939 | ppl     2.29
| epoch   1 |  4388/ 5489 batches | lr 0.000001 | 61.26 ms | loss 0.12803 | ppl     1.14
| epoch   1 |  5485/ 5489 batches | lr 0.000001 | 61.23 ms | loss 0.70024 | ppl     2.01
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 353.62s | valid loss 4.57990 | valid ppl    97.50
-----------------------------------------------------------------------------------------
./dt11112222/969473.p
(115849,)
torch.Size([109751, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  1097/ 5487 batches | lr 0.000001 | 55.99 ms | loss 1.49771 | ppl     4.47
| epoch   1 |  2194/ 5487 batches | lr 0.000001 | 56.69 ms | loss 0.94838 | ppl     2.58
| epoch   1 |  3291/ 5487 batches | lr 0.000001 | 61.44 ms | loss 0.47327 | ppl     1.61
| epoch   1 |  4388/ 5487 batches | lr 0.000001 | 61.32 ms | loss 0.08392 | ppl     1.09
| epoch   1 |  5485/ 5487 batches | lr 0.000001 | 61.15 ms | loss 2.25965 | ppl     9.58
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 343.33s | valid loss 1.14967 | valid ppl     3.16
-----------------------------------------------------------------------------------------
./dt11112222/1195009.p
(115858,)
torch.Size([109760, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  1097/ 5488 batches | lr 0.000001 | 61.25 ms | loss 2.48392 | ppl    11.99
| epoch   1 |  2194/ 5488 batches | lr 0.000001 | 61.25 ms | loss 0.08289 | ppl     1.09
| epoch   1 |  3291/ 5488 batches | lr 0.000001 | 59.53 ms | loss 0.10916 | ppl     1.12
| epoch   1 |  4388/ 5488 batches | lr 0.000001 | 53.09 ms | loss 0.08923 | ppl     1.09
| epoch   1 |  5485/ 5488 batches | lr 0.000001 | 61.36 ms | loss 0.16104 | ppl     1.17
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 342.94s | valid loss 8.13450 | valid ppl  3410.11
-----------------------------------------------------------------------------------------
./dt11112222/523009.p
(115871,)
torch.Size([109772, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  1097/ 5488 batches | lr 0.000001 | 61.21 ms | loss 0.69287 | ppl     2.00
| epoch   1 |  2194/ 5488 batches | lr 0.000001 | 61.15 ms | loss 3.45592 | ppl    31.69
| epoch   1 |  3291/ 5488 batches | lr 0.000001 | 61.15 ms | loss 1.90421 | ppl     6.71
| epoch   1 |  4388/ 5488 batches | lr 0.000001 | 61.09 ms | loss 0.24887 | ppl     1.28
| epoch   1 |  5485/ 5488 batches | lr 0.000001 | 61.11 ms | loss 0.16999 | ppl     1.19
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 350.85s | valid loss 7.00482 | valid ppl  1101.94
-----------------------------------------------------------------------------------------
./dt11112222/3675137.p
(115879,)
torch.Size([109780, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  1097/ 5489 batches | lr 0.000001 | 60.34 ms | loss 2.63910 | ppl    14.00
| epoch   1 |  2194/ 5489 batches | lr 0.000001 | 61.22 ms | loss 0.74731 | ppl     2.11
| epoch   1 |  3291/ 5489 batches | lr 0.000001 | 61.49 ms | loss 0.04613 | ppl     1.05
| epoch   1 |  4388/ 5489 batches | lr 0.000001 | 61.38 ms | loss 0.04720 | ppl     1.05
| epoch   1 |  5485/ 5489 batches | lr 0.000001 | 61.28 ms | loss 0.11308 | ppl     1.12
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 353.07s | valid loss 10.01277 | valid ppl 22309.59
-----------------------------------------------------------------------------------------
./dt11112222/951809.p
(115853,)
torch.Size([109755, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  1097/ 5487 batches | lr 0.000001 | 61.23 ms | loss 2.18831 | ppl     8.92
| epoch   1 |  2194/ 5487 batches | lr 0.000001 | 57.89 ms | loss 0.14931 | ppl     1.16
| epoch   1 |  3291/ 5487 batches | lr 0.000001 | 54.48 ms | loss 0.09622 | ppl     1.10
| epoch   1 |  4388/ 5487 batches | lr 0.000001 | 61.14 ms | loss 0.87830 | ppl     2.41
| epoch   1 |  5485/ 5487 batches | lr 0.000001 | 61.08 ms | loss 1.40562 | ppl     4.08
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 342.54s | valid loss 0.42434 | valid ppl     1.53
-----------------------------------------------------------------------------------------
./dt11112222/98049.p
(115853,)
torch.Size([109755, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  1097/ 5487 batches | lr 0.000001 | 61.18 ms | loss 2.41651 | ppl    11.21
| epoch   1 |  2194/ 5487 batches | lr 0.000001 | 61.15 ms | loss 1.22052 | ppl     3.39
| epoch   1 |  3291/ 5487 batches | lr 0.000001 | 61.12 ms | loss 0.44676 | ppl     1.56
| epoch   1 |  4388/ 5487 batches | lr 0.000001 | 61.12 ms | loss 0.09451 | ppl     1.10
| epoch   1 |  5485/ 5487 batches | lr 0.000001 | 51.19 ms | loss 0.45911 | ppl     1.58
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 342.95s | valid loss 0.26036 | valid ppl     1.30
-----------------------------------------------------------------------------------------
./dt11112222/558337.p
(115876,)
torch.Size([109777, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  1097/ 5488 batches | lr 0.000001 | 61.18 ms | loss 2.56862 | ppl    13.05
| epoch   1 |  2194/ 5488 batches | lr 0.000001 | 61.11 ms | loss 0.16024 | ppl     1.17
| epoch   1 |  3291/ 5488 batches | lr 0.000001 | 61.13 ms | loss 0.11139 | ppl     1.12
| epoch   1 |  4388/ 5488 batches | lr 0.000001 | 61.18 ms | loss 0.64873 | ppl     1.91
| epoch   1 |  5485/ 5488 batches | lr 0.000001 | 61.15 ms | loss 2.13986 | ppl     8.50
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 353.66s | valid loss 0.13352 | valid ppl     1.14
-----------------------------------------------------------------------------------------
./dt11112222/3365633.p
(115852,)
torch.Size([109754, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  1097/ 5487 batches | lr 0.000001 | 57.05 ms | loss 0.38198 | ppl     1.47
| epoch   1 |  2194/ 5487 batches | lr 0.000001 | 55.63 ms | loss 0.09850 | ppl     1.10
| epoch   1 |  3291/ 5487 batches | lr 0.000001 | 61.19 ms | loss 5.49590 | ppl   243.69
| epoch   1 |  4388/ 5487 batches | lr 0.000001 | 61.20 ms | loss 0.25487 | ppl     1.29
| epoch   1 |  5485/ 5487 batches | lr 0.000001 | 61.22 ms | loss 0.39163 | ppl     1.48
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 342.60s | valid loss 1.08486 | valid ppl     2.96
-----------------------------------------------------------------------------------------
./dt11112222/108033.p
(115876,)
torch.Size([109777, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  1097/ 5488 batches | lr 0.000001 | 61.25 ms | loss 0.79353 | ppl     2.21
| epoch   1 |  2194/ 5488 batches | lr 0.000001 | 61.24 ms | loss 0.40399 | ppl     1.50
| epoch   1 |  3291/ 5488 batches | lr 0.000001 | 60.32 ms | loss 1.70321 | ppl     5.49
| epoch   1 |  4388/ 5488 batches | lr 0.000001 | 52.36 ms | loss 0.40728 | ppl     1.50
| epoch   1 |  5485/ 5488 batches | lr 0.000001 | 61.20 ms | loss 0.29021 | ppl     1.34
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 342.79s | valid loss 7.99610 | valid ppl  2969.37
-----------------------------------------------------------------------------------------
./dt11112222/3834113.p
(115880,)
torch.Size([109781, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  1097/ 5489 batches | lr 0.000001 | 61.18 ms | loss 0.36406 | ppl     1.44
| epoch   1 |  2194/ 5489 batches | lr 0.000001 | 61.20 ms | loss 1.11222 | ppl     3.04
| epoch   1 |  3291/ 5489 batches | lr 0.000001 | 61.29 ms | loss 0.81451 | ppl     2.26
| epoch   1 |  4388/ 5489 batches | lr 0.000001 | 61.25 ms | loss 6.46701 | ppl   643.56
| epoch   1 |  5485/ 5489 batches | lr 0.000001 | 61.27 ms | loss 0.53240 | ppl     1.70
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 352.07s | valid loss 0.89003 | valid ppl     2.44
-----------------------------------------------------------------------------------------
./dt11112222/4278529.p
(89759,)
torch.Size([84966, 2, 300])
torch.Size([4183, 2, 300])




| epoch   1 |   849/ 4248 batches | lr 0.000001 | 53.78 ms | loss 0.80764 | ppl     2.24
| epoch   1 |  1698/ 4248 batches | lr 0.000001 | 61.20 ms | loss 2.21260 | ppl     9.14
| epoch   1 |  2547/ 4248 batches | lr 0.000001 | 61.13 ms | loss 0.11679 | ppl     1.12
| epoch   1 |  3396/ 4248 batches | lr 0.000001 | 61.17 ms | loss 1.25251 | ppl     3.50
| epoch   1 |  4245/ 4248 batches | lr 0.000001 | 61.29 ms | loss 0.63811 | ppl     1.89
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 267.08s | valid loss 8.21851 | valid ppl  3708.98
-----------------------------------------------------------------------------------------
./dt11112222/4488705.p
(115870,)
torch.Size([109771, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  1097/ 5488 batches | lr 0.000001 | 61.25 ms | loss 0.21236 | ppl     1.24
| epoch   1 |  2194/ 5488 batches | lr 0.000001 | 61.28 ms | loss 0.13701 | ppl     1.15
| epoch   1 |  3291/ 5488 batches | lr 0.000001 | 61.25 ms | loss 0.09716 | ppl     1.10
| epoch   1 |  4388/ 5488 batches | lr 0.000001 | 51.05 ms | loss 0.18254 | ppl     1.20
| epoch   1 |  5485/ 5488 batches | lr 0.000001 | 61.13 ms | loss 0.60617 | ppl     1.83
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 342.66s | valid loss 1.06984 | valid ppl     2.91
-----------------------------------------------------------------------------------------
./dt11112222/3924993.p
(115857,)
torch.Size([109759, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  1097/ 5487 batches | lr 0.000001 | 61.19 ms | loss 6.04851 | ppl   423.48
| epoch   1 |  2194/ 5487 batches | lr 0.000001 | 61.16 ms | loss 2.63407 | ppl    13.93
| epoch   1 |  3291/ 5487 batches | lr 0.000001 | 61.50 ms | loss 3.43953 | ppl    31.17
| epoch   1 |  4388/ 5487 batches | lr 0.000001 | 61.24 ms | loss 4.49561 | ppl    89.62
| epoch   1 |  5485/ 5487 batches | lr 0.000001 | 61.19 ms | loss 0.93343 | ppl     2.54
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 353.77s | valid loss 0.37965 | valid ppl     1.46
-----------------------------------------------------------------------------------------
./dt11112222/3039233.p
(115881,)
torch.Size([109781, 2, 300])
torch.Size([5490, 2, 300])




| epoch   1 |  1097/ 5489 batches | lr 0.000001 | 53.19 ms | loss 0.09726 | ppl     1.10
| epoch   1 |  2194/ 5489 batches | lr 0.000001 | 61.09 ms | loss 2.69829 | ppl    14.85
| epoch   1 |  3291/ 5489 batches | lr 0.000001 | 61.16 ms | loss 10.13801 | ppl 25286.06
| epoch   1 |  4388/ 5489 batches | lr 0.000001 | 61.14 ms | loss 0.33899 | ppl     1.40
| epoch   1 |  5485/ 5489 batches | lr 0.000001 | 61.36 ms | loss 0.95256 | ppl     2.59
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 344.72s | valid loss 1.35913 | valid ppl     3.89
-----------------------------------------------------------------------------------------
./dt11112222/975873.p
(115832,)
torch.Size([109735, 2, 300])
torch.Size([5487, 2, 300])




| epoch   1 |  1097/ 5486 batches | lr 0.000001 | 61.31 ms | loss 3.87205 | ppl    48.04
| epoch   1 |  2194/ 5486 batches | lr 0.000001 | 61.14 ms | loss 1.10818 | ppl     3.03
| epoch   1 |  3291/ 5486 batches | lr 0.000001 | 51.35 ms | loss 0.16887 | ppl     1.18
| epoch   1 |  4388/ 5486 batches | lr 0.000001 | 61.25 ms | loss 0.06214 | ppl     1.06
| epoch   1 |  5485/ 5486 batches | lr 0.000001 | 61.26 ms | loss 0.11805 | ppl     1.13
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 342.58s | valid loss 3.76513 | valid ppl    43.17
-----------------------------------------------------------------------------------------
./dt11112222/1793.p
(115877,)
torch.Size([109778, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  1097/ 5488 batches | lr 0.000001 | 61.23 ms | loss 2.74816 | ppl    15.61
| epoch   1 |  2194/ 5488 batches | lr 0.000001 | 61.28 ms | loss 3.38066 | ppl    29.39
| epoch   1 |  3291/ 5488 batches | lr 0.000001 | 61.21 ms | loss 0.16757 | ppl     1.18
| epoch   1 |  4388/ 5488 batches | lr 0.000001 | 61.32 ms | loss 0.40930 | ppl     1.51
| epoch   1 |  5485/ 5488 batches | lr 0.000001 | 52.62 ms | loss 1.54525 | ppl     4.69
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 344.28s | valid loss 0.13221 | valid ppl     1.14
-----------------------------------------------------------------------------------------
./dt11112222/884737.p
(115860,)
torch.Size([109762, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  1097/ 5488 batches | lr 0.000001 | 61.59 ms | loss 2.02295 | ppl     7.56
| epoch   1 |  2194/ 5488 batches | lr 0.000001 | 61.13 ms | loss 0.29025 | ppl     1.34
| epoch   1 |  3291/ 5488 batches | lr 0.000001 | 61.16 ms | loss 0.09643 | ppl     1.10
| epoch   1 |  4388/ 5488 batches | lr 0.000001 | 61.11 ms | loss 0.07433 | ppl     1.08
| epoch   1 |  5485/ 5488 batches | lr 0.000001 | 60.01 ms | loss 0.19669 | ppl     1.22
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 348.30s | valid loss 4.53721 | valid ppl    93.43
-----------------------------------------------------------------------------------------
./dt11112222/348929.p
(115857,)
torch.Size([109759, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  1097/ 5487 batches | lr 0.000001 | 61.33 ms | loss 0.26440 | ppl     1.30
| epoch   1 |  2194/ 5487 batches | lr 0.000001 | 61.25 ms | loss 0.40917 | ppl     1.51
| epoch   1 |  3291/ 5487 batches | lr 0.000001 | 61.26 ms | loss 6.09788 | ppl   444.91
| epoch   1 |  4388/ 5487 batches | lr 0.000001 | 61.39 ms | loss 0.34600 | ppl     1.41
| epoch   1 |  5485/ 5487 batches | lr 0.000001 | 61.13 ms | loss 0.53233 | ppl     1.70
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 353.61s | valid loss 1.35404 | valid ppl     3.87
-----------------------------------------------------------------------------------------
./dt11112222/232961.p
(115877,)
torch.Size([109778, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  1097/ 5488 batches | lr 0.000001 | 61.18 ms | loss 3.15781 | ppl    23.52
| epoch   1 |  2194/ 5488 batches | lr 0.000001 | 53.30 ms | loss 3.02576 | ppl    20.61
| epoch   1 |  3291/ 5488 batches | lr 0.000001 | 58.72 ms | loss 0.28667 | ppl     1.33
| epoch   1 |  4388/ 5488 batches | lr 0.000001 | 61.11 ms | loss 0.22430 | ppl     1.25
| epoch   1 |  5485/ 5488 batches | lr 0.000001 | 61.15 ms | loss 0.85939 | ppl     2.36
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 342.31s | valid loss 16.29688 | valid ppl 11957650.31
-----------------------------------------------------------------------------------------
./dt11112222/878593.p
(115856,)
torch.Size([109758, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  1097/ 5487 batches | lr 0.000001 | 61.23 ms | loss 2.32347 | ppl    10.21
| epoch   1 |  2194/ 5487 batches | lr 0.000001 | 61.17 ms | loss 0.38145 | ppl     1.46
| epoch   1 |  3291/ 5487 batches | lr 0.000001 | 61.23 ms | loss 0.19627 | ppl     1.22
| epoch   1 |  4388/ 5487 batches | lr 0.000001 | 58.45 ms | loss 0.11035 | ppl     1.12
| epoch   1 |  5485/ 5487 batches | lr 0.000001 | 54.08 ms | loss 0.63977 | ppl     1.90
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 342.64s | valid loss 0.13813 | valid ppl     1.15
-----------------------------------------------------------------------------------------
./dt11112222/486657.p
(115861,)
torch.Size([109762, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  1097/ 5488 batches | lr 0.000001 | 61.26 ms | loss 0.07567 | ppl     1.08
| epoch   1 |  2194/ 5488 batches | lr 0.000001 | 61.20 ms | loss 0.33922 | ppl     1.40
| epoch   1 |  3291/ 5488 batches | lr 0.000001 | 61.13 ms | loss 7.93691 | ppl  2798.71
| epoch   1 |  4388/ 5488 batches | lr 0.000001 | 61.18 ms | loss 0.60872 | ppl     1.84
| epoch   1 |  5485/ 5488 batches | lr 0.000001 | 61.27 ms | loss 2.22708 | ppl     9.27
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 353.55s | valid loss 3.44285 | valid ppl    31.28
-----------------------------------------------------------------------------------------
./dt11112222/7712001.p
(115849,)
torch.Size([109751, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  1097/ 5487 batches | lr 0.000001 | 51.42 ms | loss 0.09378 | ppl     1.10
| epoch   1 |  2194/ 5487 batches | lr 0.000001 | 61.17 ms | loss 0.08141 | ppl     1.08
| epoch   1 |  3291/ 5487 batches | lr 0.000001 | 61.19 ms | loss 2.90848 | ppl    18.33
| epoch   1 |  4388/ 5487 batches | lr 0.000001 | 61.12 ms | loss 0.16932 | ppl     1.18
| epoch   1 |  5485/ 5487 batches | lr 0.000001 | 61.15 ms | loss 0.58217 | ppl     1.79
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 342.36s | valid loss 0.80519 | valid ppl     2.24
-----------------------------------------------------------------------------------------
./dt11112222/54273.p
(29306,)
torch.Size([27535, 2, 300])
torch.Size([1161, 2, 300])




| epoch   1 |   275/ 1376 batches | lr 0.000001 | 61.51 ms | loss 0.30114 | ppl     1.35
| epoch   1 |   550/ 1376 batches | lr 0.000001 | 61.24 ms | loss 2.00446 | ppl     7.42
| epoch   1 |   825/ 1376 batches | lr 0.000001 | 61.15 ms | loss 0.14452 | ppl     1.16
| epoch   1 |  1100/ 1376 batches | lr 0.000001 | 61.11 ms | loss 0.34151 | ppl     1.41
| epoch   1 |  1375/ 1376 batches | lr 0.000001 | 61.20 ms | loss 3.33897 | ppl    28.19
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 88.35s | valid loss 4.01719 | valid ppl    55.54
-----------------------------------------------------------------------------------------
./dt11112222/3484417.p
(115849,)
torch.Size([109751, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  1097/ 5487 batches | lr 0.000001 | 61.47 ms | loss 0.39130 | ppl     1.48
| epoch   1 |  2194/ 5487 batches | lr 0.000001 | 51.40 ms | loss 0.65365 | ppl     1.92
| epoch   1 |  3291/ 5487 batches | lr 0.000001 | 61.16 ms | loss 2.54352 | ppl    12.72
| epoch   1 |  4388/ 5487 batches | lr 0.000001 | 61.14 ms | loss 0.86538 | ppl     2.38
| epoch   1 |  5485/ 5487 batches | lr 0.000001 | 61.18 ms | loss 0.23546 | ppl     1.27
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 342.61s | valid loss 11.21307 | valid ppl 74092.71
-----------------------------------------------------------------------------------------
./dt11112222/40193.p
(115839,)
torch.Size([109742, 2, 300])
torch.Size([5487, 2, 300])


In [None]:
torch.save(model,'./best_model_multi10.pt')

In [None]:
print('sdfsdf')

In [None]:
train_data.shape

In [None]:
val_data.shape

In [None]:
# for epoch in range(1, epochs + 1):
#     epoch_start_time = time.time()
#     train(train_data)
    
#     if(epoch % 1 == 0):
#         val_loss = plot_and_loss(model, val_data,epoch)
#         predict_future(model, val_data,200)
#     else:
#         val_loss = evaluate(model, val_data)
        
#     print('-' * 89)
#     print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.5f} | valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
#                                      val_loss, math.exp(val_loss)))
#     print('-' * 89)

#     if val_loss < best_val_loss:
#         best_val_loss = val_loss
#         best_model = model
#         torch.save(best_model,'./best_model_multi.pt')

#     scheduler.step() 

# #src = torch.rand(input_window, batch_size, 1) # (source sequence length,batch size,feature number) 
# #out = model(src)
# #
# #print(out)
# #print(out.shape)

In [None]:
# src = torch.rand(30, batch_size, 30) # (source sequence length,batch size,feature number) 
# src = src.to(device="cuda")
# # out = gf(val_data[0])
# print(src.shape)
# out = model(src)

# print(out)
# print(out.shape)

In [None]:
print('hi')

In [None]:
# gf= torch.load('../input/dt11112222/best_model_multi.pt')