In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import joblib
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
#         print(os.path.join(dirname, filename))
        continue
import torch
import torch.nn as nn
import numpy as np
import time
import math
from matplotlib import pyplot
import joblib
from sklearn.preprocessing import MinMaxScaler

torch.manual_seed(0)
np.random.seed(0)
calculate_loss_over_all_values = False

In [2]:
# # for TPU
# device = xm.xla_device()
# torch.set_default_tensor_type('torch.FloatTensor')
# import torch_xla
# import torch_xla.core.xla_model as xm

## FOR GPU AND CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [3]:
# import joblib
# this_inst_df = joblib.load(f'./dt11112222/1793.p')
# this_inst_df

In [3]:
# S is the source sequence length
# T is the target sequence length
# N is the batch size
# E is the feature number
#src = torch.rand((10, 32, 512)) # (S,N,E) 
#tgt = torch.rand((20, 32, 512)) # (T,N,E)
#out = transformer_model(src, tgt)
#print(out)

input_window = 300
output_window = 5
batch_size = 5 # batch size

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()       
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        #pe.requires_grad = False
        self.register_buffer('pe', pe)
    def forward(self, x):
        return x + self.pe[:x.size(0), :]

class TransAm(nn.Module):
    def __init__(self,feature_size=30,num_layers=2,dropout=0.2):
        super(TransAm, self).__init__()
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(feature_size)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=feature_size, nhead=10, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)        
        self.decoder = nn.Linear(feature_size,1)
        self.init_weights()
    def init_weights(self):
        initrange = 0.1    
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)
    def forward(self,src):
        if self.src_mask is None or self.src_mask.size(0) != len(src):
            device = src.device
            mask = self._generate_square_subsequent_mask(len(src)).to(device)
            self.src_mask = mask
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src,self.src_mask)#, self.src_mask)
        output = self.decoder(output)
        return output
    def _generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

# if window is 100 and prediction step is 1
# in -> [0..99]
# target -> [1..100]
def create_inout_sequences(input_data, tw):
    inout_seq = []
    L = len(input_data)
    for i in range(L-tw):
        train_seq = np.append(input_data[i:i+tw][:-output_window] , output_window * [0])
        train_label = input_data[i:i+tw]
        #train_label = input_data[i+output_window:i+tw+output_window]
        inout_seq.append((train_seq ,train_label))
    return torch.FloatTensor(inout_seq)

def get_data2(inst):
    this_inst_df = joblib.load(f'./dt11112222/{inst}.p')
    amplitude = this_inst_df['close'].to_numpy()
    amplitude = amplitude.reshape(-1)
    print(amplitude.shape)
    
    scaler = MinMaxScaler(feature_range=(-15, 15)) 
    amplitude = scaler.fit_transform(amplitude.reshape(-1, 1)).reshape(-1)
    
    sampels = int(amplitude.shape[0]*0.95)
    train_data = amplitude[:sampels]
    test_data = amplitude[sampels:]

    train_sequence = create_inout_sequences(train_data,input_window)
    train_sequence = train_sequence[:-output_window]
    print(train_sequence.shape)
    
    test_data = create_inout_sequences(test_data,input_window)
    test_data = test_data[:-output_window]
    print(test_data.shape)
    
    return train_sequence.to(device),test_data.to(device)

def get_batch(source, i,batch_size):
    seq_len = min(batch_size, len(source) - 1 - i)
    data = source[i:i+seq_len]    
    input = torch.stack(torch.stack([item[0] for item in data]).chunk(input_window,1)) # 1 is feature size
    target = torch.stack(torch.stack([item[1] for item in data]).chunk(input_window,1))
    return input, target


def train(train_data):
    model.train() # Turn on the train mode
    total_loss = 0.
    start_time = time.time()
    for batch, i in enumerate(range(0, len(train_data) - 1, batch_size)):
        data, targets = get_batch(train_data, i,batch_size)
        optimizer.zero_grad()
        output = model(data)        
        if calculate_loss_over_all_values:
            loss = criterion(output, targets)
        else:
            loss = criterion(output[-output_window:], targets[-output_window:])
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()
        total_loss += loss.item()
        log_interval = int(len(train_data) / batch_size / 5)
        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | '
                  'lr {:02.6f} | {:5.2f} ms | '
                  'loss {:5.5f} | ppl {:8.2f}'.format(
                    epoch, batch, len(train_data) // batch_size, scheduler.get_lr()[0],
                    elapsed * 1000 / log_interval,
                    cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
plot_counter = 0
def plot_and_loss(eval_model, data_source,epoch,tknip):
    global plot_counter
    eval_model.eval() 
    total_loss = 0.
    test_result = torch.Tensor(0)    
    truth = torch.Tensor(0)
    with torch.no_grad():
        for i in range(0, len(data_source) - 1):
            data, target = get_batch(data_source, i,1)
            # look like the model returns static values for the output window
            output = eval_model(data)    
            if calculate_loss_over_all_values:                                
                total_loss += criterion(output, target).item()
            else:
                total_loss += criterion(output[-output_window:], target[-output_window:]).item()

            test_result = torch.cat((test_result.to(device), output[-1].view(-1).to(device)), 0) #todo: check this. -> looks good to me
            truth = torch.cat((truth.to(device), target[-1].view(-1).to(device)), 0)
            
#             test_result = torch.cat((test_result, output[-1].view(-1).cpu()), 0) #todo: check this. -> looks good to me
#             truth = torch.cat((truth, target[-1].view(-1).cpu()), 0)
            
    test_result = test_result.cpu().numpy()
    truth = truth.cpu().numpy()
    len(test_result)
    plot_counter +=1
    pyplot.plot(test_result,color="red")
    pyplot.plot(truth[:500],color="blue")
    pyplot.plot(test_result-truth,color="green")
    pyplot.grid(True, which='both')
    pyplot.axhline(y=0, color='k')
    pyplot.savefig(f'./plots2/transformer-epoch_{plot_counter}_{epoch}_{tknip}.png')
    pyplot.close()
    
    return total_loss / i


def predict_future(eval_model, data_source,steps,tknip):
    eval_model.eval() 
    total_loss = 0.
    test_result = torch.Tensor(0)    
    truth = torch.Tensor(0)
    _ , data = get_batch(data_source, 0,1)
    with torch.no_grad():
        for i in range(0, steps,1):
            input = torch.clone(data[-input_window:])
            input[-output_window:] = 0     
            output = eval_model(data[-input_window:])                        
            data = torch.cat((data, output[-1:]))
            
#     data = data.to(device).view(-1)
    data = data.cpu().view(-1)
    pyplot.plot(data,color="red")       
    pyplot.plot(data[:input_window],color="blue")
    pyplot.grid(True, which='both')
    pyplot.axhline(y=0, color='k')
    pyplot.savefig(f'./plots2/transformer-future_{plot_counter}_{steps}_{tknip}.png')
    pyplot.close()
        
# entweder ist hier ein fehler im loss oder in der train methode, aber die ergebnisse sind unterschiedlich 
# auch zu denen der predict_future
def evaluate(eval_model, data_source):
    eval_model.eval() # Turn on the evaluation mode
    total_loss = 0.
    eval_batch_size = 1000
    with torch.no_grad():
        for i in range(0, len(data_source) - 1, eval_batch_size):
            data, targets = get_batch(data_source, i,eval_batch_size)
            output = eval_model(data)            
            if calculate_loss_over_all_values:
                total_loss += len(data[0])* criterion(output, targets).to(device).item()
#                 total_loss += len(data[0])* criterion(output, targets).cpu().item()
            else:                                
                total_loss += len(data[0])* criterion(output[-output_window:], targets[-output_window:]).to(device).item()            
#                 total_loss += len(data[0])* criterion(output[-output_window:], targets[-output_window:]).cpu().item()            
    return total_loss / len(data_source)

In [4]:
# model = TransAm().to(device)
model= torch.load('./best_model_multi10.pt')

valid_loss_list = []

criterion = nn.MSELoss()
lr = 0.000001
#optimizer = torch.optim.SGD(model.parameters(), lr=lr)
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.98)

best_val_loss = float("inf")
epochs = 1 # The number of epochs
best_model = None

In [None]:
counter = 0
files_to_consider = 15

while True:
    for dirname, _, filenames in os.walk('./dt11112222/'):
        for filename in filenames:
            path_str = os.path.join(dirname, filename)
            split_lst = path_str.split('.')
            contains = 'p' in split_lst
            if(contains): ## and counter<files_to_consider

                one_plty = filename.split('/')[0].split('.')[0]

                train_data, val_data = get_data2(one_plty)

                for epoch in range(1, epochs + 1):
                    epoch_start_time = time.time()
                    train(train_data)

                    if(epoch % 1 == 0):
                        val_loss = plot_and_loss(model, val_data,epoch,one_plty)
                        predict_future(model, val_data,200,one_plty)
                    else:
                        val_loss = evaluate(model, val_data)

                    print('-' * 89)
                    print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.5f} | valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                                     val_loss, math.exp(val_loss)))
                    print('-' * 89)
                    
                    torch.save(model,'./best_model_multi11.pt')
                    torch.save(model,f'./inst_wise_model_1x/{one_plty}.pt')
                    
                    valid_loss_list.append({'inst':one_plty,
                                            'loss':val_loss})
                    valid_loss_map_df = pd.DataFrame(valid_loss_list)
                    valid_loss_map_df.to_excel('./valid_loss_map_df_1x.xlsx')
                    
                    
                    if val_loss < best_val_loss:
                        best_val_loss = val_loss
                        best_model = model
                        torch.save(best_model,'./best_model_multi11_best.pt')
                        model= torch.load('./best_model_multi10_best.pt')

                #src = torch.rand(input_window, batch_size, 1) # (source sequence length,batch size,feature number) 
                #out = model(src)
                #
                #print(out)
                #print(out.shape)

                print(path_str)
                counter+=1
                
    scheduler.step() 

(115880,)
torch.Size([109781, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  4391/21956 batches | lr 0.000001 | 21.97 ms | loss 0.08706 | ppl     1.09
| epoch   1 |  8782/21956 batches | lr 0.000001 | 22.25 ms | loss 0.04857 | ppl     1.05
| epoch   1 | 13173/21956 batches | lr 0.000001 | 22.30 ms | loss 0.04342 | ppl     1.04
| epoch   1 | 17564/21956 batches | lr 0.000001 | 19.18 ms | loss 0.09779 | ppl     1.10
| epoch   1 | 21955/21956 batches | lr 0.000001 | 22.23 ms | loss 0.10806 | ppl     1.11
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 491.94s | valid loss 8.85451 | valid ppl  7005.89
-----------------------------------------------------------------------------------------
./dt11112222/2952193.p
(115877,)
torch.Size([109778, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  4391/21955 batches | lr 0.000001 | 22.32 ms | loss 2.96867 | ppl    19.47
| epoch   1 |  8782/21955 batches | lr 0.000001 | 21.40 ms | loss 0.43484 | ppl     1.54
| epoch   1 | 13173/21955 batches | lr 0.000001 | 19.98 ms | loss 0.04881 | ppl     1.05
| epoch   1 | 17564/21955 batches | lr 0.000001 | 22.31 ms | loss 0.21081 | ppl     1.23
| epoch   1 | 21955/21955 batches | lr 0.000001 | 22.27 ms | loss 0.76528 | ppl     2.15
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 493.55s | valid loss 1.65074 | valid ppl     5.21
-----------------------------------------------------------------------------------------
./dt11112222/895745.p
(115858,)
torch.Size([109760, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  4390/21952 batches | lr 0.000001 | 19.03 ms | loss 1.72853 | ppl     5.63
| epoch   1 |  8780/21952 batches | lr 0.000001 | 22.27 ms | loss 0.12743 | ppl     1.14
| epoch   1 | 13170/21952 batches | lr 0.000001 | 22.36 ms | loss 1.27592 | ppl     3.58
| epoch   1 | 17560/21952 batches | lr 0.000001 | 22.34 ms | loss 0.82095 | ppl     2.27
| epoch   1 | 21950/21952 batches | lr 0.000001 | 19.18 ms | loss 0.18217 | ppl     1.20
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 479.89s | valid loss 0.21266 | valid ppl     1.24
-----------------------------------------------------------------------------------------
./dt11112222/2585345.p
(115880,)
torch.Size([109781, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  4391/21956 batches | lr 0.000001 | 22.28 ms | loss 3.03065 | ppl    20.71
| epoch   1 |  8782/21956 batches | lr 0.000001 | 22.27 ms | loss 6.38898 | ppl   595.25
| epoch   1 | 13173/21956 batches | lr 0.000001 | 19.51 ms | loss 3.04744 | ppl    21.06
| epoch   1 | 17564/21956 batches | lr 0.000001 | 21.99 ms | loss 0.84512 | ppl     2.33
| epoch   1 | 21955/21956 batches | lr 0.000001 | 22.34 ms | loss 1.93013 | ppl     6.89
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 494.09s | valid loss 3.54701 | valid ppl    34.71
-----------------------------------------------------------------------------------------
./dt11112222/969473.p
(115849,)
torch.Size([109751, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  4390/21950 batches | lr 0.000001 | 22.28 ms | loss 1.10250 | ppl     3.01
| epoch   1 |  8780/21950 batches | lr 0.000001 | 19.19 ms | loss 0.64076 | ppl     1.90
| epoch   1 | 13170/21950 batches | lr 0.000001 | 22.29 ms | loss 0.40898 | ppl     1.51
| epoch   1 | 17560/21950 batches | lr 0.000001 | 22.28 ms | loss 0.24901 | ppl     1.28
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 486.88s | valid loss 3.02039 | valid ppl    20.50
-----------------------------------------------------------------------------------------
./dt11112222/1195009.p
(115858,)
torch.Size([109760, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  4390/21952 batches | lr 0.000001 | 22.37 ms | loss 5.99139 | ppl   399.97
| epoch   1 |  8780/21952 batches | lr 0.000001 | 22.32 ms | loss 0.33451 | ppl     1.40
| epoch   1 | 13170/21952 batches | lr 0.000001 | 22.34 ms | loss 0.20684 | ppl     1.23
| epoch   1 | 17560/21952 batches | lr 0.000001 | 19.15 ms | loss 0.20000 | ppl     1.22
| epoch   1 | 21950/21952 batches | lr 0.000001 | 22.31 ms | loss 0.24504 | ppl     1.28
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 494.31s | valid loss 6.49502 | valid ppl   661.83
-----------------------------------------------------------------------------------------
./dt11112222/523009.p
(115871,)
torch.Size([109772, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  4390/21954 batches | lr 0.000001 | 22.26 ms | loss 2.78019 | ppl    16.12
| epoch   1 |  8780/21954 batches | lr 0.000001 | 21.38 ms | loss 7.48320 | ppl  1777.91
| epoch   1 | 13170/21954 batches | lr 0.000001 | 20.09 ms | loss 4.37720 | ppl    79.61
| epoch   1 | 17560/21954 batches | lr 0.000001 | 22.30 ms | loss 1.28243 | ppl     3.61
| epoch   1 | 21950/21954 batches | lr 0.000001 | 22.29 ms | loss 0.77431 | ppl     2.17
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 493.77s | valid loss 5.63163 | valid ppl   279.12
-----------------------------------------------------------------------------------------
./dt11112222/3675137.p
(115879,)
torch.Size([109780, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  4391/21956 batches | lr 0.000001 | 19.15 ms | loss 6.31850 | ppl   554.74
| epoch   1 |  8782/21956 batches | lr 0.000001 | 22.32 ms | loss 2.75774 | ppl    15.76
| epoch   1 | 13173/21956 batches | lr 0.000001 | 22.33 ms | loss 0.23490 | ppl     1.26
| epoch   1 | 17564/21956 batches | lr 0.000001 | 22.31 ms | loss 0.19592 | ppl     1.22
| epoch   1 | 21955/21956 batches | lr 0.000001 | 19.16 ms | loss 0.25604 | ppl     1.29
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 480.36s | valid loss 8.06208 | valid ppl  3171.89
-----------------------------------------------------------------------------------------
./dt11112222/951809.p
(115853,)
torch.Size([109755, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  4390/21951 batches | lr 0.000001 | 22.33 ms | loss 3.45944 | ppl    31.80
| epoch   1 |  8780/21951 batches | lr 0.000001 | 22.31 ms | loss 0.93366 | ppl     2.54
| epoch   1 | 13170/21951 batches | lr 0.000001 | 19.10 ms | loss 0.52081 | ppl     1.68
| epoch   1 | 17560/21951 batches | lr 0.000001 | 22.32 ms | loss 3.08540 | ppl    21.88
| epoch   1 | 21950/21951 batches | lr 0.000001 | 22.30 ms | loss 3.86938 | ppl    47.91
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 493.85s | valid loss 1.23359 | valid ppl     3.43
-----------------------------------------------------------------------------------------
./dt11112222/98049.p
(115853,)
torch.Size([109755, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  4390/21951 batches | lr 0.000001 | 22.34 ms | loss 1.88149 | ppl     6.56
| epoch   1 |  8780/21951 batches | lr 0.000001 | 19.14 ms | loss 0.81593 | ppl     2.26
| epoch   1 | 13170/21951 batches | lr 0.000001 | 22.34 ms | loss 0.40002 | ppl     1.49
| epoch   1 | 17560/21951 batches | lr 0.000001 | 22.34 ms | loss 0.27405 | ppl     1.32
| epoch   1 | 21950/21951 batches | lr 0.000001 | 21.53 ms | loss 1.59346 | ppl     4.92
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 486.39s | valid loss 0.76224 | valid ppl     2.14
-----------------------------------------------------------------------------------------
./dt11112222/558337.p
(115876,)
torch.Size([109777, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  4391/21955 batches | lr 0.000001 | 22.38 ms | loss 6.01780 | ppl   410.67
| epoch   1 |  8782/21955 batches | lr 0.000001 | 22.29 ms | loss 0.59537 | ppl     1.81
| epoch   1 | 13173/21955 batches | lr 0.000001 | 19.56 ms | loss 0.22156 | ppl     1.25
| epoch   1 | 17564/21955 batches | lr 0.000001 | 22.32 ms | loss 0.48233 | ppl     1.62
| epoch   1 | 21955/21955 batches | lr 0.000001 | 22.33 ms | loss 1.89659 | ppl     6.66
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 496.24s | valid loss 0.30848 | valid ppl     1.36
-----------------------------------------------------------------------------------------
./dt11112222/3365633.p
(115852,)
torch.Size([109754, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  4390/21950 batches | lr 0.000001 | 21.58 ms | loss 0.33747 | ppl     1.40
| epoch   1 |  8780/21950 batches | lr 0.000001 | 19.81 ms | loss 0.30606 | ppl     1.36
| epoch   1 | 13170/21950 batches | lr 0.000001 | 22.30 ms | loss 3.14155 | ppl    23.14
| epoch   1 | 17560/21950 batches | lr 0.000001 | 22.31 ms | loss 0.26959 | ppl     1.31
| epoch   1 | 21950/21950 batches | lr 0.000001 | 20.40 ms | loss 1.05788 | ppl     2.88
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 483.11s | valid loss 0.47904 | valid ppl     1.61
-----------------------------------------------------------------------------------------
./dt11112222/108033.p
(115876,)
torch.Size([109777, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  4391/21955 batches | lr 0.000001 | 22.36 ms | loss 2.96156 | ppl    19.33
| epoch   1 |  8782/21955 batches | lr 0.000001 | 22.34 ms | loss 0.45293 | ppl     1.57
| epoch   1 | 13173/21955 batches | lr 0.000001 | 22.35 ms | loss 0.86494 | ppl     2.37
| epoch   1 | 17564/21955 batches | lr 0.000001 | 19.07 ms | loss 0.32113 | ppl     1.38
| epoch   1 | 21955/21955 batches | lr 0.000001 | 22.32 ms | loss 0.36909 | ppl     1.45
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 494.08s | valid loss 5.04237 | valid ppl   154.84
-----------------------------------------------------------------------------------------
./dt11112222/3834113.p
(115880,)
torch.Size([109781, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  4391/21956 batches | lr 0.000001 | 22.31 ms | loss 1.08517 | ppl     2.96
| epoch   1 |  8782/21956 batches | lr 0.000001 | 19.31 ms | loss 3.74719 | ppl    42.40
| epoch   1 | 13173/21956 batches | lr 0.000001 | 22.18 ms | loss 1.31955 | ppl     3.74
| epoch   1 | 17564/21956 batches | lr 0.000001 | 22.35 ms | loss 3.67931 | ppl    39.62
| epoch   1 | 21955/21956 batches | lr 0.000001 | 22.32 ms | loss 0.45328 | ppl     1.57
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 494.21s | valid loss 0.48004 | valid ppl     1.62
-----------------------------------------------------------------------------------------
./dt11112222/4278529.p
(89759,)
torch.Size([84966, 2, 300])
torch.Size([4183, 2, 300])




| epoch   1 |  3398/16993 batches | lr 0.000001 | 18.23 ms | loss 1.88189 | ppl     6.57
| epoch   1 |  6796/16993 batches | lr 0.000001 | 22.27 ms | loss 1.25895 | ppl     3.52
| epoch   1 | 10194/16993 batches | lr 0.000001 | 22.33 ms | loss 0.18912 | ppl     1.21
| epoch   1 | 13592/16993 batches | lr 0.000001 | 22.35 ms | loss 0.78603 | ppl     2.19
| epoch   1 | 16990/16993 batches | lr 0.000001 | 22.31 ms | loss 0.45545 | ppl     1.58
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 377.11s | valid loss 5.06591 | valid ppl   158.53
-----------------------------------------------------------------------------------------
./dt11112222/4488705.p
(115870,)
torch.Size([109771, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  4390/21954 batches | lr 0.000001 | 21.03 ms | loss 1.01931 | ppl     2.77
| epoch   1 |  8780/21954 batches | lr 0.000001 | 22.29 ms | loss 0.57503 | ppl     1.78
| epoch   1 | 13170/21954 batches | lr 0.000001 | 22.30 ms | loss 0.26771 | ppl     1.31
| epoch   1 | 17560/21954 batches | lr 0.000001 | 19.15 ms | loss 0.96141 | ppl     2.62
| epoch   1 | 21950/21954 batches | lr 0.000001 | 22.33 ms | loss 2.17417 | ppl     8.79
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 488.49s | valid loss 0.94827 | valid ppl     2.58
-----------------------------------------------------------------------------------------
./dt11112222/3924993.p
(115857,)
torch.Size([109759, 2, 300])
torch.Size([5488, 2, 300])




| epoch   1 |  4390/21951 batches | lr 0.000001 | 22.35 ms | loss 10.69845 | ppl 44287.27
| epoch   1 |  8780/21951 batches | lr 0.000001 | 22.28 ms | loss 6.97945 | ppl  1074.33
| epoch   1 | 13170/21951 batches | lr 0.000001 | 19.07 ms | loss 8.19106 | ppl  3608.56
| epoch   1 | 17560/21951 batches | lr 0.000001 | 22.33 ms | loss 9.80266 | ppl 18081.84
| epoch   1 | 21950/21951 batches | lr 0.000001 | 22.29 ms | loss 3.02770 | ppl    20.65
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 493.70s | valid loss 1.08590 | valid ppl     2.96
-----------------------------------------------------------------------------------------
./dt11112222/3039233.p
(115881,)
torch.Size([109781, 2, 300])
torch.Size([5490, 2, 300])




| epoch   1 |  4391/21956 batches | lr 0.000001 | 19.14 ms | loss 0.19953 | ppl     1.22
| epoch   1 |  8782/21956 batches | lr 0.000001 | 22.29 ms | loss 1.35841 | ppl     3.89
| epoch   1 | 13173/21956 batches | lr 0.000001 | 22.32 ms | loss 5.93341 | ppl   377.44
| epoch   1 | 17564/21956 batches | lr 0.000001 | 22.29 ms | loss 0.29524 | ppl     1.34
| epoch   1 | 21955/21956 batches | lr 0.000001 | 19.05 ms | loss 3.06738 | ppl    21.49
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 479.30s | valid loss 4.12217 | valid ppl    61.69
-----------------------------------------------------------------------------------------
./dt11112222/975873.p
(115832,)
torch.Size([109735, 2, 300])
torch.Size([5487, 2, 300])




| epoch   1 |  4389/21947 batches | lr 0.000001 | 22.30 ms | loss 8.83615 | ppl  6878.45
| epoch   1 |  8778/21947 batches | lr 0.000001 | 22.30 ms | loss 4.03596 | ppl    56.60
| epoch   1 | 13167/21947 batches | lr 0.000001 | 20.61 ms | loss 1.18982 | ppl     3.29
| epoch   1 | 17556/21947 batches | lr 0.000001 | 21.13 ms | loss 0.24208 | ppl     1.27
| epoch   1 | 21945/21947 batches | lr 0.000001 | 21.16 ms | loss 0.19383 | ppl     1.21
-----------------------------------------------------------------------------------------
| end of epoch   1 | time: 489.80s | valid loss 2.37808 | valid ppl    10.78
-----------------------------------------------------------------------------------------
./dt11112222/1793.p
(115877,)
torch.Size([109778, 2, 300])
torch.Size([5489, 2, 300])




| epoch   1 |  4391/21955 batches | lr 0.000001 | 22.29 ms | loss 1.65856 | ppl     5.25
| epoch   1 |  8782/21955 batches | lr 0.000001 | 22.32 ms | loss 1.77290 | ppl     5.89


In [None]:
torch.save(model,'./best_model_multi11.pt')

In [None]:
print('sdfsdf')

In [None]:
train_data.shape

In [None]:
val_data.shape

In [None]:
# for epoch in range(1, epochs + 1):
#     epoch_start_time = time.time()
#     train(train_data)
    
#     if(epoch % 1 == 0):
#         val_loss = plot_and_loss(model, val_data,epoch)
#         predict_future(model, val_data,200)
#     else:
#         val_loss = evaluate(model, val_data)
        
#     print('-' * 89)
#     print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.5f} | valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
#                                      val_loss, math.exp(val_loss)))
#     print('-' * 89)

#     if val_loss < best_val_loss:
#         best_val_loss = val_loss
#         best_model = model
#         torch.save(best_model,'./best_model_multi.pt')

#     scheduler.step() 

# #src = torch.rand(input_window, batch_size, 1) # (source sequence length,batch size,feature number) 
# #out = model(src)
# #
# #print(out)
# #print(out.shape)

In [None]:
# src = torch.rand(30, batch_size, 30) # (source sequence length,batch size,feature number) 
# src = src.to(device="cuda")
# # out = gf(val_data[0])
# print(src.shape)
# out = model(src)

# print(out)
# print(out.shape)

In [None]:
print('hi')

In [None]:
# gf= torch.load('../input/dt11112222/best_model_multi.pt')