In [None]:
import torch as t
import torch 
import pickle
from torch import nn, Tensor 
import torch, math


from utils import PositionalEncoding

# Select the compute device once: the first CUDA GPU when one is visible,
# otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

if device.type == "cuda":
    # Make float tensors created without an explicit device default to the
    # GPU, and report which GPU was picked up.
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    print(torch.cuda.get_device_name(0))


In [None]:
# Model hyper-parameters.
# The network below consumes input of shape (batch, length, embed_dim):
# `batch_first=True` is set on every encoder block, and the Flatten/Linear
# head collapses the last two axes into `length * embed_dim` features.
embed_dim = 100
length = 110

# Number of stacked Transformer encoder blocks.
t_encoders = 3

# Keyword arguments shared by every encoder block.
encoder_params = dict(
    d_model=embed_dim,     # DO NOT CHANGE
    batch_first=True,      # DO NOT CHANGE
    nhead=5,               # number of attention heads
    dropout=0.0,           # dropout probability
    dim_feedforward=1000,  # width of the hidden feed-forward layer
)

# Optional positional-encoding front end (toggle with PE).
# NOTE(review): PositionalEncoding comes from utils and is not visible here;
# confirm it expects batch-first input like the encoder blocks do.
PE = True
PELayer = [PositionalEncoding(d_model=embed_dim, max_len=length)] if PE else []

t_enc_layers = [
    t.nn.TransformerEncoderLayer(**encoder_params) for _layer in range(t_encoders)
]

# The encoder stack returns one embedding per sequence position (handy for
# seq2seq models), so the head flattens everything after the batch axis and
# maps it to a single scalar output per sample.
regression_head = (
    t.nn.Flatten(start_dim=1, end_dim=-1),
    t.nn.Linear(length * embed_dim, 1),
)

myNet = t.nn.Sequential(*PELayer, *t_enc_layers, *regression_head)



In [None]:
# Use the GPU when one is present; otherwise fall back to the CPU so this cell
# (and the generator/dataset built from `device`) still works on CPU-only hosts.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

import torch as t
from torch.utils.data import DataLoader, TensorDataset
import os
import glob

class DREAMLazyData(TensorDataset):
    """Dataset that lazily loads one pre-saved (X, y) tensor pair per index.

    Item ``i`` is read from ``Batch<i zero-padded to 5 digits>X.pt`` and the
    matching ``...y.pt`` file inside ``path``; nothing is cached between calls.

    Args:
        device: Device the loaded tensors are mapped to. ``None`` (default)
            selects ``'cuda'`` when available and ``'cpu'`` otherwise.
        path: Directory containing the ``.pt`` files. Defaults to
            ``<cwd>/jake/pytorch-tensors-all/``.
        holdout: Number of file pairs subtracted from the pair count when
            reporting ``len()``. Defaults to 1000, the value that was
            previously hard-coded.
    """

    def __init__(self, device=None, path=None, holdout=1000):
        super().__init__()
        if device is None:
            device = 'cuda' if t.cuda.is_available() else 'cpu'
        # Stored on the instance so __getitem__ honours the constructor
        # argument instead of whatever the module-level `device` happens to be.
        self.device = t.device(device)
        self.path = os.getcwd() + '/jake/pytorch-tensors-all/' if path is None else path
        self.holdout = holdout

    def _load(self, filename):
        """Load one saved tensor file from ``self.path`` onto ``self.device``."""
        # map_location lets files saved from a GPU be read on a CPU-only host.
        return t.load(os.path.join(self.path, filename), map_location=self.device)

    def __getitem__(self, item):
        """Return the ``(X, y)`` tensors stored for batch file number ``item``."""
        basename = 'Batch' + str(int(item)).zfill(5)
        return self._load(basename + 'X.pt'), self._load(basename + 'y.pt')

    def __len__(self):
        """Number of usable file pairs: (``*.pt`` count // 2) minus ``holdout``."""
        # glob.glob replaces the undocumented, deprecated glob.glob1 helper;
        # escape() keeps special characters in the directory name literal.
        pattern = os.path.join(glob.escape(self.path), '*.pt')
        n_pairs = len(glob.glob(pattern)) // 2
        # __len__ must never be negative, so clamp when fewer than `holdout`
        # pairs exist (or the directory is missing).
        return max(0, n_pairs - self.holdout)
    
myData = DREAMLazyData()

# batch_size=None turns off automatic batching, so every item yielded by the
# loader is exactly one (X, y) pair as returned by myData[i].
# The shuffling generator is created on `device`
# (presumably required because the default tensor type is CUDA — confirm).
ldX = DataLoader(myData, batch_size=None,
                 shuffle=True,
                 generator=torch.Generator(device=device))

# Use the built-in next(): DataLoader iterators only implement __next__; the
# Python-2 style `.next()` alias is not available in current PyTorch releases.
test_X, test_y = next(iter(ldX))

In [None]:
myLoss = t.nn.MSELoss()

def evaluate(myModel, start, end, data=None, loss_fn=None):
    """Average loss of ``myModel`` over the items ``start .. end - 1``.

    Args:
        myModel: Callable mapping an input batch to predictions. If it is an
            ``nn.Module`` it is put in eval mode for the duration of the call
            and its previous train/eval mode is restored afterwards.
        start: First item index to evaluate (inclusive).
        end: Last item index to evaluate (exclusive).
        data: Indexable source of ``(X, y)`` pairs. Defaults to the
            notebook-level ``myData``.
        loss_fn: Loss ``loss_fn(prediction, target)``. Defaults to the
            notebook-level ``myLoss``.

    Returns:
        float: unrounded mean of the per-item losses.

    Raises:
        ValueError: if the index range is empty.
    """
    if end <= start:
        raise ValueError("evaluate() needs a non-empty range: start < end")

    data = myData if data is None else data
    loss_fn = myLoss if loss_fn is None else loss_fn

    # Only nn.Module-like objects carry a `training` flag; plain callables are
    # evaluated as-is.
    was_training = getattr(myModel, 'training', None)
    if was_training is not None:
        myModel.eval()  # disable dropout etc. while validating

    total = 0.0
    try:
        with t.no_grad():
            for i in range(start, end):
                test_X, test_y = data[i]
                pred_test = myModel(test_X).reshape(-1)
                # Accumulate the raw value; rounding each term would bias the
                # average (callers round the final result for display).
                total += loss_fn(pred_test, test_y).item()
    finally:
        if was_training is not None:
            myModel.train(was_training)

    return total / (end - start)


# Baseline: average loss of the current network over items 5750..5774.
val_loss = evaluate(myNet, 5750, 5775)
print(f"Validation Loss: {round(val_loss, 2)}")

In [None]:
optim = t.optim.Adam(params=myNet.parameters())
rt = 0  # mean minibatch loss on the most recently trained file

num_epochs = 20
batch_size = 5

# Checkpoints go here; create the directory up front so t.save cannot fail
# because it is missing.
save_dir = os.getcwd() + '/saved_models/'
os.makedirs(save_dir, exist_ok=True)

n_files = len(myData)

for i in range(num_epochs):
    # Walk the shuffled loader once per epoch so every file is visited exactly
    # once. (Re-creating the iterator for each step re-shuffles every time and
    # therefore samples files with replacement.)
    for k, (_X, _y) in enumerate(ldX):
        if k % 500 == 0 and (k > 0 or i > 0):
            # NOTE(review): confirm items 5750..5774 are not also reachable
            # through ldX, otherwise this validation score is contaminated.
            val_loss = round(evaluate(myNet, start=5750, end=5775), 2)
            print('VALIDATION: ', val_loss)

            filename = save_dir + 'DREAMnet_E' \
                                + str(i+1) + '_F' + str(k) \
                                + '__train_' + str(round(rt, 2)) \
                                + '_Val_' + str(val_loss) + '.pt'

            # Saves the whole module object (pickle); loading it later needs
            # the same class definitions to be importable.
            t.save(myNet, filename)

        train_str = ', prev. avg. train: ' + str(round(rt, 2)) if k > 0 else ' '

        print('Epoch', str(i+1) + ',', 'File', str(k+1) + '/' + str(n_files)
                 + train_str)

        # Split the loaded file into shuffled minibatches.
        ldBatch = DataLoader(TensorDataset(_X, _y), batch_size=batch_size,
                             generator=torch.Generator(device=device),
                             shuffle=True)

        loss_sum = 0.0
        for batch_X, batch_y in ldBatch:
            optim.zero_grad()

            pred_y = myNet(batch_X).reshape(-1)

            loss = myLoss(pred_y, batch_y)
            loss.backward()
            optim.step()

            loss_sum += loss.item()

        # Average over the minibatches actually processed, rather than
        # assuming every file holds exactly 1000 samples.
        rt = loss_sum / max(len(ldBatch), 1)
            
                     