In [1]:
import torch # import pytorch
import torch.nn as nn # pytorch's neural networks module
import torch.nn.functional as F # functional module.

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Choose where tensors and the model will live: CUDA device 0 when a GPU is
# visible to PyTorch, otherwise the CPU. The integer id 0 is equivalent to the
# string 'cuda:0'.
if torch.cuda.is_available():
    device_id = 0
else:
    device_id = 'cpu'

# Wrap the id in a torch.device object, which is what `.to(...)` calls expect.
device = torch.device(device_id)
device

device(type='cuda', index=0)

In [3]:
class VanillaRNN(nn.Module):
    """Sequence classifier: a stacked nn.RNN followed by a linear read-out.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the hidden state in every RNN layer.
        num_layer: number of stacked RNN layers.
        out_size: number of output classes.
    """

    def __init__(self, input_size, hidden_size, num_layer, out_size):
        super().__init__()
        self.hidden_size, self.num_layer = hidden_size, num_layer
        # batch_first=True => inputs and outputs are laid out as (batch, seq, feature).
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layer,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=out_size)
        # Log-probabilities over the class axis (dim 1 of the (batch, out_size) scores).
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Map a (batch, seq, input_size) tensor to (batch, out_size) log-probabilities."""
        # The final hidden state returned by the RNN is not needed here.
        sequence_out, _ = self.rnn(x)
        # Classify from the top layer's output at the last time step only.
        last_step = sequence_out[:, -1, :]
        return self.softmax(self.fc(last_step))

In [4]:
import numpy as np

In [5]:
# Read the combined train/validation inputs and their labels from disk.
X_train_valid = np.load("./Data/X_train_valid.npy")
y_train_valid = np.load("./Data/y_train_valid.npy")

# Shift the labels so the smallest one becomes 0 and store them as integers
# (presumably so they can be used as zero-based class indices -- confirm
# against the label encoding of the data set).
label_offset = np.min(y_train_valid)
y_train_valid = (y_train_valid - label_offset).astype('int')

In [6]:
# Hold out 20% of the samples for validation; the fixed random_state makes the
# split reproducible.
from sklearn.model_selection import train_test_split

X_train_np, X_val_np, y_train_np, y_val_np = train_test_split(
    X_train_valid,
    y_train_valid,
    test_size=0.2,
    random_state=12345,
)

# Training split: swap the last two axes of the inputs, convert to float32
# inputs / int64 labels, and move both to `device`.
X_train = torch.FloatTensor(X_train_np.transpose(0, 2, 1)).to(device)
y_train = torch.LongTensor(y_train_np).to(device)

# Validation split: same conversion.
X_val = torch.FloatTensor(X_val_np.transpose(0, 2, 1)).to(device)
y_val = torch.LongTensor(y_val_np).to(device)

In [7]:
class MyDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs each input in X with the label at the same index in Y.

    Args:
        X: inputs; a NumPy array is converted to a float32 tensor, any other
            object is stored unchanged.
        Y: labels; a NumPy array is converted to an int64 tensor, any other
            object is stored unchanged.
    """

    def __init__(self, X, Y):
        # Only NumPy arrays are converted; tensors (or anything else) pass through as-is.
        self.X = torch.FloatTensor(X) if isinstance(X, np.ndarray) else X
        self.Y = torch.LongTensor(Y) if isinstance(Y, np.ndarray) else Y

    def __len__(self):
        """Number of samples, taken from the length of X."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the (input, label) pair stored at `index`."""
        sample, label = self.X[index], self.Y[index]
        return sample, label

In [8]:
# Mini-batch size shared by the training and validation loaders.
batch_size = 64

# Training batches are reshuffled on every pass over the loader.
train_dataset = MyDataset(X_train, y_train)
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)

# Validation batches are served in a fixed order.
val_dataset = MyDataset(X_val, y_val)
val_dataloader = torch.utils.data.DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False
)

In [9]:
import tqdm # for progress bar

In [10]:
# Seed PyTorch's random number generator before the model is built.
# Note that some operations are not deterministic even with a fixed seed; see
#   https://pytorch.org/docs/stable/notes/randomness.html
#   https://pytorch.org/docs/stable/generated/torch.use_deterministic_algorithms.html
torch.manual_seed(12345)

# 22 input features per time step, 128 hidden units, 3 stacked layers, 4 output classes.
model = VanillaRNN(input_size=22, hidden_size=128, num_layer=3, out_size=4).to(device)

# The model already emits log-probabilities, so it is paired with NLLLoss.
criterion = nn.NLLLoss()
# Adam with its default hyper-parameters over all model parameters.
optimizer = torch.optim.Adam(model.parameters())

# Histories filled in by the training loop.
loss_hist, val_loss_hist = [], []
acc_hist, val_acc_hist = [], []

In [11]:
# Smoke test: push a single training batch through the model and print the
# shapes of the input and of the output.
# The forward pass is never backpropagated, so it runs under no_grad to avoid
# recording an autograd graph for it.
with torch.no_grad():
    for j in train_dataloader:
        x = j[0].to(device)  # j is an (inputs, labels) pair; only the inputs are needed
        print(x.shape)
        output = model(x)
        print(output.shape)
        break  # one batch is enough for a shape check

torch.Size([64, 1000, 22])
torch.Size([64, 4])


In [12]:
num_epochs = 30


def _evaluate(net, loader, loss_fn=None):
    """Score `net` on every batch of `loader` without tracking gradients.

    Args:
        net: model mapping a batch of inputs to (batch_size, n_classes) scores.
        loader: iterable of (inputs, labels) batches.
        loss_fn: optional criterion; when given, its value is recorded per batch.

    Returns:
        (accuracy, batch_losses): the fraction of correctly classified samples,
        and a list with one loss value per batch (empty when `loss_fn` is None).

    Raises:
        ZeroDivisionError: if `loader` yields no samples.
    """
    n_seen = 0     # number of samples
    n_correct = 0  # number of correct outputs
    batch_losses = []
    with torch.no_grad():
        for xb, yb in loader:
            scores = net(xb)
            if loss_fn is not None:
                batch_losses.append(loss_fn(scores, yb).item())
            n_seen += len(yb)
            # The predicted class is the index of the largest score in each row.
            n_correct += (scores.argmax(dim=1) == yb).sum().item()
    return n_correct / n_seen, batch_losses


pbar = tqdm.tqdm(
    range(num_epochs), position=0, leave=True,
    bar_format='{l_bar}{bar:30}{r_bar}',
)
for epoch in pbar:
    model.train()  # set model to training mode.
    for x, y in train_dataloader:
        # The batches are used as served by the loader; add x = x.to(device) and
        # y = y.to(device) here if the data does not live on the model's device.

        # Clear the gradients of the parameters the optimizer is optimizing.
        # The optimizer was built from model.parameters(), so a separate
        # model.zero_grad() would be redundant.
        optimizer.zero_grad()

        out = model(x)            # shape (batch_size, n_classes)
        loss = criterion(out, y)  # NLL of the log-probabilities, i.e. the cross entropy loss

        loss.backward()   # backpropagate
        optimizer.step()  # perform optimization step

        # IMPORTANT: DO NOT store 'loss' by itself, since it references its entire computational graph.
        # Otherwise you will run out of memory.
        # You MUST use .item() to convert to a scalar or call .detach().
        loss_hist.append(loss.item())

    model.eval()  # set model to evaluation mode. Relevant for dropout, batchnorm, layernorm, etc.

    # Accuracy on the training set (one entry per epoch).
    train_acc, _ = _evaluate(model, train_dataloader)
    acc_hist.append(train_acc)

    # Accuracy on the validation set (one entry per epoch) plus the validation
    # loss, which is logged once per batch, like the training loss above.
    val_acc, val_batch_losses = _evaluate(model, val_dataloader, criterion)
    val_loss_hist.extend(val_batch_losses)
    val_acc_hist.append(val_acc)

    # update progress bar postfix
    pbar.set_postfix({'acc': acc_hist[-1], 'val_acc': val_acc_hist[-1]})

100%|██████████████████████████████| 30/30 [00:08<00:00,  3.53it/s, acc=1, val_acc=0.262]    
