In [50]:
import torch
import torch.nn as nn
import torch.utils.data as data
import os
import numpy as np
import json

In [51]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [52]:
class JsonDataset(data.Dataset):
    def __init__(self, data_path):
        f = open(data_path, 'r')
        self.data = json.loads(f.read())
        f.close()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return torch.FloatTensor(self.data[index][0]), \
            torch.FloatTensor(self.data[index][1])

In [53]:
train_data = JsonDataset('/kaggle/input/cpsc490/new-multi-rnn-train.json')
validation_data = JsonDataset('/kaggle/input/cpsc490/new-multi-rnn-validation.json')
test_data = JsonDataset('/kaggle/input/cpsc490/new-multi-rnn-test.json')

In [54]:
x_seq_size = train_data[0][0].shape[1]
rnn_num_layers = 2

batch_size = 64

In [55]:
params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 1, 'pin_memory': True}
train_loader = data.DataLoader(train_data, **params)
validation_loader = data.DataLoader(validation_data, **params)
test_loader = data.DataLoader(test_data, **params)

In [81]:
class RNN(nn.Module):
    def __init__(self, x_seq_size, rnn_num_layers):
        super(RNN, self).__init__()
        
        self.rnn_num_layers = rnn_num_layers

        self.rnn = nn.LSTM(x_seq_size, x_seq_size, rnn_num_layers,
                          batch_first = True)

    def forward(self, x_seq):
        h0 = torch.zeros(self.rnn_num_layers, x_seq.shape[0], x_seq.shape[2]).to(device)
        c0 = torch.zeros(self.rnn_num_layers, x_seq.shape[0], x_seq.shape[2]).to(device)
        
        out, _ = self.rnn(x_seq, (h0, c0))
        return out[:, -1, :]

In [86]:
def train(model, criterion, optimizer):
    model.train()
    train_loss = 0
    correct = 0
    total = 0

    for i, (x_seq, targets) in enumerate(train_loader):
        x_seq = x_seq.to(device)
        targets = targets.to(device)

        outputs = model(x_seq)
        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total += targets.shape[0]
        train_loss += loss.item() * targets.shape[0]
        #_, predicted = outputs.max(1)
        #correct += predicted.eq(targets).sum().item()
        
    epoch_train_loss = train_loss / total
    #epoch_train_acc = float(100 * correct / total)

    return epoch_train_loss

In [83]:
def validation(model, criterion):
    model.eval()
    validation_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for i, (x_seq, targets) in enumerate(validation_loader):
            x_seq = x_seq.to(device)
            targets = targets.to(device)

            outputs = model(x_seq)
            loss = criterion(outputs, targets)

        total += targets.shape[0]
        validation_loss += loss.item() * targets.shape[0]
        #_, predicted = outputs.max(1)
        #correct += predicted.eq(targets).sum().item()
        
    epoch_validation_loss = validation_loss / total
    #epoch_validation_acc = float(100 * correct / total)

    return epoch_validation_loss

In [94]:
model = RNN(x_seq_size, rnn_num_layers).to(device)
criterion = nn.MSELoss().to(device)
#criterion = nn.CrossEntropyLoss().to(device)
#criterion = nn.MSELoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  

num_epochs = 1000

In [95]:
# Train
best_validation_loss = None

for epoch in range(0, num_epochs):
    epoch_train_loss = train(model, criterion, optimizer)
    epoch_validation_loss = validation(model, criterion)
    
    if best_validation_loss == None or epoch_validation_loss < best_validation_loss:
        torch.save(model.state_dict(), 'best_rnn.pth')
        print('Saved.')
        best_validation_loss = epoch_validation_loss

    print('Epoch {}. Training loss: {}. Validation loss: {}.'.format(epoch + 1, 
                                                                format(epoch_train_loss, '.6f'), 
                                                                format(epoch_validation_loss, '.6f')))
    

Saved.
Epoch 1. Training loss: 0.104540. Validation loss: 0.073665.
Saved.
Epoch 2. Training loss: 0.059376. Validation loss: 0.042099.
Saved.
Epoch 3. Training loss: 0.033428. Validation loss: 0.025508.
Saved.
Epoch 4. Training loss: 0.018332. Validation loss: 0.012007.
Epoch 5. Training loss: 0.011420. Validation loss: 0.012806.
Saved.
Epoch 6. Training loss: 0.008176. Validation loss: 0.005974.
Epoch 7. Training loss: 0.006614. Validation loss: 0.008188.
Saved.
Epoch 8. Training loss: 0.005730. Validation loss: 0.002231.
Epoch 9. Training loss: 0.005160. Validation loss: 0.003986.
Epoch 10. Training loss: 0.004723. Validation loss: 0.006208.
Epoch 11. Training loss: 0.004380. Validation loss: 0.004380.
Saved.
Epoch 12. Training loss: 0.004100. Validation loss: 0.002048.
Epoch 13. Training loss: 0.003880. Validation loss: 0.005381.
Saved.
Epoch 14. Training loss: 0.003699. Validation loss: 0.001533.
Epoch 15. Training loss: 0.003525. Validation loss: 0.002999.
Epoch 16. Training loss

In [93]:
# Test
model.load_state_dict(torch.load('best_rnn.pth'))

with torch.no_grad():
    sum_test_loss = 0
    total = 0
    for i, (x_seq, targets) in enumerate(test_loader):
        x_seq = x_seq.to(device)
        targets = targets.to(device)
        #targets = targets.reshape(-1, 1).to(device)

        outputs = model(x_seq)
        loss = criterion(outputs, targets)

        total += targets.shape[0]
        sum_test_loss += loss.item() * targets.shape[0]
        #_, predicted = outputs.max(1)
        #correct += predicted.eq(targets).sum().item()
        
    test_loss = sum_test_loss / total

print(test_loss)

0.0018615394526277668
