import libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as functional
import torch.optim as optim
import torch.utils.data as du
from torch.utils.data import Dataset
from joblib import load

read data

In [2]:
class JUND_Dataset(Dataset):
    def __init__(self, data_dir):
        '''load X, y, w, a from data_dir'''        
        super(JUND_Dataset, self).__init__()

        # load X, y, w, a from given data_dir
        # convert them into torch tensors
        x = load(data_dir + 'shard-0-X.joblib')
        y = load(data_dir +'shard-0-y.joblib')
        w = load(data_dir + 'shard-0-w.joblib')
        a = load(data_dir + 'shard-0-a.joblib')

        self.x = torch.tensor(x).float()  
        self.y = torch.tensor(y).float()
        self.w = torch.tensor(w).float()
        self.a = torch.tensor(a).float()
        
    def __len__(self):
        '''return len of dataset'''
        
        return len(self.y)
    
    def __getitem__(self, idx):
        '''return X, y, w, and a values at index idx'''
        _x = self.x[idx]
        _y = self.y[idx]
        _w = self.w[idx]
        _a = self.a[idx]
        
        return _x, _y, _w, _a

define a Multi Layer Perceptron model

In [3]:
class LSTM(nn.Module):
    def __init__(self, input_dim, LSTM_hidden_dim, LSTM_layers, mlp_hidden_dim, out_dim):
        '''in_dim: input layer dim
           hidden_dim: hidden layer dim
           out_dim: output layer dim'''
        
        super().__init__()
        # self.flatten = nn.Flatten()
        self.LSTM_layers = LSTM_layers
        self.hidden_units = LSTM_hidden_dim
        
        self.fc1 = nn.LSTM(
            input_size=input_dim,
            hidden_size=LSTM_hidden_dim,
            num_layers=LSTM_layers,
            batch_first=True
        )
        self.fc2 = nn.Linear(LSTM_hidden_dim, mlp_hidden_dim)
        self.fc3 = nn.Linear(mlp_hidden_dim + 1, out_dim)

    def forward(self, x, a):
        batch_size = x.shape[0]
        h0 = torch.zeros(self.LSTM_layers, batch_size, self.hidden_units).requires_grad_()
        c0 = torch.zeros(self.LSTM_layers, batch_size, self.hidden_units).requires_grad_()
        
        _, (hn, _) = self.fc1(x, (h0.to(device), c0.to(device)))
        x = self.fc2(hn[0])
        x = functional.relu(x)
        x = torch.cat((x, a), dim=1)
        x = self.fc3(x)
        
        return x

Set up training & hyperparameters

In [4]:
device = f'cuda:0' if torch.cuda.is_available() else 'cpu'
print(f"using device: {device}")

'''
batch_size_list = [500, 1000, 1500]
learning_rate_list = [0.1, 0.5, 1]
epochs_list = [10, 20, 30, 40, 50]
'''

batch_size_list = [2000]
learning_rate_list = [0.04, 0.05]
epochs_list = [30]


using device: cuda:0


Training and validating using different hyperparameter configurations

In [5]:
# set model and optimizer
# 104*4 as inputs
# use 128 hidden layer + 1 accessibility value node
# output is binary (0, 1)

hyperparams = {}

for batch in batch_size_list:
    for rate in learning_rate_list:
        for epoch in epochs_list:
            batch_size = batch
            learning_rate = rate
            epochs = epoch

            model = LSTM(4, 128, 3, 64, 1)
            optimizer = optim.Adam(model.parameters(), lr=learning_rate)

            # load training data in batches
            train_loader = du.DataLoader(dataset=JUND_Dataset('../JUND_data/train_dataset/'),
                                         batch_size=batch_size,
                                         shuffle=True)
            # send model over to device
            model = model.to(device)

            '''
            training
            '''
            sum_loss = 0
            model.train()
            for epoch in range(1, epochs + 1):
                sum_loss = 0.

                for batch_idx, data in enumerate(train_loader):
                    # send batch over to device

                    x, y, w, a = data
                    # print(x.shape)
                    x, y, w, a = x.to(device), y.to(
                        device), w.to(device), a.to(device)

                    # zero out prev gradients
                    optimizer.zero_grad()

                    # run the forward pass
                    output = model(x, a)

                    # compute loss/error
                    loss = functional.binary_cross_entropy_with_logits(output, y, weight=w)

                    # sum up batch losses
                    sum_loss += loss.item()

                    # compute gradients and take a step
                    loss.backward()
                    optimizer.step()

                # average loss per example
                sum_loss /= len(train_loader.dataset)
                # print(sum_loss)  

            '''
            te$ting
            '''
            # load test images in batches
            test_loader = du.DataLoader(dataset=JUND_Dataset('../JUND_data/test_dataset/'),
                                        batch_size=batch_size,
                                        shuffle=True)

            # set model in eval mode, since we are no longer training
            model.eval()
            test_loss = 0
            correct = 0
            weighted_correct = 0

            # turn off gradient computation, will speed up testing
            with torch.no_grad():
                for batch_idx, data in enumerate(test_loader):
                    # send batches to device
                    x, y, w, a = data
                    x, y, w, a = x.to(device), y.to(device), w.to(device), a.to(device)

                    # compute forward pass
                    output = model(x, a)

                    # compute loss/error
                    loss = functional.binary_cross_entropy_with_logits(output, y, weight=w)

                    # sum up batch loss
                    test_loss += loss.item()

                    # class-wise classification using sigmoid
                    m = nn.Sigmoid()
                    output = m(output)
                    output = torch.where(output < 0.5, output, torch.tensor(1.).to(device))
                    output = torch.where(output == 1.0, output,
                                        torch.tensor(0.).to(device))

                    # add up number of correct predictions
                    correct += torch.sum(output == y)

                    diff = (output == y)

                    # weighted accuracy of the predictions
                    weighted_correct += torch.sum(torch.mul(w, diff))

                # test loss per example
                test_loss /= len(test_loader.dataset)

                # final test accuracy
                test_acc = weighted_correct / len(test_loader.dataset)
                print(f'Test loss: {test_loss:.5f} / weighted accuracy: {test_acc:.5f} /',
                    f'correct: {correct} out of {len(test_loader.dataset)} test points')


Test loss: 0.00030 / weighted accuracy: 0.73444 / correct: 28461 out of 34528 test points
Test loss: 0.00029 / weighted accuracy: 0.73052 / correct: 29129 out of 34528 test points


Testing

In [6]:

'''
te$ting
'''
# load test images in batches
test_loader = du.DataLoader(dataset=JUND_Dataset('../JUND_data/test_dataset/'),
                            batch_size=batch_size,
                            shuffle=True)

# set model in eval mode, since we are no longer training
model.eval()
test_loss = 0
correct = 0
weighted_correct = 0

# turn off gradient computation, will speed up testing
with torch.no_grad():
    for batch_idx, data in enumerate(test_loader):
        # send batches to device
        x, y, w, a = data
        x, y, w, a = x.to(device), y.to(device), w.to(device), a.to(device)
        
        # compute forward pass
        output = model(x, a)

        # compute loss/error
        loss = functional.binary_cross_entropy_with_logits(output, y, weight=w)
        
        # sum up batch loss
        test_loss += loss.item()

        # class-wise classification using sigmoid
        m = nn.Sigmoid()
        output = m(output)
        output = torch.where(output<0.5, output, torch.tensor(1.).to(device))
        output = torch.where(output==1.0, output, torch.tensor(0.).to(device))

        # add up number of correct predictions
        correct += torch.sum(output == y)
  
        diff = (output == y)

        # weighted accuracy of the predictions
        weighted_correct += torch.sum(torch.mul(w, diff))
        
    # test loss per example
    test_loss /= len(test_loader.dataset)
    
    # final test accuracy
    test_acc = weighted_correct / len(test_loader.dataset)
    print(f'Test loss: {test_loss:.5f} / weighted accuracy: {test_acc:.5f} /',
          f'correct: {correct} out of {len(test_loader.dataset)} test points')

Test loss: 0.00029 / weighted accuracy: 0.73052 / correct: 29129 out of 34528 test points
