import libraries

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as functional
import torch.optim as optim
import torch.utils.data as du
from torch.utils.data import Dataset
from joblib import load

read data

In [8]:
class JUND_Dataset(Dataset):
    def __init__(self, data_dir):
        '''load X, y, w, a from data_dir'''        
        super(JUND_Dataset, self).__init__()

        # load X, y, w, a from given data_dir
        # convert them into torch tensors
        x = load(data_dir + 'shard-0-X.joblib')
        y = load(data_dir +'shard-0-y.joblib')
        w = load(data_dir + 'shard-0-w.joblib')
        a = load(data_dir + 'shard-0-a.joblib')

        self.x = torch.tensor(x).float()  
        self.y = torch.tensor(y).float()
        self.w = torch.tensor(w).float()
        self.a = torch.tensor(a).float()
        
    def __len__(self):
        '''return len of dataset'''
        
        return len(self.y)
    
    def __getitem__(self, idx):
        '''return X, y, w, and a values at index idx'''
        _x = self.x[idx]
        _y = self.y[idx]
        _w = self.w[idx]
        _a = self.a[idx]
        
        return _x, _y, _w, _a

define a Multi Layer Perceptron model

In [9]:
class MLP(nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim):
        '''in_dim: input layer dim
           hidden_dim: hidden layer dim
           out_dim: output layer dim'''
        
        super(MLP, self).__init__()
        
        # images are 28x28 so flatten them into 784d vec
        self.flatten = nn.Flatten()
        
        #two fully connected layers
        self.fc1 = nn.Linear(in_dim, hidden_dim-1)
        self.fc2 = nn.Linear(hidden_dim, out_dim)

    def forward(self, x, a):
        # since x is 28x28, flatten it first
        x = self.flatten(x)
        
        # compute output of fc1, and apply relu activation
        x = functional.relu(self.fc1(x))
        
        # compute output layer
        # no activation
        x = torch.cat((x, a), dim=1)
        x = self.fc2(x)
        return x

Set up training & hyperparameters

In [10]:
device = f'cuda:0' if torch.cuda.is_available() else 'cpu'
print(f"using device: {device}")

batch_size_list = [500, 1000, 1500]
learning_rate_list = [0.1, 0.5, 1]
epochs_list = [10, 20, 30, 40, 50]

using device: cuda:0


Training and validating using different hyperparameter configurations

In [11]:
# set model and optimizer
# 104*4 as inputs
# use 128 hidden layer + 1 accessibility value node
# output is binary (0, 1)

hyperparams = {}

for batch in batch_size_list:
    for rate in learning_rate_list:
        for epoch in epochs_list:
            batch_size = batch
            learning_rate = rate
            epochs = epoch
            
            model = MLP(101*4, 129, 1)
            optimizer = optim.Adam(model.parameters(), lr=learning_rate)

            
            # load training data in batches
            train_loader = du.DataLoader(dataset=JUND_Dataset('train_dataset/'),
                                         batch_size=batch_size,
                                         shuffle=True)
            # send model over to device
            model = model.to(device)

            '''
            training
            '''
            sum_loss = 0
            model.train()
            for epoch in range(1, epochs + 1):    
                sum_loss = 0.

                for batch_idx, data in enumerate(train_loader):
                    # send batch over to device
                    x, y, w, a = data
                    x, y, w, a = x.to(device), y.to(device), w.to(device), a.to(device)

                    # zero out prev gradients
                    optimizer.zero_grad()

                    # run the forward pass
                    output = model(x, a)

                    # compute loss/error
                    loss = functional.binary_cross_entropy_with_logits(output, y, weight=w)


                    # sum up batch losses
                    sum_loss += loss.item()

                    # compute gradients and take a step
                    loss.backward()
                    optimizer.step()

                # average loss per example    
                sum_loss /= len(train_loader.dataset)
                        
            '''
            validation
            '''
            # load validation images in batches
            valid_loader = du.DataLoader(dataset=JUND_Dataset('valid_dataset/'),
                                        batch_size=batch_size,
                                        shuffle=True)

            # set model in eval mode, since we are no longer training
            model.eval()
            weighted_accuracy = 0
            test_loss = 0
            
            # turn off gradient computation, will speed up testing
            with torch.no_grad():
                for batch_idx, data in enumerate(valid_loader):
                    # send batches to device
                    x, y, w, a = data
                    x, y, w, a = x.to(device), y.to(device), w.to(device), a.to(device)

                    # compute forward pass
                    output = model(x, a)

                    # compute loss/error
                    loss = functional.binary_cross_entropy_with_logits(output, y, weight=w)

                    # sum up batch loss
                    test_loss += loss.item()

                    # get the index/class of the max log-probability
                    # output = F.log_softmax(output, dim=1)
                    # pred = output.max(dim=1)[1]

                    m = nn.Sigmoid()
                    output = m(output)

                    output = torch.where(output<0.5, output, torch.tensor(1.).to(device))
                    output = torch.where(output==1.0, output, torch.tensor(0.).to(device))

                    diff = (output == y)
                    weighted_accuracy += torch.sum(torch.mul(w, diff))

                # final test accuracy
                valid_acc = weighted_accuracy / len(valid_loader.dataset)
                
                print(f'batch size: {batch_size:d}, learning rate: {learning_rate:.4f}, epoch: {epochs:d}')
                print(f'validation weighted accuracy: {valid_acc:.4f}\n')
                
                hyperparams[valid_acc] = [batch_size, learning_rate, epochs]
     
min_loss = max(hyperparams.keys())
best_batch_size = hyperparams[min_loss][0]
best_learning_rate = hyperparams[min_loss][1]
best_epochs = hyperparams[min_loss][2]

print(
    f'Best hyperparams: <batch size: {best_batch_size:d}, learning rate: {best_learning_rate:.4f}, epoch: {best_epochs:d}>')


batch size: 500, learning rate: 0.1000, epoch: 10
validation weighted accuracy: 0.7167

batch size: 500, learning rate: 0.1000, epoch: 20
validation weighted accuracy: 0.7412

batch size: 500, learning rate: 0.1000, epoch: 30
validation weighted accuracy: 0.7274

batch size: 500, learning rate: 0.1000, epoch: 40
validation weighted accuracy: 0.7298

batch size: 500, learning rate: 0.1000, epoch: 50
validation weighted accuracy: 0.7381

batch size: 500, learning rate: 0.5000, epoch: 10
validation weighted accuracy: 0.6886

batch size: 500, learning rate: 0.5000, epoch: 20
validation weighted accuracy: 0.7118

batch size: 500, learning rate: 0.5000, epoch: 30
validation weighted accuracy: 0.6801

batch size: 500, learning rate: 0.5000, epoch: 40
validation weighted accuracy: 0.6423

batch size: 500, learning rate: 0.5000, epoch: 50
validation weighted accuracy: 0.7259

batch size: 500, learning rate: 1.0000, epoch: 10
validation weighted accuracy: 0.7311

batch size: 500, learning rate: 

Testing

In [12]:
batch_size = best_batch_size
learning_rate = best_learning_rate
epochs = best_epochs

# training again with the best known hyperparameters
# could have saved previous models instead using pickle but didn't
model = MLP(101*4, 129, 1)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# load training data in batches
train_loader = du.DataLoader(dataset=JUND_Dataset('train_dataset/'),
                            batch_size=batch_size,
                            shuffle=True)
# send model over to device
model = model.to(device)

'''
training again
'''
sum_loss = 0
model.train()
for epoch in range(1, epochs + 1):
    sum_loss = 0.

    for batch_idx, data in enumerate(train_loader):
        # send batch over to device
        x, y, w, a = data
        x, y, w, a = x.to(device), y.to(device), w.to(
            device), a.to(device)

        # zero out prev gradients
        optimizer.zero_grad()

        # run the forward pass
        output = model(x, a)

        # compute loss/error
        loss = functional.binary_cross_entropy_with_logits(
            output, y, weight=w)

        # sum up batch losses
        sum_loss += loss.item()

        # compute gradients and take a step
        loss.backward()
        optimizer.step()

'''
te$ting
'''
# load test images in batches
test_loader = du.DataLoader(dataset=JUND_Dataset('test_dataset/'),
                            batch_size=batch_size,
                            shuffle=True)

# set model in eval mode, since we are no longer training
model.eval()
test_loss = 0
correct = 0
weighted_correct = 0

# turn off gradient computation, will speed up testing
with torch.no_grad():
    for batch_idx, data in enumerate(test_loader):
        # send batches to device
        x, y, w, a = data
        x, y, w, a = x.to(device), y.to(device), w.to(device), a.to(device)
        
        # compute forward pass
        output = model(x, a)

        # compute loss/error
        loss = functional.binary_cross_entropy_with_logits(output, y, weight=w)
        
        # sum up batch loss
        test_loss += loss.item()

        # class-wise classification using sigmoid
        m = nn.Sigmoid()
        output = m(output)
        output = torch.where(output<0.5, output, torch.tensor(1.).to(device))
        output = torch.where(output==1.0, output, torch.tensor(0.).to(device))

        # add up number of correct predictions
        correct += torch.sum(output == y)
  
        diff = (output == y)

        # weighted accuracy of the predictions
        weighted_correct += torch.sum(torch.mul(w, diff))
        
    # test loss per example
    test_loss /= len(test_loader.dataset)
    
    # final test accuracy
    test_acc = weighted_correct / len(test_loader.dataset)
    print(f'Test loss: {test_loss:.5f} / weighted accuracy: {test_acc:.5f} /',
          f'correct: {correct} out of {len(test_loader.dataset)} test points')

Test loss: 0.00036 / weighted accuracy: 0.72833 / correct: 27806 out of 34528 test points
