import libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as functional
import torch.optim as optim
import torch.utils.data as du
from torch.utils.data import Dataset
from joblib import load
from tqdm import tqdm

device = f'cuda:0' if torch.cuda.is_available() else 'cpu'
print(f"using device: {device}")read data

In [2]:
device = f'cuda:0' if torch.cuda.is_available() else 'cpu'
# print(f"using device: {device}")

In [3]:
torch.backends.cudnn.deterministic = True

class JUND_Dataset(Dataset):
    def __init__(self, data_dir):
        '''load X, y, w, a from data_dir'''        
        super(JUND_Dataset, self).__init__()

        # load X, y, w, a from given data_dir
        # convert them into torch tensors
        x = load(data_dir + 'shard-0-X.joblib')
        y = load(data_dir +'shard-0-y.joblib')
        w = load(data_dir + 'shard-0-w.joblib')
        a = load(data_dir + 'shard-0-a.joblib')

        self.x = torch.tensor(x).float()  
        self.y = torch.tensor(y).float()
        self.w = torch.tensor(w).float()
        self.a = torch.tensor(a).float()
        
    def __len__(self):
        '''return len of dataset'''
        
        return len(self.y)
    
    def __getitem__(self, idx):
        '''return X, y, w, and a values at index idx'''
        _x = torch.swapaxes(self.x[idx], 0, 1).double()
        _y = self.y[idx]
        _w = self.w[idx]
        _a = self.a[idx]
        
        return _x, _y, _w, _a

define a Multi Layer Perceptron model

In [4]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        
        #two fully connected layers
        self.fc1 = nn.Conv1d(in_channels=4, out_channels=4, kernel_size=9, stride=3, padding=0, device=device, dtype=float)
        self.fc2 = nn.Conv1d(in_channels=4, out_channels=4, kernel_size=4, stride=1, padding=0, device=device, dtype=float)

        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(p=0.2)
        
        self.fc3 = nn.Linear(112, 56)
        self.fc4 = nn.Linear(56+1, 1)
        
    def forward(self, x, a):
        x = self.fc1(x) # CNN 1 input: [1000, 4, 101]
        x = self.fc2(x) # CNN 2 input: [1000, 4, 31]

        x = self.flatten(x) # input: [1000, 4, 28]
        x = functional.relu(x) # input: [1000, 112]
        x = self.dropout(x)

        x = self.fc3(x.float())
        x = torch.cat((x, a), dim=1)
        x = self.fc4(x)
 
        return x

Set up hyperparameters

In [5]:
batch_size_list = [2000]
learning_rate_list = [0.5, 0.75, 1.0, 1.25, 1.5]
epochs_list = [50, 100, 150, 200, 250, 300] # + 25, 75

Training and validating using different hyperparameter configurations

In [6]:
best_acc = 0
best_learning_rate = 0
best_epochs = 0
best_model = None

for batch_size in batch_size_list:
    for learning_rate in learning_rate_list:
        for epochs in epochs_list:
            '''
            training
            '''   
            model = CNN()
            optimizer = optim.Adam(model.parameters(), lr=learning_rate)

            
            # load training data in batches
            train_loader = du.DataLoader(dataset=JUND_Dataset('../JUND_data/train_dataset/'),
                                        batch_size=batch_size,
                                        shuffle=True)
            # send model over to device
            model = model.to(device)

            sum_loss = 0
            model.train()
            for epoch in range(1, epochs + 1):    
                sum_loss = 0.

                for batch_idx, data in enumerate(train_loader):
                    x, y, w, a = data
                    x, y, w, a = x.to(device), y.to(device), w.to(device), a.to(device)
                                    
                    # zero out prev gradients
                    optimizer.zero_grad()

                    # run the forward pass
                    output = model(x, a)
                    
                    # compute loss/error
                    loss = functional.binary_cross_entropy_with_logits(output, y, weight=w)


                    # sum up batch losses
                    sum_loss += loss.item()

                    # compute gradients and take a step
                    loss.backward()
                    optimizer.step()

                # average loss per example    
                sum_loss /= len(train_loader.dataset)
                # print(sum_loss)
                
                
            '''
            validation
            '''
            # load test images in batches
            valid_loader = du.DataLoader(dataset=JUND_Dataset('../JUND_data/valid_dataset/'),
                                        batch_size=batch_size,
                                        shuffle=True)

            # set model in eval mode, since we are no longer training
            model.eval()
            valid_loss = 0
            correct = 0
            weighted_correct = 0

            # turn off gradient computation, will speed up testing
            with torch.no_grad():
                for batch_idx, data in enumerate(valid_loader):
                    # send batches to device
                    x, y, w, a = data
                    x, y, w, a = x.to(device), y.to(device), w.to(device), a.to(device)

                    # compute forward pass
                    output = model(x, a)

                    # compute loss/error
                    loss = functional.binary_cross_entropy_with_logits(output, y, weight=w)

                    # sum up batch loss
                    valid_loss += loss.item()

                    # class-wise classification using sigmoid
                    m = nn.Sigmoid()
                    output = m(output)
                    output = torch.where(output < 0.5, output, torch.tensor(1.).to(device))
                    output = torch.where(output == 1.0, output,torch.tensor(0.).to(device))

                    # add up number of correct predictions
                    correct += torch.sum(output == y)

                    diff = (output == y)

                    # weighted accuracy of the predictions
                    weighted_correct += torch.sum(torch.mul(w, diff))

                # test loss per example
                valid_loss /= len(valid_loader.dataset)

                # final test accuracy
                valid_acc = weighted_correct / len(valid_loader.dataset)
                print(f'----learning rate: {learning_rate:.3f}, epochs:{epochs:d}----')
                print(f'Validation loss: {valid_loss:.5f} / weighted accuracy: {valid_acc:.5f} /',
                    f'correct: {correct} out of {len(valid_loader.dataset)} test points\n')
                
            if valid_acc > best_acc:
                best_learning_rate = learning_rate
                best_epochs = epochs
                best_model = model

----learning rate: 0.500, epochs:50----
Validation loss: 0.00027 / weighted accuracy: 0.74261 / correct: 28553 out of 34527 test points

----learning rate: 0.500, epochs:100----
Validation loss: 0.00027 / weighted accuracy: 0.72261 / correct: 30226 out of 34527 test points

----learning rate: 0.500, epochs:150----
Validation loss: 0.00041 / weighted accuracy: 0.65593 / correct: 33848 out of 34527 test points

----learning rate: 0.500, epochs:200----
Validation loss: 0.00075 / weighted accuracy: 0.58941 / correct: 34198 out of 34527 test points

----learning rate: 0.500, epochs:250----
Validation loss: 0.00027 / weighted accuracy: 0.73593 / correct: 28797 out of 34527 test points

----learning rate: 0.500, epochs:300----
Validation loss: 0.00036 / weighted accuracy: 0.68841 / correct: 33502 out of 34527 test points

----learning rate: 0.750, epochs:50----
Validation loss: 0.00028 / weighted accuracy: 0.70948 / correct: 23930 out of 34527 test points

----learning rate: 0.750, epochs:100

Testing

In [7]:
learning_rate = best_learning_rate
epochs = best_epochs
model = best_model

print(f'Best hyperparameters\n  learning rate: {learning_rate:.3f}, epochs:{epochs:d}')

'''
te$ting
'''
# load test images in batches
test_loader = du.DataLoader(dataset=JUND_Dataset('../JUND_data/test_dataset/'),
                            batch_size=batch_size,
                            shuffle=True)

# set model in eval mode, since we are no longer training
model.eval()
test_loss = 0
correct = 0
weighted_correct = 0

# turn off gradient computation, will speed up testing
with torch.no_grad():
    for batch_idx, data in enumerate(test_loader):
        # send batches to device
        x, y, w, a = data
        x, y, w, a = x.to(device), y.to(device), w.to(device), a.to(device)
        
        # compute forward pass
        output = model(x, a)

        # compute loss/error
        loss = functional.binary_cross_entropy_with_logits(output, y, weight=w)
        
        # sum up batch loss
        test_loss += loss.item()

        # class-wise classification using sigmoid
        m = nn.Sigmoid()
        output = m(output)
        output = torch.where(output<0.5, output, torch.tensor(1.).to(device))
        output = torch.where(output==1.0, output, torch.tensor(0.).to(device))

        # add up number of correct predictions
        diff = (output == y)
        correct += torch.sum(diff)
        
        # weighted accuracy of the predictions
        weighted_correct += torch.sum(torch.mul(w, diff)) # diff = [1 if the prediction is correct; else 0]
        
    # test loss per example
    test_loss /= len(test_loader.dataset)
    
    # final test accuracy
    test_acc = weighted_correct / len(test_loader.dataset)
    print(f'Test loss: {test_loss:.5f} / weighted accuracy: {test_acc:.5f} /',
          f'correct: {correct} out of {len(test_loader.dataset)} test points')

Best hyperparameters
  learning rate: 1.500, epochs:300
Test loss: 0.00035 / weighted accuracy: 0.72066 / correct: 32672 out of 34528 test points
