# In [13]:
import time
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from PIL import Image
import torch
from keras.datasets import cifar100

# Constants
# Number of output classes produced by the network's final layer.
n_labels = 20

# Length of one flattened input image (3 * 32 * 32 values per image).
image_size = 3 * 32 * 32


class Perceptron(torch.nn.Module):
    """Fully connected classifier with ELU activations and dropout.

    The network flattens each input to ``image_size`` values, passes it
    through ``n_hidden_layers`` hidden layers (each a linear map followed by
    ELU and dropout) and finishes with a linear layer of ``n_labels`` outputs
    followed by ``log_softmax``.

    Hidden layers are stored as ``linear_function_<k>`` with a matching
    ``linear_function_<k>_drop`` dropout module, ``k`` starting at 1.

    Args:
        n_hidden_units: Width of every hidden layer.
        n_hidden_layers: Number of hidden layers to build. The first hidden
            layer is always created, so values below 1 behave like 1.
        drop_rate: Dropout probability applied after every hidden layer.
    """

    def __init__(self, n_hidden_units, n_hidden_layers, drop_rate=0.5):
        super(Perceptron, self).__init__()

        self.n_hidden_layers = n_hidden_layers

        # First hidden layer: y = Wx + b on the flattened image, then dropout.
        self.linear_function_1 = torch.nn.Linear(image_size, n_hidden_units)
        self.linear_function_1_drop = torch.nn.Dropout(drop_rate)

        # Every additional hidden layer maps n_hidden_units -> n_hidden_units.
        # Building them cumulatively guarantees that asking for 3 layers
        # really yields layers 1, 2 and 3 (not just 1 and 3).
        for layer_index in range(2, n_hidden_layers + 1):
            setattr(
                self,
                "linear_function_{}".format(layer_index),
                torch.nn.Linear(n_hidden_units, n_hidden_units),
            )
            setattr(
                self,
                "linear_function_{}_drop".format(layer_index),
                torch.nn.Dropout(drop_rate),
            )

        self.output = torch.nn.Linear(n_hidden_units, n_labels)

    def forward(self, input_data):
        """Return per-class log-probabilities for a batch of inputs.

        Args:
            input_data: Tensor whose elements can be viewed as rows of
                ``image_size`` values.

        Returns:
            Tensor with one row per input and ``n_labels`` columns holding
            log-probabilities (suitable for ``nll_loss``).
        """
        # view() reshapes without copying the underlying data.
        hidden = input_data.view(-1, image_size)

        # Linear map followed by the ELU non-linearity, then dropout.
        hidden = torch.nn.functional.elu(self.linear_function_1(hidden))
        hidden = self.linear_function_1_drop(hidden)

        for layer_index in range(2, self.n_hidden_layers + 1):
            linear = getattr(self, "linear_function_{}".format(layer_index))
            dropout = getattr(self, "linear_function_{}_drop".format(layer_index))
            hidden = dropout(torch.nn.functional.elu(linear(hidden)))

        # log_softmax yields log-probabilities: every value is <= 0 and the
        # exponentials of each row sum to 1.
        return torch.nn.functional.log_softmax(self.output(hidden), -1)


def get_index_best_prediction(predictions):
    """Return the index of the largest entry along dimension 1 of ``predictions``."""
    # max(1) yields a (values, indices) pair; only the indices are wanted.
    maxima_along_dim_1 = predictions.max(1)
    return maxima_along_dim_1[1]



def train_classifier(classifier, train_loader, optimizer, log_interval=100):
    """Train ``classifier`` for one pass over ``train_loader``.

    For every batch the gradients are reset, the negative log-likelihood loss
    of the classifier's output is back-propagated and ``optimizer`` takes one
    step.

    Args:
        classifier: Module returning per-class log-probabilities.
        train_loader: Iterable yielding ``(data, labels)`` batches.
        optimizer: Optimizer built over ``classifier``'s parameters.
        log_interval: Accepted for backward compatibility; currently unused.

    Returns:
        tuple: ``(average_loss, accuracy)`` where ``average_loss`` is the mean
        per-sample loss and ``accuracy`` the percentage of correctly
        classified samples, both measured over the samples seen during this
        pass. ``(0.0, 0.0)`` when the loader yields no samples.
    """
    # Training mode enables dropout.
    classifier.train()

    num_correct_predictions = 0
    num_seen = 0
    total_loss = 0.0

    for training_data, training_labels in train_loader:
        # Gradients accumulate by default, so clear them before each batch.
        optimizer.zero_grad()

        prediction = classifier(training_data)

        # Negative log-likelihood loss on the log-probabilities.
        loss = torch.nn.functional.nll_loss(prediction, training_labels)
        loss.backward()
        optimizer.step()

        # Bookkeeping on a detached copy so it never touches the graph.
        batch_size = training_labels.size(0)
        best_prediction = prediction.detach().max(1)[1]
        num_correct_predictions += best_prediction.eq(training_labels).sum().item()
        total_loss += loss.item() * batch_size
        num_seen += batch_size

    if num_seen == 0:
        return 0.0, 0.0

    average_loss = total_loss / num_seen
    accuracy = 100.0 * num_correct_predictions / num_seen
    return average_loss, accuracy

def calculate_predictions(classifier, validation_loader):
    """Evaluate ``classifier`` on ``validation_loader`` and report accuracy.

    The classifier is switched to evaluation mode and run without gradient
    tracking. The number of correct predictions and the accuracy are printed.

    Args:
        classifier: Module returning one row of class scores per sample.
        validation_loader: Data loader yielding ``(data, labels)`` batches;
            its ``dataset`` length is used as the sample count.

    Returns:
        float: Accuracy as a percentage in ``[0, 100]``; ``0.0`` when the
        dataset is empty.
    """
    # eval() disables dropout; no_grad() skips gradient bookkeeping.
    classifier.eval()

    num_samples = len(validation_loader.dataset)
    correct_predictions = 0

    with torch.no_grad():
        for batch_data, batch_labels in validation_loader:
            output = classifier(batch_data)

            # Index of the highest score in each row.
            best_prediction = output.max(1)[1]

            # Element-wise equality, summed into a plain Python int.
            correct_predictions += best_prediction.eq(batch_labels).sum().item()

    # Guard against an empty dataset instead of dividing by zero.
    accuracy = 100. * correct_predictions / num_samples if num_samples else 0.0

    print('\nValidation set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct_predictions, num_samples, accuracy))

    return accuracy
    

# custom dataset object for training data 
class Train_Dataset(torch.utils.data.Dataset):
    """Dataset pairing training image arrays with their labels.

    Each item is the transformed PIL image built from the stored array,
    together with the label stored at the same index.
    """

    def __init__(self, training_data, training_labels, transform):
        self.transform = transform
        self.training_data = training_data
        self.training_labels = training_labels

    def __len__(self):
        # One sample per entry in the training data.
        return len(self.training_data)

    def __getitem__(self, i):
        pixels, label = self.training_data[i], self.training_labels[i]
        # The transform is applied to a PIL image built from the raw array.
        return self.transform(Image.fromarray(pixels)), label
    
# custom dataset object for testing data
class Test_Dataset(torch.utils.data.Dataset):
    """Dataset pairing testing image arrays with their labels.

    Each item is the transformed PIL image built from the stored array,
    together with the label stored at the same index.
    """

    def __init__(self, testing_data, testing_labels, transform):
        self.transform = transform
        self.testing_data = testing_data
        self.testing_labels = testing_labels

    def __len__(self):
        # One sample per entry in the testing data.
        return len(self.testing_data)

    def __getitem__(self, i):
        pixels, label = self.testing_data[i], self.testing_labels[i]
        # The transform is applied to a PIL image built from the raw array.
        return self.transform(Image.fromarray(pixels)), label
    
def load_data_and_create_dataloaders(batch_size=4, num_workers=0):
    """Load CIFAR-100 (coarse labels) and wrap it in train/test data loaders.

    Args:
        batch_size: Number of samples per batch for both loaders.
        num_workers: Number of worker processes used by both loaders.

    Returns:
        tuple: ``(train_data_loader, test_data_loader)``. The training loader
        shuffles its samples; the testing loader keeps the dataset order.
    """
    (training_data, training_labels), (testing_data, testing_labels) = (
        cifar100.load_data(label_mode="coarse"))

    # Drop the singleton axis so each label is a scalar.
    training_labels = np.squeeze(training_labels)
    testing_labels = np.squeeze(testing_labels)

    # Transformations applied to every image.
    transform = transforms.Compose([transforms.ToTensor()])

    # Pinned memory only helps host-to-GPU copies; requesting it without
    # CUDA just triggers a warning.
    pin_memory = torch.cuda.is_available()

    training_set = Train_Dataset(training_data, training_labels, transform=transform)
    train_data_loader = torch.utils.data.DataLoader(
        training_set,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )

    testing_set = Test_Dataset(testing_data, testing_labels, transform=transform)
    test_data_loader = torch.utils.data.DataLoader(
        testing_set,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )

    return train_data_loader, test_data_loader
    
def main():
    """Train a one-hidden-layer Perceptron and report accuracy each iteration.

    Builds the data loaders, trains with SGD for a fixed number of
    iterations, evaluates on the test loader after every iteration and
    finally prints the elapsed time in minutes.
    """
    train_data_loader, test_data_loader = load_data_and_create_dataloaders()

    # Model and training configuration.
    num_hidden_units, num_hidden_layers = 200, 1
    num_iterations = 20

    # Timing covers model construction, training and evaluation, not data loading.
    start_time = time.time()

    classifier = Perceptron(num_hidden_units, num_hidden_layers)

    # Stochastic gradient descent; other optimisers could be tried here.
    optimizer = torch.optim.SGD(
        classifier.parameters(), lr=0.001, momentum=.75, weight_decay=.0005)

    for _ in range(num_iterations):
        train_classifier(classifier, train_data_loader, optimizer)

        # TODO: change validation stuff to test
        calculate_predictions(classifier, test_data_loader)

    elapsed_minutes = (time.time() - start_time) / 60

    print("Total time", elapsed_minutes)


# Start training only when executed directly (script or notebook cell),
# so importing this file does not kick off a full training run.
if __name__ == "__main__":
    main()


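# Output of the run above (interrupted manually before all 20 iterations finished):
#
# Validation set: Accuracy: 2208/10000 (22%)
#
# Validation set: Accuracy: 2454/10000 (25%)
#
# Validation set: Accuracy: 2494/10000 (25%)
#
# KeyboardInterrupt: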