In [None]:
import time
import numpy as np
import torch
import torchvision
from torch.autograd import Variable
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from PIL import Image
import torch
import pickle

# Constants
# Size of the network's output layer, i.e. the number of classes predicted.
# The loaders in this file feed it the dataset's "coarse" labels.
n_labels = 20
# Length of one flattened input image: 32 x 32 pixels x 3 colour channels.
image_dimensions = 32*32*3


class Perceptron(torch.nn.Module):
    """Fully connected classifier: ELU hidden layers with dropout, log-softmax output.

    Each hidden layer ``k`` (1-based) is registered as ``linear_function_<k>``
    followed by ``linear_function_<k>_drop``, so every requested hidden layer
    is actually built and used in ``forward``.

    Args:
        n_hidden_units: width of every hidden layer.
        n_hidden_layers: number of hidden layers (must be >= 1).
        drop_rate: dropout probability applied after each hidden layer.
        n_inputs: flattened input size; defaults to the module-level
            ``image_dimensions`` when omitted.
        n_outputs: number of output classes; defaults to the module-level
            ``n_labels`` when omitted.

    Raises:
        ValueError: if ``n_hidden_layers`` is smaller than 1.
    """

    def __init__(self, n_hidden_units, n_hidden_layers, drop_rate=0.5,
                 n_inputs=None, n_outputs=None):
        super(Perceptron, self).__init__()

        if n_hidden_layers < 1:
            raise ValueError(
                'n_hidden_layers must be >= 1, got {}'.format(n_hidden_layers))

        self.n_hidden_layers = n_hidden_layers

        # Resolve sizes lazily so the module constants remain the defaults.
        self.n_inputs = image_dimensions if n_inputs is None else n_inputs
        if n_outputs is None:
            n_outputs = n_labels

        # Build the hidden stack: Linear (y = Wx + b) followed by Dropout,
        # which zeroes elements with probability ``drop_rate`` during training.
        layer_input_size = self.n_inputs
        for layer_index in range(1, n_hidden_layers + 1):
            layer_name = 'linear_function_{}'.format(layer_index)
            self.add_module(
                layer_name, torch.nn.Linear(layer_input_size, n_hidden_units))
            self.add_module(layer_name + '_drop', torch.nn.Dropout(drop_rate))
            layer_input_size = n_hidden_units

        self.output = torch.nn.Linear(n_hidden_units, n_outputs)

    def forward(self, input_data):
        """Return per-class log-probabilities for a batch of inputs.

        The input is flattened to ``(-1, n_inputs)`` before the first layer.
        """
        input_data = input_data.view(-1, self.n_inputs)

        for layer_index in range(1, self.n_hidden_layers + 1):
            layer_name = 'linear_function_{}'.format(layer_index)
            linear = getattr(self, layer_name)
            dropout = getattr(self, layer_name + '_drop')

            # ELU is the non-linearity between the fully connected layers.
            input_data = dropout(torch.nn.functional.elu(linear(input_data)))

        # log_softmax yields log-probabilities (their exponentials sum to 1
        # along the last dimension), which is what nll_loss expects.
        return torch.nn.functional.log_softmax(self.output(input_data), -1)


def train_model(model, train_loader, optimizer, log_interval=100):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: module whose output is per-class log-probabilities
            (the loss used here is ``nll_loss``).
        train_loader: iterable yielding ``(inputs, labels)`` batches.
        optimizer: optimizer already bound to ``model``'s parameters.
        log_interval: currently unused; kept so existing call sites stay valid.

    Returns:
        ``(average_loss, accuracy_percent)`` over the samples seen in this
        pass, as Python floats. ``(0.0, 0.0)`` if the loader yields nothing.
    """
    # Training mode enables dropout.
    model.train()

    num_correct_predictions = 0
    num_seen = 0
    loss_sum = 0.0

    for training_data, training_labels in train_loader:

        # Gradients accumulate by default, so clear them before each step.
        optimizer.zero_grad()

        prediction = model(training_data)

        # Negative log likelihood on the log-probabilities.
        loss = torch.nn.functional.nll_loss(prediction, training_labels)

        loss.backward()
        optimizer.step()

        # Book-keeping uses plain Python numbers so no autograd history or
        # tensors are kept alive across batches.
        batch_size = training_labels.size(0)
        num_seen += batch_size
        loss_sum += loss.item() * batch_size

        # Index of the max log-probability is the predicted class.
        best_prediction = prediction.detach().max(1)[1]
        num_correct_predictions += best_prediction.eq(training_labels).sum().item()

    if num_seen == 0:
        return 0.0, 0.0

    return loss_sum / num_seen, 100.0 * num_correct_predictions / num_seen

def validate_model(loss_vector, accuracy_vector, model, validation_loader):
    """Evaluate ``model`` on ``validation_loader`` and record the results.

    Args:
        loss_vector: list that receives this run's average loss (appended);
            may be ``None`` to skip recording.
        accuracy_vector: list that receives this run's accuracy in percent
            (appended); may be ``None`` to skip recording.
        model: module whose output is per-class log-probabilities.
        validation_loader: iterable yielding ``(inputs, labels)`` batches.

    The summary line is printed to stdout.
    """
    # eval() disables dropout; no_grad() below skips gradient tracking.
    model.eval()

    loss_sum = 0.0
    correct_predictions = 0
    num_samples = 0

    with torch.no_grad():
        for inputs, targets in validation_loader:

            output = model(inputs)

            # Sum (not mean) per batch so the final average is per sample,
            # which stays correct when the last batch is smaller.
            loss_sum += torch.nn.functional.nll_loss(
                output, targets, reduction='sum').item()

            # Index of the max log-probability is the predicted class.
            best_prediction = output.max(1)[1]
            correct_predictions += best_prediction.eq(targets).sum().item()

            num_samples += targets.size(0)

    # Guard against an empty loader instead of dividing by zero.
    loss = loss_sum / num_samples if num_samples else 0.0
    accuracy = 100. * correct_predictions / num_samples if num_samples else 0.0

    # Record the metrics so callers can track them across epochs.
    if loss_vector is not None:
        loss_vector.append(loss)
    if accuracy_vector is not None:
        accuracy_vector.append(accuracy)

    print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        loss, correct_predictions, num_samples, accuracy))
    
def unpickle(file):
    """Read the pickle stored at path ``file`` and return the loaded object.

    The file is decoded with ``encoding='bytes'``, so string keys written by
    Python 2 come back as ``bytes`` objects.
    """
    # NOTE: pickle can execute arbitrary code while loading; only point this
    # at files from a trusted source.
    with open(file, mode='rb') as stream:
        return pickle.load(stream, encoding='bytes')

class Train_Dataset(torch.utils.data.Dataset):
    """Training images and labels exposed as a map-style dataset.

    Args:
        preprocessed_training_data: array whose rows can be reshaped to
            ``(3, 32, 32)`` each; any number of rows is accepted.
        training_coarse_labels: sequence of labels indexed like the images.
        transform: callable applied to each PIL image, or ``None``.
    """

    def __init__(self, preprocessed_training_data, training_coarse_labels, transform):

        self.train_labels = training_coarse_labels
        self.transform = transform

        # -1 lets the sample count follow the input instead of assuming
        # exactly 50000 rows. Result is (N, 3, 32, 32).
        images = preprocessed_training_data.reshape((-1, 3, 32, 32))

        # Move channels last -> (N, 32, 32, 3), the layout Image.fromarray
        # is given in __getitem__.
        self.train_data = images.transpose((0, 2, 3, 1))

    def __len__(self):
        # total number of training samples
        return len(self.train_data)

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``, with the transform applied if set."""
        img = Image.fromarray(self.train_data[index])
        target = self.train_labels[index]

        if self.transform is not None:
            img = self.transform(img)

        return img, target
    

class Test_Dataset(torch.utils.data.Dataset):
    """Test images and labels exposed as a map-style dataset.

    Args:
        preprocessed_testing_data: array whose rows can be reshaped to
            ``(3, 32, 32)`` each; any number of rows is accepted.
        testing_coarse_labels: sequence of labels indexed like the images.
        transform: callable applied to each PIL image, or ``None``.
    """

    def __init__(self, preprocessed_testing_data, testing_coarse_labels, transform):
        self.test_labels = testing_coarse_labels
        self.transform = transform

        # -1 lets the sample count follow the input instead of assuming
        # exactly 10000 rows. Result is (N, 3, 32, 32).
        images = preprocessed_testing_data.reshape((-1, 3, 32, 32))

        # Move channels last -> (N, 32, 32, 3), the layout Image.fromarray
        # is given in __getitem__.
        self.test_data = images.transpose((0, 2, 3, 1))

    def __len__(self):
        # total number of testing samples
        return len(self.test_data)

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``, with the transform applied if set."""
        img = Image.fromarray(self.test_data[index])
        target = self.test_labels[index]

        if self.transform is not None:
            img = self.transform(img)

        return img, target
    
def load_data_and_create_dataloaders(data_dir='cifar-100-python', batch_size=4):
    """Load the pickled train/test splits and wrap them in DataLoaders.

    Args:
        data_dir: directory containing the pickled ``train`` and ``test`` files.
        batch_size: number of samples per batch for both loaders.

    Returns:
        ``(train_loader, validation_loader)``; the training loader shuffles,
        the validation loader (built from the test split) does not.
    """
    import os

    # Convert each image to a tensor and normalise every channel with
    # mean 0.5 and std 0.5.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # Only the train and test files are needed; the meta file is not read
    # because nothing here uses it.
    train = unpickle(os.path.join(data_dir, 'train'))
    test = unpickle(os.path.join(data_dir, 'test'))

    # Keys are bytes because unpickle decodes with encoding='bytes'.
    training_data = train[b'data']
    training_coarse_labels = train[b'coarse_labels']
    testing_data = test[b'data']
    testing_coarse_labels = test[b'coarse_labels']

    # Pinned host memory only helps when batches are copied to a CUDA device.
    pin_memory = torch.cuda.is_available()

    training_set = Train_Dataset(training_data, training_coarse_labels, transform=transform)
    train_loader = torch.utils.data.DataLoader(
        training_set, batch_size=batch_size, shuffle=True,
        num_workers=0, pin_memory=pin_memory)

    testing_set = Test_Dataset(testing_data, testing_coarse_labels, transform=transform)
    validation_loader = torch.utils.data.DataLoader(
        testing_set, batch_size=batch_size, shuffle=False,
        num_workers=0, pin_memory=pin_memory)

    return train_loader, validation_loader
    
def main():
    """Train a one-hidden-layer Perceptron and validate it after every epoch.

    Builds the data loaders, runs 19 train/validate rounds with SGD, and
    prints the elapsed wall-clock time in minutes.
    """
    train_loader, validation_loader = load_data_and_create_dataloaders()

    # Timing starts after the data has been loaded.
    started_at = time.time()

    model = Perceptron(200, 1)

    # stochastic gradient descent with momentum and L2 weight decay
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=0.001,
        momentum=.75,
        weight_decay=.0005,
    )

    # History lists handed to validate_model on every round.
    losses, accuracies = [], []

    for _ in range(19):
        train_model(model, train_loader, optimizer)
        validate_model(losses, accuracies, model, validation_loader)

    elapsed_minutes = (time.time() - started_at) / 60

    print("Total time", elapsed_minutes)


# Start training only when this file is executed directly (script or notebook
# cell), not when it is imported for its classes and helpers.
if __name__ == "__main__":
    main()




Validation set: Average loss: 2.4015, Accuracy: 2657/10000 (27%)

