In [0]:
#Submitted by Karan Bhatia (kb3053) and Durvesh Vedak (dvv223) 
import numpy as np

In [0]:
import torch
import torchvision
import torchvision.transforms as transforms

In [0]:
# http://pytorch.org/
# Install a pinned PyTorch wheel (plus torchvision) into the running environment.
# NOTE(review): earlier cells already import torch/torchvision, so on a fresh
# kernel this cell has to run before them -- consider moving it to the top.
from os import path
# NOTE(review): wheel.pep425tags is presumably only available in older releases
# of the `wheel` package -- confirm before re-running on a current environment.
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
# Tag formatted as "<impl><version>-<abi>"; it is substituted into the wheel file name below.
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

# Select the 'cu80' wheel directory when /opt/bin/nvidia-smi exists, otherwise 'cpu'.
accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

# {accelerator} and {platform} are interpolated by IPython into the shell command.
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.3.0.post4-{platform}-linux_x86_64.whl torchvision
import torch

In [0]:
# http://pytorch.org/
# NOTE(review): this cell repeats the preceding install cell line for line;
# running it again just re-issues the same pip command.
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
# Tag formatted as "<impl><version>-<abi>"; it is substituted into the wheel file name below.
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

# Select the 'cu80' wheel directory when /opt/bin/nvidia-smi exists, otherwise 'cpu'.
accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

# {accelerator} and {platform} are interpolated by IPython into the shell command.
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.3.0.post4-{platform}-linux_x86_64.whl torchvision
import torch

In [0]:
# Preprocessing pipeline: tensor conversion followed by per-channel
# normalisation with mean 0.5 and std 0.5 on each of the three channels.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

In [9]:
# Download (if needed) and open the CIFAR-10 training split under ./data.
# `transform` must be a callable applied to each image; pass the pipeline
# built earlier rather than the boolean True, which would fail as soon as
# the dataset is indexed.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


In [10]:
# Open the CIFAR-10 test split under ./data (downloaded on demand).
# `transform` must be a callable applied to each image; pass the pipeline
# built earlier rather than the boolean True, which would fail as soon as
# the dataset is indexed.
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

Files already downloaded and verified


In [0]:
trainset_x = trainset.train_data

In [0]:
trainset_y = np.array(trainset.train_labels)

In [0]:
trainset_y = trainset_y.reshape(trainset_y.shape[0],)

In [0]:
testset_x = testset.test_data

In [0]:
testset_y = np.array(testset.test_labels)

In [0]:
testset_y = testset_y.reshape(testset_y.shape[0],)

In [0]:
class NeuralNetwork(object):
    """Fully connected network with ReLU hidden layers and a softmax output.

    Every data matrix stores one example per column: inputs are
    (n_features, m) and one-hot labels are (n_classes, m).
    """
    def __init__(self, layer_dimensions):
        """
        param layer_dimensions: A python list containing number of 
        neurons in each layer starting from input layer to output layer.
        """
        # Number of layers including input and output layer.
        self.num_layers = len(layer_dimensions)
        # Parameter arrays between consecutive layers, keyed 'W1', 'b1', 'W2', ...
        self.parameters = {}

        self.layer_dimensions = layer_dimensions
        for i in range(1, self.num_layers):
            # Small random weights of shape (fan_out, fan_in); biases start at zero.
            self.parameters['W' + str(i)] = np.random.randn(self.layer_dimensions[i], layer_dimensions[i-1]) * 0.001
            self.parameters['b' + str(i)] = np.zeros((self.layer_dimensions[i], 1))

    def affineForward(self, A, W, b):
        """Return the affine map W.A + b (b is broadcast across columns)."""
        return np.dot(W, A) + b

    def relu(self, X):
        """Element-wise max(X, 0)."""
        return np.maximum(X, 0)

    def softmax(self, Z):
        """Column-wise softmax of the logits Z.

        The per-column maximum is subtracted before exponentiating. This
        leaves the result mathematically unchanged but prevents np.exp from
        overflowing to inf (and the ratio from becoming NaN) on large logits.
        """
        shifted = Z - np.max(Z, axis=0, keepdims=True)
        exp_scores = np.exp(shifted)
        return exp_scores / np.sum(exp_scores, axis=0, keepdims=True)

    def activationForward(self, A, W, b, activation = "relu"):
        """Apply one affine layer followed by the named activation.

        Returns (activation_output, pre_activation).
        Raises ValueError for any activation other than "relu", instead of
        silently returning None.
        """
        if activation != "relu":
            raise ValueError("Unsupported activation: {!r}".format(activation))

        Z = self.affineForward(A, W, b)
        return self.relu(Z), Z

    def forwardPropagation(self, X):
        """Run X through every layer.

        Returns (Z, y_hat, caches) where Z holds the output-layer logits,
        y_hat the softmax probabilities, and caches[i] the pair
        (activation, pre_activation) of layer i; caches[0] is (X, None).
        """
        caches = [(X, None)]
        A = X

        # Hidden layers: affine + ReLU.
        for i in range(1, self.num_layers - 1):
            A, Z = self.activationForward(A, self.parameters['W' + str(i)], self.parameters['b' + str(i)], activation='relu')
            caches.append((A, Z))

        # Output layer: affine only; softmax is applied to the logits.
        last = self.num_layers - 1
        Z = self.affineForward(A, self.parameters['W' + str(last)], self.parameters['b' + str(last)])
        y_hat = self.softmax(Z)

        return Z, y_hat, caches

    def cost(self, y_hat, Y):
        """Mean cross-entropy between probabilities y_hat and one-hot Y.

        Probabilities are clipped away from zero so that log(0) cannot turn
        the loss into NaN/inf.
        """
        safe_probs = np.clip(y_hat, 1e-12, None)
        loss = - np.sum(np.multiply(Y, np.log(safe_probs))) / Y.shape[1]

        return loss

    def relu_derivative(self, x):
        """Return 1 where x > 0 and 0 elsewhere, as a new array.

        The input is left untouched: it is usually a cached pre-activation,
        and overwriting it in place would corrupt the cache for any later use.
        """
        x = np.asarray(x)
        return (x > 0).astype(x.dtype)

    def backPropagation(self, Z, y_hat, caches, Y):
        """Compute gradients of the mean cross-entropy loss.

        Returns a dict with 'dW<i>' and 'db<i>' for every layer i, each
        averaged over the m examples (columns) of the batch.
        """
        gradients = {}
        m = Y.shape[1]
        last = self.num_layers - 1

        # Softmax combined with cross-entropy gives this simple output error.
        dZL = y_hat - Y

        gradients["dW" + str(last)] = np.dot(dZL, caches[-1][0].T) / m
        gradients["db" + str(last)] = np.sum(dZL, axis = 1, keepdims= True) / m

        dz_prev = dZL

        # Walk the hidden layers from last to first.
        for i in range(self.num_layers - 2, 0, -1):
            # Error flowing back through the weights of layer i+1.
            da = np.dot(self.parameters['W' + str(i+1)].T, dz_prev)
            # Gate by the ReLU slope at layer i's pre-activation.
            dz = np.multiply(da, self.relu_derivative(caches[i][1]))

            gradients["dW" + str(i)] = np.dot(dz, caches[i-1][0].T) / m
            gradients["db" + str(i)] = np.sum(dz, axis = 1 , keepdims= True) / m

            dz_prev = dz

        return gradients

    def update_weights(self, gradients, alpha):
        """Take one gradient-descent step of size alpha on every W and b."""
        for i in range(1, self.num_layers):
            self.parameters['W' + str(i)] = self.parameters['W' + str(i)] - alpha * gradients['dW' + str(i)]
            self.parameters['b' + str(i)] = self.parameters['b' + str(i)] - alpha * gradients['db' + str(i)]

    def train(self, X, Y , iters = 1, alpha = 0.0001, batch_size = 100, print_every = 100):
        """Train with mini-batch gradient descent.

        X is (n_features, m) and Y is one-hot (n_classes, m). Batches are
        consecutive column slices taken in order (no shuffling).

        Returns a list with one entry per epoch: the loss of the LAST
        mini-batch of that epoch (0 if X has no columns).
        `print_every` is accepted for interface compatibility but is unused.
        """
        costs = []

        for j in range(iters):
            loss = 0
            print("Epoch "+ str(j))
            for i in range(0, X.shape[1], batch_size):
                x_batch = X[:, i:i+batch_size]
                y_batch = Y[:, i:i+batch_size]

                Z, y_hat, caches = self.forwardPropagation(x_batch)
                loss = self.cost(y_hat, y_batch)

                gradients = self.backPropagation(Z, y_hat, caches, y_batch)
                self.update_weights(gradients, alpha)

            print(loss)
            costs.append(loss)

        return costs

    def predict(self, X_test):
        """Return the class probabilities, shape (n_classes, m), for X_test."""
        z, y_hat, caches = self.forwardPropagation(X_test)

        return y_hat

In [0]:
nn = NeuralNetwork([3072,150,100,10])

In [0]:
training_x_set = trainset_x

In [0]:
training_x_set = training_x_set.reshape(training_x_set.shape[0],-1)

In [0]:
training_y_set = trainset_y

In [0]:
def convert_to_one_hot(Y, num_classes=None):
    """One-hot encode a vector of integer class labels.

    Parameters:
        Y: array-like of integer-valued labels. Any shape is accepted and is
           flattened first, so a column vector (m, 1) is encoded correctly
           instead of broadcasting inside the fancy index.
        num_classes: optional width of the encoding. Defaults to max(Y) + 1
           (a max label of 4 needs 5 columns). Pass it explicitly when a
           split may not contain the highest label.

    Returns:
        A float array of shape (m, num_classes) with a single 1 per row.

    Raises:
        ValueError: if Y is empty and num_classes is not given, or if Y
            contains non-integer values.
    """
    labels = np.asarray(Y).reshape(-1)

    if labels.size == 0 and num_classes is None:
        raise ValueError("Cannot infer the number of classes from empty labels")

    # Indexing needs integers; refuse to silently truncate fractional labels.
    int_labels = labels.astype(np.int64)
    if not np.array_equal(int_labels, labels):
        raise ValueError("Labels must be integer-valued")

    if num_classes is None:
        num_classes = int(int_labels.max()) + 1

    # Initializing one-hot numpy array
    a = np.zeros((int_labels.shape[0], num_classes))
    print(a.shape)
    # Storing 1 at index == label for an example.
    a[np.arange(int_labels.shape[0]), int_labels] = 1

    return a

In [23]:
# Encode from the original integer labels rather than from training_y_set
# itself, so re-running this cell does not try to one-hot encode an
# already one-hot matrix.
training_y_set = convert_to_one_hot(trainset_y)

(50000, 10)


In [102]:
# Ten epochs in mini-batches of 100. train() expects one example per column,
# hence the transposes.
costs = nn.train(
    training_x_set.T,
    training_y_set.T,
    iters=10,
    alpha=0.0002,
    batch_size=100,
)

Epoch 0
0.8981679450616866
Epoch 1
0.9107100398834639
Epoch 2
0.902143238557128
Epoch 3
0.8890928636272026
Epoch 4
0.8738777637521636
Epoch 5
0.8735680015746975
Epoch 6
0.8783691884334109
Epoch 7
0.8733441395601149
Epoch 8
0.8909076653560166
Epoch 9
0.8753418718531143


In [0]:
test_x_set = testset_x

In [0]:
test_x_set = test_x_set.reshape(test_x_set.shape[0],-1)

In [0]:
def predict(X, nn):
    """Run a forward pass of `nn` on X and return only the predicted probabilities.

    `nn.forwardPropagation` returns a 3-tuple; the middle element (y_hat) is
    what gets handed back, the other two are discarded.
    """
    _, class_probabilities, _ = nn.forwardPropagation(X)
    return class_probabilities
  

In [0]:
y_hat_test = predict(test_x_set.T, nn)

In [107]:
y_hat_test.shape

(10, 10000)

In [0]:
predictions_labels = np.argmax(y_hat_test, axis = 0)


In [116]:
predictions_labels = predictions_labels.astype(int)

3


In [0]:
# Write the predicted labels to a text file using integer formatting.
np.savetxt(
    'ans1-dvv223.txt',
    predictions_labels,
    fmt='%i',
    delimiter='\n',
)

In [0]:
acc = np.sum(testset_y == predictions_labels) / testset_y.shape[0]

In [110]:
acc

0.5251

In [80]:
# Quick look at the predicted labels.
print(predictions_labels)

[3 1 0 ... 3 4 7]


In [0]:
# Reference to the network's parameter dictionary (same object, not a copy).
params = nn.parameters

In [69]:
# NOTE(review): displaying the whole dict dumps every weight array into the
# notebook output; showing only the shapes would be easier to read.
params

{'W1': array([[ 0.00103204,  0.00012186,  0.00111803, ..., -0.00091976,
         -0.00060246,  0.00080853],
        [-0.00277991, -0.00093367, -0.00134731, ..., -0.00103389,
         -0.0004657 , -0.00038179],
        [-0.00191525, -0.00352181, -0.00429492, ...,  0.00367479,
          0.00170952,  0.00227144],
        ...,
        [-0.00311895, -0.00285253,  0.00281467, ..., -0.00142098,
          0.00018167,  0.00026197],
        [ 0.00069898,  0.00209148,  0.00831094, ...,  0.00247318,
         -0.00047954, -0.00309718],
        [ 0.0034142 ,  0.00012669, -0.0027484 , ..., -0.00263904,
         -0.00049635,  0.0034235 ]]),
 'W2': array([[ 0.00317419,  0.00251642, -0.00266715, ..., -0.00285604,
         -0.01088401,  0.00371027],
        [-0.00209123,  0.00151611,  0.0113429 , ..., -0.01797853,
         -0.02726917,  0.00403793],
        [ 0.00069928, -0.00536933,  0.00666701, ...,  0.00404588,
         -0.00516287,  0.00438775],
        ...,
        [ 0.00131813,  0.0137815 , -0.0031