In [0]:
# GPU support flag (True = the notebook is meant to run on CUDA).
# NOTE(review): verify that the device-selection cell actually consults this flag.

use_cuda = True


In [2]:
# All imports

import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
import pandas as pd

import matplotlib.pyplot as plt
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras import backend as K
import torch.nn.functional as F

import timeit
import random

Using TensorFlow backend.


In [0]:
# Load MNIST through Keras; load_data() is unpacked into an (inputs, labels)
# pair for the train split and another pair for the test split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()


# # # To print the image of the written number along with the value in the y column 
# image_index = 1 # You may select anything up to y_train.shape[0]
# print(y_train[image_index]) # The label is 0
# plt.imshow(x_train[image_index], cmap='Greys')

In [0]:

# Reshape the images to 4-D (samples, 28, 28, 1), i.e. with an explicit
# single-channel axis. The subset cells below flatten each image to 784 values.
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

input_shape = (28, 28, 1)

# Ensuring the datatype is float (astype returns new arrays, so the in-place
# division below does not modify the arrays returned by load_data()).
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# Scale the single-channel pixel intensities by dividing by 255.
# NOTE: x_train / x_test are rebound to the scaled arrays, so running this cell
# a second time without reloading the data divides by 255 again.
x_train /= 255
x_test /= 255

# Converting to categorical values, one hot encoding.
# The label arrays are passed as loaded: the bare `y.reshape(...)` statements
# that used to sit here discarded their result and therefore had no effect.
num_classes = 10
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# print('x_train shape:', x_train.shape)
# print('Number of images in x_train', x_train.shape[0])
# print('Number of images in x_test', x_test.shape[0])



In [0]:
# Converting to tensor from numpy
#
# torch.as_tensor shares memory with a NumPy array exactly like
# torch.from_numpy, but it also accepts an existing tensor and returns it
# unchanged. That keeps this cell re-runnable: torch.from_numpy raises
# TypeError on the second run because the names are already bound to tensors.

x_train = torch.as_tensor(x_train)
y_train = torch.as_tensor(y_train)

x_test = torch.as_tensor(x_test)
y_test = torch.as_tensor(y_test)

In [0]:
# Training subset: the first `training_size` samples, with every image
# flattened to 784 values and every label row kept at 10 entries.

training_size = 4000
myXtrain = x_train[:training_size].reshape(training_size, 784)
myYtrain = y_train[:training_size].reshape(training_size, 10)

In [0]:
# Test subset: the first `test_size` samples, with every image flattened to
# 784 values and every label row kept at 10 entries.

test_size = 500
myXtest = x_test[:test_size].reshape(test_size, 784)
myYtest = y_test[:test_size].reshape(test_size, 10)

In [0]:
# Random start offset of the contiguous block of hidden units (and matching
# weight rows) that controlled dropout keeps active

def controlled_drop(n, width=2000):
  """Return a random start index for a contiguous block of ``n`` units.

  Args:
    n: number of units to keep.
    width: total number of units in the layer. Defaults to 2000, the
      hidden-layer width used by this notebook's network.

  Returns:
    An int ``s`` with ``0 <= s <= width - n``, so that the slice
    ``[s : s + n]`` always lies inside the layer.

  Raises:
    ValueError: if ``n`` is negative or larger than ``width``.
  """
  if not 0 <= n <= width:
    raise ValueError(
        "cannot keep %r units out of a layer of width %r" % (n, width))
  # randint is inclusive on both ends, so width - n is the last valid offset
  return random.randint(0, width - n)

In [0]:
# CREATING NN ARCHITECTURE 784-2000-2000-10

class myNN(nn.Module):
  """Fully connected 784-2000-2000-10 network with "controlled dropout".

  Instead of masking individual units, each hidden layer keeps one contiguous
  block of ``dp`` units (out of 2000) whose start offset comes from
  ``controlled_drop``; only the matching rows of the following weight matrix
  take part in the product. Hidden activations are divided by the kept
  fraction ``dp / 2000`` before being sliced.
  """

  # Width of both hidden layers; controlled_drop(dp) is expected to return an
  # offset in [0, HIDDEN_UNITS - dp].
  HIDDEN_UNITS = 2000

  def __init__(self):
    super(myNN, self).__init__()
    # nn.Parameter tensors require gradients by default, so no explicit
    # requires_grad_() calls are needed. The creation order is kept so that a
    # seeded run draws the same initial values as before.
    self.weights1 = nn.Parameter(torch.randn(784,2000)*0.1)
    self.weights2 = nn.Parameter(torch.randn(2000,2000)*0.1)
    self.weights3 = nn.Parameter(torch.randn(2000,10)*0.1)
    self.bias1 = nn.Parameter(torch.randn(1,2000))
    self.bias2 = nn.Parameter(torch.randn(1,2000))
    self.bias3 = nn.Parameter(torch.randn(1,10))

  def forward(self, X_train, dp):
    """Compute the output-layer values for a batch.

    Args:
      X_train: input batch whose last dimension is 784 (it is multiplied by
        the 784x2000 first-layer weights).
      dp: number of hidden units kept in each hidden layer. ``dp == 2000``
        keeps every unit, which makes the pass deterministic.

    Returns:
      Tensor with 10 values per input row (no softmax applied).
    """
    keep_fraction = dp / self.HIDDEN_UNITS

    h1 = torch.matmul(X_train, self.weights1) + self.bias1
    # The division must be out-of-place: autograd keeps the ReLU output for the
    # backward pass, and modifying it in place (`a1 /= ...`) makes backward()
    # fail with "modified by an inplace operation".
    a1 = h1.relu() / keep_fraction
    drop1 = controlled_drop(dp)

    h2 = torch.matmul(a1[:, drop1:drop1 + dp], self.weights2[drop1:drop1 + dp, :]) + self.bias2
    a2 = h2.relu() / keep_fraction
    drop2 = controlled_drop(dp)

    h3 = torch.matmul(a2[:, drop2:drop2 + dp], self.weights3[drop2:drop2 + dp, :]) + self.bias3
    return h3
   

In [0]:
def training_drop(myXtrain, myYtrain, firstNN, epochs, opti, criterion,
                  dp=1000, logit_scale=2, log_every=100):
  """Train ``firstNN`` with one full-batch optimiser step per epoch.

  Args:
    myXtrain: input batch, passed unchanged to ``firstNN``.
    myYtrain: 2-D label tensor; the argmax over dim 1 of each row is used as
      the target class index (rows are one-hot in this notebook).
    firstNN: model called as ``firstNN(myXtrain, dp)``.
    epochs: number of optimiser steps to run.
    opti: optimiser holding the parameters of ``firstNN``.
    criterion: loss called as ``criterion(outputs, class_indices)``.
    dp: number of hidden units kept per layer, forwarded to the model.
      Defaults to 1000, the value that was previously hard-coded.
    logit_scale: factor applied to the model output before the loss.
      Defaults to 2, the value that was previously hard-coded ("because
      we've kept dropout at 50%").
      NOTE(review): myNN.forward in this notebook already divides the
      activations by dp/2000, so this extra factor looks like double
      scaling. Confirm before changing the default.
    log_every: print the loss every ``log_every`` epochs; 0 or None disables
      logging.
  """
  # The targets do not change between epochs, so compute the indices once.
  targets = torch.max(myYtrain, 1)[1]

  for epoch in range(epochs):
    y_out = firstNN(myXtrain, dp)
    lossNN = criterion(logit_scale * y_out, targets)
    opti.zero_grad()
    lossNN.backward()
    opti.step()

    if log_every and epoch % log_every == 0:
      print(lossNN, epoch)


In [0]:
# Instantiating my network architecture (myNN.__init__ draws all weights and
# biases with torch.randn).
# NOTE(review): no random seed is set anywhere in the visible notebook, so
# every run starts from different initial weights.
firstNN = myNN()

In [0]:
# Loss function and optimiser used by the training loop

criterion = nn.CrossEntropyLoss()
opti = optim.Adam(params=firstNN.parameters(), lr=5e-5)

In [0]:
# Enabling GPU support for forward and backprop.
# CUDA is used only when it was requested through `use_cuda` AND is actually
# available; otherwise everything stays on the CPU, so the notebook no longer
# crashes on machines without a GPU.
device = torch.device("cuda" if use_cuda and torch.cuda.is_available() else "cpu")

myXtrain = myXtrain.to(device)
myYtrain = myYtrain.to(device)
firstNN = firstNN.to(device)
myXtest = myXtest.to(device)
myYtest = myYtest.to(device)

In [14]:
# Run the training loop and report how long it took (wall-clock seconds)

epochs = 8000

start = timeit.default_timer()
training_drop(
    myXtrain=myXtrain,
    myYtrain=myYtrain,
    firstNN=firstNN,
    epochs=epochs,
    opti=opti,
    criterion=criterion,
)
end = timeit.default_timer()

print("Total time taken is : ", end - start)

tensor(67.8512, device='cuda:0', grad_fn=<NllLossBackward>) 0
tensor(20.7574, device='cuda:0', grad_fn=<NllLossBackward>) 100
tensor(9.4452, device='cuda:0', grad_fn=<NllLossBackward>) 200
tensor(6.0802, device='cuda:0', grad_fn=<NllLossBackward>) 300
tensor(6.2549, device='cuda:0', grad_fn=<NllLossBackward>) 400
tensor(3.1057, device='cuda:0', grad_fn=<NllLossBackward>) 500
tensor(4.4172, device='cuda:0', grad_fn=<NllLossBackward>) 600
tensor(2.4812, device='cuda:0', grad_fn=<NllLossBackward>) 700
tensor(2.3765, device='cuda:0', grad_fn=<NllLossBackward>) 800
tensor(1.1867, device='cuda:0', grad_fn=<NllLossBackward>) 900
tensor(1.1361, device='cuda:0', grad_fn=<NllLossBackward>) 1000
tensor(1.3719, device='cuda:0', grad_fn=<NllLossBackward>) 1100
tensor(0.7048, device='cuda:0', grad_fn=<NllLossBackward>) 1200
tensor(0.8246, device='cuda:0', grad_fn=<NllLossBackward>) 1300
tensor(0.6006, device='cuda:0', grad_fn=<NllLossBackward>) 1400
tensor(0.7999, device='cuda:0', grad_fn=<NllLossBa

In [15]:
# Training accuracy

# Inference only: no_grad() stops autograd from recording a graph for the
# whole training set, which would only cost memory here.
with torch.no_grad():
  postout = firstNN(myXtrain,2000)  # dp=2000: every hidden unit is kept

correct = torch.eq(torch.max(postout, 1)[1], torch.max(myYtrain, 1)[1]).sum()
print(correct)
print("Training accuracy is : ", 100 * correct.item()/ training_size )

tensor(3970, device='cuda:0')
Training accuracy is :  99.25


In [16]:
# Testing accuracy

# Inference only: no_grad() stops autograd from recording a graph for the
# evaluation pass and the loss computed from it.
with torch.no_grad():
  y_outtest = firstNN(myXtest,2000)  # dp=2000: every hidden unit is kept
  lossNNtest = criterion(y_outtest, torch.max(myYtest, 1)[1])
print(lossNNtest.item())
correct_test = torch.eq(torch.max(y_outtest, 1)[1], torch.max(myYtest, 1)[1]).sum()
print(correct_test)
print("Test accuracy is : ", 100 * correct_test.item()/ test_size )

0.8572548627853394
tensor(451, device='cuda:0')
Test accuracy is :  90.2


In [17]:
# Recount the correct test predictions from the outputs of the previous cell
correct_test = (y_outtest.max(dim=1)[1] == myYtest.max(dim=1)[1]).sum()
print(correct_test)
print("Test accuracy is : ", 100 * correct_test.item() / test_size)

tensor(451, device='cuda:0')
Test accuracy is :  90.2
