<a href="https://colab.research.google.com/github/PriyankaGona/machinelearning-assignments/blob/master/problem3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import keras
import numpy as np
from keras.datasets import mnist
import sys

Using TensorFlow backend.


In [0]:
# convert values to binary values
def to_categorical(y,nb_classes):
  output = []
  for i in y:
    temp = np.zeros(nb_classes)
    temp[i] = 1
    output.append(temp)
  output = np.array(output)
  output = output.reshape(y.shape[0],nb_classes)
  return output

In [0]:
# initialize weights and bias
def init(dim):
  W = np.random.randn(nb_classes,dim)*0.01
  b = np.random.randn(nb_classes,1)
  return W,b

In [0]:
# softmax function
def softmax(X):
    return np.exp(X)/np.sum(np.exp(X), axis = 0, keepdims = True)

In [0]:
# forward propagation
def forward_propagate(W,b,x):
  linear_transformation = np.dot(W,np.transpose(x)) + (b)
  output = softmax(linear_transformation)
  return output

In [0]:
# crossentropy loss function
def crossentropy_loss(y_train,output,batch_size):
  loss = -np.sum(np.multiply(np.transpose(y_train),np.log(output))+np.multiply(np.transpose(y_train),np.log(output)),axis=1,keepdims=True)/batch_size
  return np.squeeze(loss)

In [0]:
# backward propogation
def backward_propagate(x_train,y_train,output,W,b,learning_rate,batch_size):
  dz = output - np.transpose(y_train)
  dW = np.dot(dz,x_train)
  db = np.sum(dz,axis=1,keepdims=True)
  W = W - learning_rate*dW
  b = b - learning_rate*db
  return W,b

In [0]:
# mini-batch stochastic gradient descent
def sgd(W,b,x_train,y_train,learning_rate,batch_size,epochs):
  for j in range(epochs):
    training_loss = []
    for i in range(0, x_train.shape[0], batch_size):
      x_train_mini = x_train[i:i+batch_size]
      y_train_mini = y_train[i:i+batch_size]
      output = forward_propagate(W,b,x_train_mini)
      loss_matrix = crossentropy_loss(y_train_mini,output,batch_size)
      loss_list = loss_matrix.tolist()
      training_loss.extend(loss_list)
      W,b = backward_propagate(x_train_mini,y_train_mini,output,W,b,learning_rate,batch_size)
    epochLoss=sum(training_loss)/len(training_loss)
    print("Epoch {}/{}\t - loss : {}".format(j+1,epochs,round(epochLoss,4)))

  return W,b

In [0]:
# calculating accuracy of test data after training  
def predict(W, b, X, Y):
    Y_prediction = forward_propagate(W,b,X)
    Y_prediction = Y_prediction.T
    accuracy = 100 - np.mean(np.abs(Y_prediction - Y)) * 100
    return round(accuracy,4)

In [0]:
# variables initialization
learning_rate = 0.005
batch_size = 8
experiments = 20
nb_classes=10

In [11]:
# mnist dataset loading using keras
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# reshaping the image to 1-dimensional vector
x_train = x_train.reshape(x_train.shape[0],x_train.shape[1]*x_train.shape[2])
x_test = x_test.reshape(x_test.shape[0],x_test.shape[1]*x_test.shape[2])
y_actual_testdata = y_test

y_train = to_categorical(y_train,nb_classes)
y_test = to_categorical(y_test,nb_classes)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# normalizing
x_train /= 255
x_test /= 255


W,b = init(x_train.shape[1])
W,b = sgd(W,b,x_train,y_train,learning_rate,batch_size,experiments)

acc = predict(W,b,x_test,y_test)
print("Accuracy: {}".format(acc))

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
Epoch 1/20	 - loss : 0.0755
Epoch 2/20	 - loss : 0.0599
Epoch 3/20	 - loss : 0.0572
Epoch 4/20	 - loss : 0.0557
Epoch 5/20	 - loss : 0.0547
Epoch 6/20	 - loss : 0.054
Epoch 7/20	 - loss : 0.0534
Epoch 8/20	 - loss : 0.0529
Epoch 9/20	 - loss : 0.0525
Epoch 10/20	 - loss : 0.0521
Epoch 11/20	 - loss : 0.0518
Epoch 12/20	 - loss : 0.0516
Epoch 13/20	 - loss : 0.0513
Epoch 14/20	 - loss : 0.0511
Epoch 15/20	 - loss : 0.0509
Epoch 16/20	 - loss : 0.0507
Epoch 17/20	 - loss : 0.0506
Epoch 18/20	 - loss : 0.0504
Epoch 19/20	 - loss : 0.0503
Epoch 20/20	 - loss : 0.0501
Accuracy: 97.5806
