In [3]:
import numpy as np
import pandas as pd

def sigmoid(x,w):
  return 1/(1 + np.exp(-1*np.dot(x,w.T)))

def pred(y):
  return (np.argmax(y, axis=1) + 1)

def holdout(data, trainp, validp):
  m = np.shape(data)[0]

  train = data[0 : int(np.floor(m*trainp/100))]
  valid = data[int(np.floor(m*trainp/100)) : int(np.floor(m*trainp/100))+int(np.floor(m*validp/100))]
  test = data[int(np.floor(m*trainp/100))+int(np.floor(m*validp/100)) : None]

  y_train = train[:,-1] #shape = (rows,)
  x_train = np.delete(train, -1, axis=1)

  y_valid = valid[:,-1] #shape = (rows,)
  x_valid = np.delete(valid, -1, axis=1)

  y_test = test[:,-1] #shape = (rows,)
  x_test = np.delete(test, -1, axis=1)

  return x_train,x_valid,x_test,y_train,y_valid,y_test

def five_fold(data):
  y = data[:,-1]
  x = np.delete(data, -1, axis=1)
  x_subsets = np.array_split(x, 5)
  y_subsets = np.array_split(y, 5)
  return x_subsets,y_subsets

def appendones(x):
  m = np.shape(x)[0]
  return np.concatenate((np.ones((m,1)),x), axis=1)

def one_hot_encode(y):
  len = np.size(y)
  encoded_y = np.zeros((len,3))
  for i in range(len):
    if y[i] == 1:
      encoded_y[i,0] = 1
    if y[i] == 2:
      encoded_y[i,1] = 1
    if y[i] == 3:
      encoded_y[i,2] = 1
  return encoded_y

def performance(y, pred):
  m = np.zeros((3,3)) # confusion matrix
  for p in range(len(pred)):
    if pred[p]==1 and y[p]==1:
      m[0,0]+=1
    if pred[p]==2 and y[p]==2:
      m[1,1]+=1
    if pred[p]==3 and y[p]==3:
      m[2,2]+=1
    if pred[p]==1 and y[p]==2:
      m[1,0]+=1
    if pred[p]==1 and y[p]==3:
      m[2,0]+=1
    if pred[p]==2 and y[p]==1:
      m[0,1]+=1
    if pred[p]==2 and y[p]==3:
      m[2,1]+=1
    if pred[p]==3 and y[p]==1:
      m[0,2]+=1
    if pred[p]==3 and y[p]==2:
      m[1,2]+=1
  ind_accuracy = [m[0,0]/np.sum(m[0,:]), m[1,1]/np.sum(m[1,:]), m[2,2]/np.sum(m[2,:])]
  accuracy = (m[0,0]+m[1,1]+m[2,2])/np.sum(m)
  return ind_accuracy, accuracy

In [4]:
data = pd.read_excel('/content/drive/MyDrive/NNFL Assignments (Aug 2021)/Assignment 2/data5.xlsx')
cols = np.array(data.columns, ndmin=2)
data = data.to_numpy()
data = np.concatenate((cols,data), axis=0)

# shuffle & train-test-valid-split
np.random.seed(0)
np.random.shuffle(data)
x_train, x_valid, x_test, y_train, y_valid, y_test = holdout(data, 70, 10)

# normalizing input data
mu = np.mean(x_train, axis=0)
std = np.std(x_train, axis=0)

x_train = (x_train-mu)/std
x_valid = (x_valid-mu)/std
x_test = (x_test-mu)/std

# one-hot-encoding output data
y_train_coded = one_hot_encode(y_train)
y_valid_coded = one_hot_encode(y_valid)
y_test_coded = one_hot_encode(y_test)

# appending ones (input for bias)
x_train = appendones(x_train)
x_valid = appendones(x_valid)
x_test = appendones(x_test)

m_train = np.size(y_train)
m_valid = np.size(y_valid)
m_test = np.size(y_test)

ni = np.shape(x_train)[1] # number of input neurons (features)
no = 3 # number of ouput neurons (classes)

In [None]:
# grid search for scale parameter to choose number of hidden neurons
scale = list(np.arange(2,11))

accuracy_vals = np.zeros(((len(scale),len(scale))))
for s1 in scale:
  for s2 in scale:
    nh1 = (m_train/(s1*(ni + no))).astype(int)
    nh2 = (m_train/(s2*(nh1 + no))).astype(int)

    T = 200 #iters
    alpha = 0.1 #learning rate

    # random weights
    np.random.seed(0)
    w1i = np.random.randn(nh1, ni)
    np.random.seed(0)
    w21 = np.random.randn(nh2, nh1)
    np.random.seed(0)
    wo2 = np.random.randn(no, nh2)

    # training
    for i in range(T):

      # forward path propagation
      y1 = sigmoid(x_train,w1i)
      y2 = sigmoid(y1,w21)
      y =  sigmoid(y2,wo2)

      # back propagation
      
      """updating output layer weights"""
      delta = (y_train_coded - y) * y * (1 - y)
      wo2 = wo2 + alpha * np.dot(delta.T, y2)

      """updating 2nd hidden layer weights"""
      delta = y2*(1-y2) * np.dot(delta,wo2)
      w21 = w21 + alpha * np.dot(delta.T, y1)

      """updating 1st hidden layer weights"""
      delta = y1*(1-y1) * np.dot(delta,w21)
      w1i = w1i + alpha * np.dot(delta.T, x_train)

    # validation
    y1 = sigmoid(x_valid,w1i)
    y2 = sigmoid(y1,w21)
    y =  sigmoid(y2,wo2)

    y = pred(y)

    # performance measures
    ind_accuracy, accuracy = performance(y_valid, y)
    accuracy_vals[scale.index(s1)][scale.index(s2)] = accuracy
    
# index of maximum accuracy
index = np.unravel_index(np.argmax(accuracy_vals, axis=None), accuracy_vals.shape)
    
print("maximum validation accuracy = {}".format(accuracy_vals[index]))
print("index = {}".format(index))
print("Optimal s1 value = {}\nOptimal s2 value = {}".format(scale[index[0]],scale[index[1]]))

maximum validation accuracy = 1.0
index = (0, 0)
Optimal s1 value = 2
Optimal s2 value = 2


In [None]:
# optimal number of hidden neurons
s1 = 2
s2 = 2
nh1 = int(m_train/(s1*(ni + no)))
nh2 = int(m_train/(s2*(nh1 + no)))

print(nh1, nh2)

6 8


In [None]:
nh1 = 6 # number of neurons in first hidden layer
nh2 = 8 # number of neurons in second hidden layer

T = 200 #iters
alpha = 0.1 #learning rate

# random weights
np.random.seed(0)
w1i = np.random.randn(nh1, ni)
np.random.seed(0)
w21 = np.random.randn(nh2, nh1)
np.random.seed(0)
wo2 = np.random.randn(no, nh2)

# training
for i in range(T):

  # forward path propagation
  y1 = sigmoid(x_train,w1i)
  y2 = sigmoid(y1,w21)
  y =  sigmoid(y2,wo2)

  # back propagation
  
  """updating output layer weights"""
  delta = (y_train_coded - y) * y * (1 - y)
  wo2 = wo2 + alpha * np.dot(delta.T, y2)

  """updating 2nd hidden layer weights"""
  delta = y2*(1-y2) * np.dot(delta,wo2)
  w21 = w21 + alpha * np.dot(delta.T, y1)

  """updating 1st hidden layer weights"""
  delta = y1*(1-y1) * np.dot(delta,w21)
  w1i = w1i + alpha * np.dot(delta.T, x_train)

# testing
y1 = sigmoid(x_test,w1i)
y2 = sigmoid(y1,w21)
y =  sigmoid(y2,wo2)

y = pred(y)

# performance measures
ind_accuracy, accuracy = performance(y_test, y)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 1.0
accuracy of class 2 = 1.0
accuracy of class 3 = 0.8947368421052632
overall accuracy of classifier = 0.9523809523809523


In [6]:
# using 5-fold cross validation
x_subsets, y_subsets = five_fold(data)

T = 200
alpha = 0.1
ni = np.shape(x_train)[1] # number of input neurons (features)
no = 3 # number of ouput neurons (classes)
nh1 = 6 # number of neurons in first hidden layer
nh2 = 8 # number of neurons in second hidden layer

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(5):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  # normalizing input data
  mu = np.mean(x_train, axis=0)
  std = np.std(x_train, axis=0)

  x_train = (x_train-mu)/std
  x_test = (x_test-mu)/std

  # one-hot-encoding output data
  y_train_coded = one_hot_encode(y_train)
  y_test_coded = one_hot_encode(y_test)

  # appending ones (input for bias)
  x_train = appendones(x_train)
  x_test = appendones(x_test)

  # random weights
  np.random.seed(0)
  w1i = np.random.randn(nh1, ni)
  np.random.seed(0)
  w21 = np.random.randn(nh2, nh1)
  np.random.seed(0)
  wo2 = np.random.randn(no, nh2)

  # training
  for i in range(T):

    # forward path propagation
    y1 = sigmoid(x_train,w1i)
    y2 = sigmoid(y1,w21)
    y =  sigmoid(y2,wo2)

    # back propagation
    
    """updating output layer weights"""
    delta = (y_train_coded - y) * y * (1 - y)
    wo2 = wo2 + alpha * np.dot(delta.T, y2)

    """updating 2nd hidden layer weights"""
    delta = y2*(1-y2) * np.dot(delta,wo2)
    w21 = w21 + alpha * np.dot(delta.T, y1)

    """updating 1st hidden layer weights"""
    delta = y1*(1-y1) * np.dot(delta,w21)
    w1i = w1i + alpha * np.dot(delta.T, x_train)

  # testing
  y1 = sigmoid(x_test,w1i)
  y2 = sigmoid(y1,w21)
  y =  sigmoid(y2,wo2)

  y = pred(y)

  # performance measures
  ind_accuracy, accuracy = performance(y_test, y)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])


print("mean accuracy of class 1 = {}".format(np.mean(ind_accuracy1)))
print("mean accuracy of class 2 = {}".format(np.mean(ind_accuracy2)))
print("mean accuracy of class 3 = {}".format(np.mean(ind_accuracy3)))
print("overall accuracy of classifier = {}".format(np.mean(accuracy_vals)))

mean accuracy of class 1 = 0.9453781512605042
mean accuracy of class 2 = 0.9625
mean accuracy of class 3 = 0.9356140350877192
overall accuracy of classifier = 0.9428571428571428
