In [1]:
import numpy as np
import pandas as pd

def gaussian(v,x):
  m = np.shape(x)[0]
  L = np.shape(v)[0]
  final_out = np.zeros((m,L))
  for i in range(m):
    for j in range(L):
      norm_arg = x[i,1:] - v[j,1:]
      norm = np.linalg.norm(norm_arg)
      bias_term = -1*v[j,0]
      final_out[i,j] = np.exp(bias_term*norm)
  return final_out

def five_fold(data):
  y = data[:,-1]
  x = np.delete(data, -1, axis=1)
  x_subsets = np.array_split(x, 5)
  y_subsets = np.array_split(y, 5)
  return x_subsets,y_subsets

def appendones(x):
  m = np.shape(x)[0]
  return np.concatenate((np.ones((m,1)),x), axis=1)

def one_hot_encode(y):
  len = np.size(y)
  encoded_y = np.zeros((len,3))
  for i in range(len):
    if y[i] == 1:
      encoded_y[i,0] = 1
    if y[i] == 2:
      encoded_y[i,1] = 1
    if y[i] == 3:
      encoded_y[i,2] = 1
  return encoded_y

def decode(y):
  len = np.shape(y)[0]
  class_labels = np.zeros(len)
  for i in range(len):
    class_labels[i] = np.argmax(y[i])+1
  return class_labels

def performance(y, pred):
  m = np.zeros((3,3)) # confusion matrix
  for p in range(len(pred)):
    if pred[p]==1 and y[p]==1:
      m[0,0]+=1
    if pred[p]==2 and y[p]==2:
      m[1,1]+=1
    if pred[p]==3 and y[p]==3:
      m[2,2]+=1
    if pred[p]==1 and y[p]==2:
      m[1,0]+=1
    if pred[p]==1 and y[p]==3:
      m[2,0]+=1
    if pred[p]==2 and y[p]==1:
      m[0,1]+=1
    if pred[p]==2 and y[p]==3:
      m[2,1]+=1
    if pred[p]==3 and y[p]==1:
      m[0,2]+=1
    if pred[p]==3 and y[p]==2:
      m[1,2]+=1
  ind_accuracy = [m[0,0]/np.sum(m[0,:]), m[1,1]/np.sum(m[1,:]), m[2,2]/np.sum(m[2,:])]
  accuracy = (m[0,0]+m[1,1]+m[2,2])/np.sum(m)
  return ind_accuracy, accuracy

In [2]:
data = pd.read_excel('/content/drive/MyDrive/NNFL Assignments (Aug 2021)/Assignment 2/data5.xlsx')
cols = np.array(data.columns, ndmin=2)
data = data.to_numpy()
data = np.concatenate((cols,data), axis=0)

# shuffle
np.random.seed(0)
np.random.shuffle(data)

In [3]:
# using tan hyperbolic activation
x_subsets, y_subsets = five_fold(data)

ni = 8 # input layer
L = 15 # number of hidden ELM units
no = 3 # output layer

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(5):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  # normalizing input data
  mu = np.mean(x_train, axis=0)
  std = np.std(x_train, axis=0)

  x_train = (x_train-mu)/std
  x_test = (x_test-mu)/std

  # one-hot-encoding output data
  y_train_coded = one_hot_encode(y_train)
  y_test_coded = one_hot_encode(y_test)

  # appending ones (input for bias)
  x_train = appendones(x_train)
  x_test = appendones(x_test)

  ###### TRAINING ######
  # random input-to-hidden layer weights
  np.random.seed(0)
  v = np.random.randn(L, ni)
  
  # hidden layer output (tan hyperbolic activation)
  H = np.tanh(x_train @ v.T)

  # hidden-to-output layer weights
  w = np.linalg.pinv(H) @ y_train_coded

  ###### TESTING ######
  H = np.tanh(x_test @ v.T)
  y = H @ w
  pred = decode(y)

  # performance measures
  ind_accuracy, accuracy = performance(y_test, pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

print("mean accuracy of class 1 = {}".format(ind_accuracy[0]))
print("mean accuracy of class 2 = {}".format(ind_accuracy[1]))
print("mean accuracy of class 3 = {}".format(ind_accuracy[2]))
print("mean overall accuracy of classifier = {}".format(accuracy))

mean accuracy of class 1 = 0.8
mean accuracy of class 2 = 1.0
mean accuracy of class 3 = 0.8947368421052632
mean overall accuracy of classifier = 0.9047619047619048


In [4]:
# using gaussian activation
x_subsets, y_subsets = five_fold(data)

ni = 8 # input layer
L = 31 # number of hidden ELM units
no = 3 # output layer

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(5):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  # normalizing input data
  mu = np.mean(x_train, axis=0)
  std = np.std(x_train, axis=0)

  x_train = (x_train-mu)/std
  x_test = (x_test-mu)/std

  # one-hot-encoding output data
  y_train_coded = one_hot_encode(y_train)
  y_test_coded = one_hot_encode(y_test)

  # appending ones (input for bias)
  x_train = appendones(x_train)
  x_test = appendones(x_test)

  # TRAINING
  # random input-to-hidden layer weights
  np.random.seed(0)
  v = np.random.randn(L, ni)

  # hidden layer output (gaussian activation)
  H = gaussian(v,x_train)

  # hidden-to-output layer weights
  w = np.linalg.pinv(H) @ y_train_coded

  # TESTING
  H = gaussian(v, x_test)
  y = H @ w
  pred = decode(y)

  # performance measures
  ind_accuracy, accuracy = performance(y_test, pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

print("mean accuracy of class 1 = {}".format(ind_accuracy[0]))
print("mean accuracy of class 2 = {}".format(ind_accuracy[1]))
print("mean accuracy of class 3 = {}".format(ind_accuracy[2]))
print("mean overall accuracy of classifier = {}".format(accuracy))

mean accuracy of class 1 = 0.9
mean accuracy of class 2 = 0.9230769230769231
mean accuracy of class 3 = 0.9473684210526315
mean overall accuracy of classifier = 0.9285714285714286
