In [1]:
import numpy as np
from itertools import islice
from scipy.stats import mode

def h(x,w):
  return 1/(1 + np.exp(-1*np.dot(x,w)))

def loglikelihood(y,h): # cost function
  return np.sum(y*np.log10(h) + (1-y)*np.log10(1-h))

def onevall(k,y):
  return np.where(y==k, 1, 0)

def performance(y, pred):
  m = np.zeros((3,3)) # confusion matrix
  for p in range(len(pred)):
    if pred[p]==1 and y[p]==1:
      m[0,0]+=1
    if pred[p]==2 and y[p]==2:
      m[1,1]+=1
    if pred[p]==3 and y[p]==3:
      m[2,2]+=1
    if pred[p]==1 and y[p]==2:
      m[1,0]+=1
    if pred[p]==1 and y[p]==3:
      m[2,0]+=1
    if pred[p]==2 and y[p]==1:
      m[0,1]+=1
    if pred[p]==2 and y[p]==3:
      m[2,1]+=1
    if pred[p]==3 and y[p]==1:
      m[0,2]+=1
    if pred[p]==3 and y[p]==2:
      m[1,2]+=1
  ind_accuracy = [m[0,0]/np.sum(m[0,:]), m[1,1]/np.sum(m[1,:]), m[2,2]/np.sum(m[2,:])]
  accuracy = (m[0,0]+m[1,1]+m[2,2])/np.sum(m)
  return ind_accuracy, accuracy

In [2]:
# getting data from file
data = []
filename = '/content/drive/MyDrive/NNFL Assignments (Aug 2021)/Assignment 1/data_q6_q7.txt'
with open(filename) as file:
  lines = list(islice(file,None,None,None))
  for line in lines:
    entries = line.split('\t')
    while '' in entries: entries.remove('')
    y = int(entries[-1].replace('\n',''))
    entries.pop()
    v = list(map(float,entries))
    v.append(y)
    data.append(v)

data = np.concatenate((np.ones((np.shape(data)[0],1)),np.array(data)), axis=1) #appending ones
np.random.shuffle(data) # shuffling data
x = np.array(data[:,:np.shape(data)[1]-1])
y = np.array(data[:,-1])

In [3]:
m = len(y)
nf = 5 #number of folds
x_subsets = np.array_split(x, nf)
y_subsets = np.array_split(y, nf)

ONE vs. ALL CODING (hyperparameters chosen by trial & error)

In [None]:
# LOR + BGD
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)

alpha = 1
T = 500

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(nf):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  m_train = len(y_train)
  m_test = len(y_test)

  # normalizing input data
  pp = np.amax(np.abs(x_train), axis=0)
  x_train = x_train/pp
  x_test = x_test/pp

  # training 3 models and calculating loglikelihoods and predictions on test data
  loglikelihood_models = []
  pred_models = []
  for k in range(3):
    w = np.zeros(np.shape(x)[1])
    y_train_coded = onevall(k+1, y_train)
    y_test_coded = onevall(k+1, y_test)

    for t in range(T):
      for j in range(len(w)):
        w[j] = w[j] - (alpha/m_train)*np.dot(h(x_train,w) - y_train_coded,x_train[:,j])
  
    loglikelihood_models.append(loglikelihood(y_train_coded, h(x_train,w)))
    pred = np.where(h(x_test,w)>0.5, 1, 0) # prediction of each model on test data
    pred_models.append(pred)

  # final predictions based on prediction of 3 models
  pred_models = np.transpose(pred_models)
  loglikelihood_models = np.array(loglikelihood_models)
  final_pred = np.zeros(m_test)
  worst_model = np.argmin(loglikelihood_models)

  for i in range(m_test):
    pred_vector = np.array(pred_models[i][:])
    ties = list(pred_vector).count(1)
    if ties>1:
      index_tie = np.where(pred_vector==1)
      better_model = np.where(loglikelihood_models==np.max(loglikelihood_models[index_tie]))[0]
      final_pred[i] = better_model+1
    if ties==0:
      final_pred[i] = worst_model+1
    else:
      final_pred[i] = np.argmax(pred_vector)+1

  # performance measures
  ind_accuracy, accuracy = performance(y_test, final_pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

mean_accuracy = np.mean(accuracy_vals)
mean_accuracy1 = np.mean(ind_accuracy1)
mean_accuracy2 = np.mean(ind_accuracy2)
mean_accuracy3 = np.mean(ind_accuracy3)

print("mean accuracy of class 1 = {}".format(mean_accuracy1))
print("mean accuracy of class 2 = {}".format(mean_accuracy3))
print("mean accuracy of class 3 = {}".format(mean_accuracy3))
print("mean accuracy of classifier = {}".format(mean_accuracy))

mean accuracy of class 1 = 0.9346616541353384
mean accuracy of class 2 = 0.8871349238996299
mean accuracy of class 3 = 0.8871349238996299
mean accuracy of classifier = 0.8714285714285716


In [None]:
# LOR + L2-Norm + BGD
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)

alpha = 1
T = 500
Lambda = 0.001

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(nf):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  m_train = len(y_train)
  m_test = len(y_test)

  # normalizing input data
  pp = np.amax(np.abs(x_train), axis=0)
  x_train = x_train/pp
  x_test = x_test/pp

  # training 3 models and calculating loglikelihoods and predictions on test data
  loglikelihood_models = []
  pred_models = []
  for k in range(3):
    w = np.zeros(np.shape(x)[1])
    y_train_coded = onevall(k+1, y_train)
    y_test_coded = onevall(k+1, y_test)

    for t in range(T):
      for j in range(len(w)):
        w[j] = (1-alpha*Lambda)*w[j] - (alpha/m_train)*np.dot(h(x_train,w) - y_train_coded,x_train[:,j])
  
    loglikelihood_models.append(loglikelihood(y_train_coded, h(x_train,w)))
    pred = np.where(h(x_test,w)>0.5, 1, 0) # prediction of each model on test data
    pred_models.append(pred)

  # final predictions based on prediction of 3 models
  pred_models = np.transpose(pred_models)
  loglikelihood_models = np.array(loglikelihood_models)
  final_pred = np.zeros(m_test)
  worst_model = np.argmin(loglikelihood_models)

  for i in range(m_test):
    pred_vector = np.array(pred_models[i][:])
    ties = list(pred_vector).count(1)
    if ties>1:
      index_tie = np.where(pred_vector==1)
      better_model = np.where(loglikelihood_models==np.max(loglikelihood_models[index_tie]))[0]
      final_pred[i] = better_model+1
    if ties==0:
      final_pred[i] = worst_model+1
    else:
      final_pred[i] = np.argmax(pred_vector)+1

  # performance measures
  ind_accuracy, accuracy = performance(y_test, final_pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

mean_accuracy = np.mean(accuracy_vals)
mean_accuracy1 = np.mean(ind_accuracy1)
mean_accuracy2 = np.mean(ind_accuracy2)
mean_accuracy3 = np.mean(ind_accuracy3)

print("mean accuracy of class 1 = {}".format(mean_accuracy1))
print("mean accuracy of class 2 = {}".format(mean_accuracy3))
print("mean accuracy of class 3 = {}".format(mean_accuracy3))
print("mean accuracy of classifier = {}".format(mean_accuracy))

mean accuracy of class 1 = 0.9346616541353384
mean accuracy of class 2 = 0.8871349238996299
mean accuracy of class 3 = 0.8871349238996299
mean accuracy of classifier = 0.880952380952381


In [None]:
# LOR + L1-Norm + BGD
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)

alpha = 1
T = 500
Lambda = 0.001

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(nf):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  m_train = len(y_train)
  m_test = len(y_test)

  # normalizing input data
  pp = np.amax(np.abs(x_train), axis=0)
  x_train = x_train/pp
  x_test = x_test/pp

  # training 3 models and calculating loglikelihoods and predictions on test data
  loglikelihood_models = []
  pred_models = []
  for k in range(3):
    w = np.zeros(np.shape(x)[1])
    y_train_coded = onevall(k+1, y_train)
    y_test_coded = onevall(k+1, y_test)

    for t in range(T):
      for j in range(len(w)):
        w[j] = w[j] - (alpha/m_train)*np.dot(h(x_train,w) - y_train_coded,x_train[:,j])- (0.5*alpha*Lambda)*np.sign(w[j])
  
    loglikelihood_models.append(loglikelihood(y_train_coded, h(x_train,w)))
    pred = np.where(h(x_test,w)>0.5, 1, 0) # prediction of each model on test data
    pred_models.append(pred)

  # final predictions based on prediction of 3 models
  pred_models = np.transpose(pred_models)
  loglikelihood_models = np.array(loglikelihood_models)
  final_pred = np.zeros(m_test)
  worst_model = np.argmin(loglikelihood_models)

  for i in range(m_test):
    pred_vector = np.array(pred_models[i][:])
    ties = list(pred_vector).count(1)
    if ties>1:
      index_tie = np.where(pred_vector==1)
      better_model = np.where(loglikelihood_models==np.max(loglikelihood_models[index_tie]))[0]
      final_pred[i] = better_model+1
    if ties==0:
      final_pred[i] = worst_model+1
    else:
      final_pred[i] = np.argmax(pred_vector)+1

  # performance measures
  ind_accuracy, accuracy = performance(y_test, final_pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

mean_accuracy = np.mean(accuracy_vals)
mean_accuracy1 = np.mean(ind_accuracy1)
mean_accuracy2 = np.mean(ind_accuracy2)
mean_accuracy3 = np.mean(ind_accuracy3)

print("mean accuracy of class 1 = {}".format(mean_accuracy1))
print("mean accuracy of class 2 = {}".format(mean_accuracy3))
print("mean accuracy of class 3 = {}".format(mean_accuracy3))
print("mean accuracy of classifier = {}".format(mean_accuracy))

mean accuracy of class 1 = 0.9346616541353384
mean accuracy of class 2 = 0.8871349238996299
mean accuracy of class 3 = 0.8871349238996299
mean accuracy of classifier = 0.8714285714285716


In [48]:
# LOR + SGD
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)

alpha = 0.35
T = 800

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(nf):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  m_train = len(y_train)
  m_test = len(y_test)

  # normalizing input data
  pp = np.amax(np.abs(x_train), axis=0)
  x_train = x_train/pp
  x_test = x_test/pp

  # training 3 models and calculating loglikelihoods and predictions on test data
  loglikelihood_models = []
  pred_models = []
  for k in range(3):
    w = np.zeros(np.shape(x)[1])
    y_train_coded = onevall(k+1, y_train)
    y_test_coded = onevall(k+1, y_test)

    for t in range(T):
      ind = np.random.randint(m_train)
      for j in range(len(w)):
        w[j] = w[j] - (alpha)*np.dot(h(x_train,w)[ind] - y_train_coded[ind],x_train[ind,j])
  
    loglikelihood_models.append(loglikelihood(y_train_coded, h(x_train,w)))
    pred = np.where(h(x_test,w)>0.5, 1, 0) # prediction of each model on test data
    pred_models.append(pred)

  # final predictions based on prediction of 3 models
  pred_models = np.transpose(pred_models)
  loglikelihood_models = np.array(loglikelihood_models)
  final_pred = np.zeros(m_test)
  worst_model = np.argmin(loglikelihood_models)

  for i in range(m_test):
    pred_vector = np.array(pred_models[i][:])
    ties = list(pred_vector).count(1)
    if ties>1:
      index_tie = np.where(pred_vector==1)
      better_model = np.where(loglikelihood_models==np.max(loglikelihood_models[index_tie]))[0]
      final_pred[i] = better_model+1
    if ties==0:
      final_pred[i] = worst_model+1
    else:
      final_pred[i] = np.argmax(pred_vector)+1

  # performance measures
  ind_accuracy, accuracy = performance(y_test, final_pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

mean_accuracy = np.mean(accuracy_vals)
mean_accuracy1 = np.mean(ind_accuracy1)
mean_accuracy2 = np.mean(ind_accuracy2)
mean_accuracy3 = np.mean(ind_accuracy3)

print("mean accuracy of class 1 = {}".format(mean_accuracy1))
print("mean accuracy of class 2 = {}".format(mean_accuracy3))
print("mean accuracy of class 3 = {}".format(mean_accuracy3))
print("mean accuracy of classifier = {}".format(mean_accuracy))

mean accuracy of class 1 = 0.7733660130718955
mean accuracy of class 2 = 0.823015873015873
mean accuracy of class 3 = 0.823015873015873
mean accuracy of classifier = 0.7285714285714285


In [51]:
# LOR + L2-Norm + SGD
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)

alpha = 0.65
T = 700
Lambda = 0.001

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(nf):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  m_train = len(y_train)
  m_test = len(y_test)

  # normalizing input data
  pp = np.amax(np.abs(x_train), axis=0)
  x_train = x_train/pp
  x_test = x_test/pp

  # training 3 models and calculating loglikelihoods and predictions on test data
  loglikelihood_models = []
  pred_models = []
  for k in range(3):
    w = np.zeros(np.shape(x)[1])
    y_train_coded = onevall(k+1, y_train)
    y_test_coded = onevall(k+1, y_test)

    for t in range(T):
      ind = np.random.randint(m_train)
      for j in range(len(w)):
        w[j] = (1-alpha*Lambda)*w[j] - (alpha)*np.dot(h(x_train,w)[ind] - y_train_coded[ind],x_train[ind,j])
  
    loglikelihood_models.append(loglikelihood(y_train_coded, h(x_train,w)))
    pred = np.where(h(x_test,w)>0.5, 1, 0) # prediction of each model on test data
    pred_models.append(pred)

  # final predictions based on prediction of 3 models
  pred_models = np.transpose(pred_models)
  loglikelihood_models = np.array(loglikelihood_models)
  final_pred = np.zeros(m_test)
  worst_model = np.argmin(loglikelihood_models)

  for i in range(m_test):
    pred_vector = np.array(pred_models[i][:])
    ties = list(pred_vector).count(1)
    if ties>1:
      index_tie = np.where(pred_vector==1)
      better_model = np.where(loglikelihood_models==np.max(loglikelihood_models[index_tie]))[0]
      final_pred[i] = better_model+1
    if ties==0:
      final_pred[i] = worst_model+1
    else:
      final_pred[i] = np.argmax(pred_vector)+1

  # performance measures
  ind_accuracy, accuracy = performance(y_test, final_pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

mean_accuracy = np.mean(accuracy_vals)
mean_accuracy1 = np.mean(ind_accuracy1)
mean_accuracy2 = np.mean(ind_accuracy2)
mean_accuracy3 = np.mean(ind_accuracy3)

print("mean accuracy of class 1 = {}".format(mean_accuracy1))
print("mean accuracy of class 2 = {}".format(mean_accuracy3))
print("mean accuracy of class 3 = {}".format(mean_accuracy3))
print("mean accuracy of classifier = {}".format(mean_accuracy))

mean accuracy of class 1 = 0.70359477124183
mean accuracy of class 2 = 0.8468253968253968
mean accuracy of class 3 = 0.8468253968253968
mean accuracy of classifier = 0.780952380952381


In [60]:
# LOR + L1-Norm + SGD
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)

alpha = 0.45
T = 800
Lambda = 0.001

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(nf):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  m_train = len(y_train)
  m_test = len(y_test)

  # normalizing input data
  pp = np.amax(np.abs(x_train), axis=0)
  x_train = x_train/pp
  x_test = x_test/pp

  # training 3 models and calculating loglikelihoods and predictions on test data
  loglikelihood_models = []
  pred_models = []
  for k in range(3):
    w = np.zeros(np.shape(x)[1])
    y_train_coded = onevall(k+1, y_train)
    y_test_coded = onevall(k+1, y_test)

    for t in range(T):
      ind = np.random.randint(m_train)
      for j in range(len(w)):
        w[j] = w[j] - (alpha)*np.dot(h(x_train,w)[ind] - y_train_coded[ind],x_train[ind,j]) - (0.5*alpha*Lambda)*np.sign(w[j])
  
    loglikelihood_models.append(loglikelihood(y_train_coded, h(x_train,w)))
    pred = np.where(h(x_test,w)>0.5, 1, 0) # prediction of each model on test data
    pred_models.append(pred)

  # final predictions based on prediction of 3 models
  pred_models = np.transpose(pred_models)
  loglikelihood_models = np.array(loglikelihood_models)
  final_pred = np.zeros(m_test)
  worst_model = np.argmin(loglikelihood_models)

  for i in range(m_test):
    pred_vector = np.array(pred_models[i][:])
    ties = list(pred_vector).count(1)
    if ties>1:
      index_tie = np.where(pred_vector==1)
      better_model = np.where(loglikelihood_models==np.max(loglikelihood_models[index_tie]))[0]
      final_pred[i] = better_model+1
    if ties==0:
      final_pred[i] = worst_model+1
    else:
      final_pred[i] = np.argmax(pred_vector)+1

  # performance measures
  ind_accuracy, accuracy = performance(y_test, final_pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

mean_accuracy = np.mean(accuracy_vals)
mean_accuracy1 = np.mean(ind_accuracy1)
mean_accuracy2 = np.mean(ind_accuracy2)
mean_accuracy3 = np.mean(ind_accuracy3)

print("mean accuracy of class 1 = {}".format(mean_accuracy1))
print("mean accuracy of class 2 = {}".format(mean_accuracy3))
print("mean accuracy of class 3 = {}".format(mean_accuracy3))
print("mean accuracy of classifier = {}".format(mean_accuracy))

mean accuracy of class 1 = 0.9757352941176471
mean accuracy of class 2 = 0.7468253968253968
mean accuracy of class 3 = 0.7468253968253968
mean accuracy of classifier = 0.7666666666666667


In [None]:
# LOR + MBGD
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)
rng = np.random.default_rng()

nb = 32
alpha = 1
T = 500

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(nf):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  m_train = len(y_train)
  m_test = len(y_test)

  # normalizing input data
  pp = np.amax(np.abs(x_train), axis=0)
  x_train = x_train/pp
  x_test = x_test/pp

  # training 3 models and calculating loglikelihoods and predictions on test data
  loglikelihood_models = []
  pred_models = []
  for k in range(3):
    w = np.zeros(np.shape(x)[1])
    y_train_coded = onevall(k+1, y_train)
    y_test_coded = onevall(k+1, y_test)

    for t in range(T):
      ind = rng.choice(m_train, nb, replace=False)
      for j in range(len(w)):
        w[j] = w[j] - (alpha)*np.dot(h(x_train,w)[ind] - y_train_coded[ind],x_train[ind,j])
  
    loglikelihood_models.append(loglikelihood(y_train_coded, h(x_train,w)))
    pred = np.where(h(x_test,w)>0.5, 1, 0) # prediction of each model on test data
    pred_models.append(pred)

  # final predictions based on prediction of 3 models
  pred_models = np.transpose(pred_models)
  loglikelihood_models = np.array(loglikelihood_models)
  final_pred = np.zeros(m_test)
  worst_model = np.argmin(loglikelihood_models)

  for i in range(m_test):
    pred_vector = np.array(pred_models[i][:])
    ties = list(pred_vector).count(1)
    if ties>1:
      index_tie = np.where(pred_vector==1)
      better_model = np.where(loglikelihood_models==np.max(loglikelihood_models[index_tie]))[0]
      final_pred[i] = better_model+1
    if ties==0:
      final_pred[i] = worst_model+1
    else:
      final_pred[i] = np.argmax(pred_vector)+1

  # performance measures
  ind_accuracy, accuracy = performance(y_test, final_pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

mean_accuracy = np.mean(accuracy_vals)
mean_accuracy1 = np.mean(ind_accuracy1)
mean_accuracy2 = np.mean(ind_accuracy2)
mean_accuracy3 = np.mean(ind_accuracy3)

print("mean accuracy of class 1 = {}".format(mean_accuracy1))
print("mean accuracy of class 2 = {}".format(mean_accuracy3))
print("mean accuracy of class 3 = {}".format(mean_accuracy3))
print("mean accuracy of classifier = {}".format(mean_accuracy))

mean accuracy of class 1 = 0.956140350877193
mean accuracy of class 2 = 0.892081447963801
mean accuracy of class 3 = 0.892081447963801
mean accuracy of classifier = 0.9095238095238095


In [61]:
# LOR + L2-Norm + MBGD
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)
rng = np.random.default_rng()

nb = 32
alpha = 1
T = 500
Lambda = 0.001

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(nf):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  m_train = len(y_train)
  m_test = len(y_test)

  # normalizing input data
  pp = np.amax(np.abs(x_train), axis=0)
  x_train = x_train/pp
  x_test = x_test/pp

  # training 3 models and calculating loglikelihoods and predictions on test data
  loglikelihood_models = []
  pred_models = []
  for k in range(3):
    w = np.zeros(np.shape(x)[1])
    y_train_coded = onevall(k+1, y_train)
    y_test_coded = onevall(k+1, y_test)

    for t in range(T):
      ind = rng.choice(m_train, nb, replace=False)
      for j in range(len(w)):
        w[j] = (1-alpha*Lambda)*w[j] - (alpha)*np.dot(h(x_train,w)[ind] - y_train_coded[ind],x_train[ind,j])
  
    loglikelihood_models.append(loglikelihood(y_train_coded, h(x_train,w)))
    pred = np.where(h(x_test,w)>0.5, 1, 0) # prediction of each model on test data
    pred_models.append(pred)

  # final predictions based on prediction of 3 models
  pred_models = np.transpose(pred_models)
  loglikelihood_models = np.array(loglikelihood_models)
  final_pred = np.zeros(m_test)
  worst_model = np.argmin(loglikelihood_models)

  for i in range(m_test):
    pred_vector = np.array(pred_models[i][:])
    ties = list(pred_vector).count(1)
    if ties>1:
      index_tie = np.where(pred_vector==1)
      better_model = np.where(loglikelihood_models==np.max(loglikelihood_models[index_tie]))[0]
      final_pred[i] = better_model+1
    if ties==0:
      final_pred[i] = worst_model+1
    else:
      final_pred[i] = np.argmax(pred_vector)+1

  # performance measures
  ind_accuracy, accuracy = performance(y_test, final_pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

mean_accuracy = np.mean(accuracy_vals)
mean_accuracy1 = np.mean(ind_accuracy1)
mean_accuracy2 = np.mean(ind_accuracy2)
mean_accuracy3 = np.mean(ind_accuracy3)

print("mean accuracy of class 1 = {}".format(mean_accuracy1))
print("mean accuracy of class 2 = {}".format(mean_accuracy3))
print("mean accuracy of class 3 = {}".format(mean_accuracy3))
print("mean accuracy of classifier = {}".format(mean_accuracy))

mean accuracy of class 1 = 0.9277777777777778
mean accuracy of class 2 = 0.854761904761905
mean accuracy of class 3 = 0.854761904761905
mean accuracy of classifier = 0.9095238095238095


In [None]:
# LOR + L1-Norm + MBGD
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)
rng = np.random.default_rng()

nb = 32
alpha = 1
T = 500
Lambda = 0.001

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(nf):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  m_train = len(y_train)
  m_test = len(y_test)

  # normalizing input data
  pp = np.amax(np.abs(x_train), axis=0)
  x_train = x_train/pp
  x_test = x_test/pp

  # training 3 models and calculating loglikelihoods and predictions on test data
  loglikelihood_models = []
  pred_models = []
  for k in range(3):
    w = np.zeros(np.shape(x)[1])
    y_train_coded = onevall(k+1, y_train)
    y_test_coded = onevall(k+1, y_test)

    for t in range(T):
      ind = rng.choice(m_train, nb, replace=False)
      for j in range(len(w)):
        w[j] = w[j] - (alpha)*np.dot(h(x_train,w)[ind] - y_train_coded[ind],x_train[ind,j]) - (0.5*Lambda*alpha)*np.sign(w[j])
  
    loglikelihood_models.append(loglikelihood(y_train_coded, h(x_train,w)))
    pred = np.where(h(x_test,w)>0.5, 1, 0) # prediction of each model on test data
    pred_models.append(pred)

  # final predictions based on prediction of 3 models
  pred_models = np.transpose(pred_models)
  loglikelihood_models = np.array(loglikelihood_models)
  final_pred = np.zeros(m_test)
  worst_model = np.argmin(loglikelihood_models)

  for i in range(m_test):
    pred_vector = np.array(pred_models[i][:])
    ties = list(pred_vector).count(1)
    if ties>1:
      index_tie = np.where(pred_vector==1)
      better_model = np.where(loglikelihood_models==np.max(loglikelihood_models[index_tie]))[0]
      final_pred[i] = better_model+1
    if ties==0:
      final_pred[i] = worst_model+1
    else:
      final_pred[i] = np.argmax(pred_vector)+1

  # performance measures
  ind_accuracy, accuracy = performance(y_test, final_pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

mean_accuracy = np.mean(accuracy_vals)
mean_accuracy1 = np.mean(ind_accuracy1)
mean_accuracy2 = np.mean(ind_accuracy2)
mean_accuracy3 = np.mean(ind_accuracy3)

print("mean accuracy of class 1 = {}".format(mean_accuracy1))
print("mean accuracy of class 2 = {}".format(mean_accuracy3))
print("mean accuracy of class 3 = {}".format(mean_accuracy3))
print("mean accuracy of classifier = {}".format(mean_accuracy))

mean accuracy of class 1 = 0.921328320802005
mean accuracy of class 2 = 0.8261055121349239
mean accuracy of class 3 = 0.8261055121349239
mean accuracy of classifier = 0.8904761904761905


ONE vs. ONE CODING (hyperparameters chosen by trial & error)

In [None]:
# LOR + BGD
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)

alpha = 1
T = 500

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(nf):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  m_train = len(y_train)
  m_test = len(y_test)

  # normalizing input data
  pp = np.amax(np.abs(x_train), axis=0)
  x_train = x_train/pp
  x_test = x_test/pp

  # data for one vs. one models
  x_train1 = np.delete(x_train, np.where(y_train==3), axis=0) # 1vs2
  x_train2 = np.delete(x_train, np.where(y_train==1), axis=0) # 2vs3
  x_train3 = np.delete(x_train, np.where(y_train==2), axis=0) # 1vs3

  y_train1 = np.delete(y_train, np.where(y_train==3), axis=0)
  y_train1 = np.where(y_train1==1, 0, 1) # class1=0, class2=1
  y_train2 = np.delete(y_train, np.where(y_train==1), axis=0)
  y_train2 = np.where(y_train2==2, 0, 1) # class2=0, class3=1
  y_train3 = np.delete(y_train, np.where(y_train==2), axis=0)
  y_train3 = np.where(y_train3==1, 0, 1) # class1=0, class3=1

  m_train1 = np.shape(x_train1)[0]
  m_train2 = np.shape(x_train2)[0]
  m_train3 = np.shape(x_train3)[0]

  # model 1
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    for j in range(len(w)):
      w[j] = w[j] - (alpha/m_train1)*np.dot(h(x_train1,w) - y_train1,x_train1[:,j])
  pred1 = np.where(h(x_test,w)>0.5, 2, 1)

  # model 2
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    for j in range(len(w)):
      w[j] = w[j] - (alpha/m_train2)*np.dot(h(x_train2,w) - y_train2,x_train2[:,j])
  pred2 = np.where(h(x_test,w)>0.5, 3, 2)

  # model 3
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    for j in range(len(w)):
      w[j] = w[j] - (alpha/m_train3)*np.dot(h(x_train3,w) - y_train3,x_train3[:,j])
  pred3 = np.where(h(x_test,w)>0.5, 3, 1)

  # final predictions
  pred = np.concatenate((np.array(pred1, ndmin=2).T,np.array(pred2, ndmin=2).T,np.array(pred3, ndmin=2).T), axis=1)
  final_pred,c = mode(pred, axis=1)

  # performance measures
  ind_accuracy, accuracy = performance(y_test, final_pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

mean_accuracy = np.mean(accuracy_vals)
mean_accuracy1 = np.mean(ind_accuracy1)
mean_accuracy2 = np.mean(ind_accuracy2)
mean_accuracy3 = np.mean(ind_accuracy3)

print("mean accuracy of class 1 = {}".format(mean_accuracy1))
print("mean accuracy of class 2 = {}".format(mean_accuracy3))
print("mean accuracy of class 3 = {}".format(mean_accuracy3))
print("mean accuracy of classifier = {}".format(mean_accuracy))

mean accuracy of class 1 = 0.8724585218702867
mean accuracy of class 2 = 0.8895658263305322
mean accuracy of class 3 = 0.8895658263305322
mean accuracy of classifier = 0.8952380952380953


In [None]:
# LOR + L2-Norm + BGD
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)

alpha = 1
T = 500
Lambda = 0.001

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(nf):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  m_train = len(y_train)
  m_test = len(y_test)

  # normalizing input data
  pp = np.amax(np.abs(x_train), axis=0)
  x_train = x_train/pp
  x_test = x_test/pp

  # data for one vs. one models
  x_train1 = np.delete(x_train, np.where(y_train==3), axis=0) # 1vs2
  x_train2 = np.delete(x_train, np.where(y_train==1), axis=0) # 2vs3
  x_train3 = np.delete(x_train, np.where(y_train==2), axis=0) # 1vs3

  y_train1 = np.delete(y_train, np.where(y_train==3), axis=0)
  y_train1 = np.where(y_train1==1, 0, 1) # class1=0, class2=1
  y_train2 = np.delete(y_train, np.where(y_train==1), axis=0)
  y_train2 = np.where(y_train2==2, 0, 1) # class2=0, class3=1
  y_train3 = np.delete(y_train, np.where(y_train==2), axis=0)
  y_train3 = np.where(y_train3==1, 0, 1) # class1=0, class3=1

  m_train1 = np.shape(x_train1)[0]
  m_train2 = np.shape(x_train2)[0]
  m_train3 = np.shape(x_train3)[0]

  # model 1
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    for j in range(len(w)):
      w[j] = (1-alpha*Lambda)*w[j] - (alpha/m_train1)*np.dot(h(x_train1,w) - y_train1,x_train1[:,j])
  pred1 = np.where(h(x_test,w)>0.5, 2, 1)

  # model 2
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    for j in range(len(w)):
      w[j] = (1-alpha*Lambda)*w[j] - (alpha/m_train2)*np.dot(h(x_train2,w) - y_train2,x_train2[:,j])
  pred2 = np.where(h(x_test,w)>0.5, 3, 2)

  # model 3
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    for j in range(len(w)):
      w[j] = (1-alpha*Lambda)*w[j] - (alpha/m_train3)*np.dot(h(x_train3,w) - y_train3,x_train3[:,j])
  pred3 = np.where(h(x_test,w)>0.5, 3, 1)

  # final predictions
  pred = np.concatenate((np.array(pred1, ndmin=2).T,np.array(pred2, ndmin=2).T,np.array(pred3, ndmin=2).T), axis=1)
  final_pred,c = mode(pred, axis=1)

  # performance measures
  ind_accuracy, accuracy = performance(y_test, final_pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

mean_accuracy = np.mean(accuracy_vals)
mean_accuracy1 = np.mean(ind_accuracy1)
mean_accuracy2 = np.mean(ind_accuracy2)
mean_accuracy3 = np.mean(ind_accuracy3)

print("mean accuracy of class 1 = {}".format(mean_accuracy1))
print("mean accuracy of class 2 = {}".format(mean_accuracy3))
print("mean accuracy of class 3 = {}".format(mean_accuracy3))
print("mean accuracy of classifier = {}".format(mean_accuracy))

mean accuracy of class 1 = 0.8724585218702867
mean accuracy of class 2 = 0.8895658263305322
mean accuracy of class 3 = 0.8895658263305322
mean accuracy of classifier = 0.8952380952380953


In [None]:
# LOR + L1-Norm + BGD
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)

alpha = 1
T = 500
Lambda = 0.001

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(nf):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  m_train = len(y_train)
  m_test = len(y_test)

  # normalizing input data
  pp = np.amax(np.abs(x_train), axis=0)
  x_train = x_train/pp
  x_test = x_test/pp

  # data for one vs. one models
  x_train1 = np.delete(x_train, np.where(y_train==3), axis=0) # 1vs2
  x_train2 = np.delete(x_train, np.where(y_train==1), axis=0) # 2vs3
  x_train3 = np.delete(x_train, np.where(y_train==2), axis=0) # 1vs3

  y_train1 = np.delete(y_train, np.where(y_train==3), axis=0)
  y_train1 = np.where(y_train1==1, 0, 1) # class1=0, class2=1
  y_train2 = np.delete(y_train, np.where(y_train==1), axis=0)
  y_train2 = np.where(y_train2==2, 0, 1) # class2=0, class3=1
  y_train3 = np.delete(y_train, np.where(y_train==2), axis=0)
  y_train3 = np.where(y_train3==1, 0, 1) # class1=0, class3=1

  m_train1 = np.shape(x_train1)[0]
  m_train2 = np.shape(x_train2)[0]
  m_train3 = np.shape(x_train3)[0]

  # model 1
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    for j in range(len(w)):
      w[j] = w[j] - (alpha/m_train1)*np.dot(h(x_train1,w) - y_train1,x_train1[:,j]) - (0.5*Lambda*alpha)*np.sign(w[j])
  pred1 = np.where(h(x_test,w)>0.5, 2, 1)

  # model 2
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    for j in range(len(w)):
      w[j] = w[j] - (alpha/m_train2)*np.dot(h(x_train2,w) - y_train2,x_train2[:,j])- (0.5*Lambda*alpha)*np.sign(w[j])
  pred2 = np.where(h(x_test,w)>0.5, 3, 2)

  # model 3
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    for j in range(len(w)):
      w[j] = w[j] - (alpha/m_train3)*np.dot(h(x_train3,w) - y_train3,x_train3[:,j])- (0.5*Lambda*alpha)*np.sign(w[j])
  pred3 = np.where(h(x_test,w)>0.5, 3, 1)

  # final predictions
  pred = np.concatenate((np.array(pred1, ndmin=2).T,np.array(pred2, ndmin=2).T,np.array(pred3, ndmin=2).T), axis=1)
  final_pred,c = mode(pred, axis=1)

  # performance measures
  ind_accuracy, accuracy = performance(y_test, final_pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

mean_accuracy = np.mean(accuracy_vals)
mean_accuracy1 = np.mean(ind_accuracy1)
mean_accuracy2 = np.mean(ind_accuracy2)
mean_accuracy3 = np.mean(ind_accuracy3)

print("mean accuracy of class 1 = {}".format(mean_accuracy1))
print("mean accuracy of class 2 = {}".format(mean_accuracy3))
print("mean accuracy of class 3 = {}".format(mean_accuracy3))
print("mean accuracy of classifier = {}".format(mean_accuracy))

mean accuracy of class 1 = 0.8724585218702867
mean accuracy of class 2 = 0.8895658263305322
mean accuracy of class 3 = 0.8895658263305322
mean accuracy of classifier = 0.8952380952380953


In [62]:
# LOR + SGD
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)

alpha = 0.65
T = 700

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(nf):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  m_train = len(y_train)
  m_test = len(y_test)

  # normalizing input data
  pp = np.amax(np.abs(x_train), axis=0)
  x_train = x_train/pp
  x_test = x_test/pp

  # data for one vs. one models
  x_train1 = np.delete(x_train, np.where(y_train==3), axis=0) # 1vs2
  x_train2 = np.delete(x_train, np.where(y_train==1), axis=0) # 2vs3
  x_train3 = np.delete(x_train, np.where(y_train==2), axis=0) # 1vs3

  y_train1 = np.delete(y_train, np.where(y_train==3), axis=0)
  y_train1 = np.where(y_train1==1, 0, 1) # class1=0, class2=1
  y_train2 = np.delete(y_train, np.where(y_train==1), axis=0)
  y_train2 = np.where(y_train2==2, 0, 1) # class2=0, class3=1
  y_train3 = np.delete(y_train, np.where(y_train==2), axis=0)
  y_train3 = np.where(y_train3==1, 0, 1) # class1=0, class3=1

  m_train1 = np.shape(x_train1)[0]
  m_train2 = np.shape(x_train2)[0]
  m_train3 = np.shape(x_train3)[0]

  # model 1
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = np.random.randint(m_train1)
    for j in range(len(w)):
      w[j] = w[j] - (alpha)*np.dot(h(x_train1,w)[ind] - y_train1[ind],x_train1[ind,j])
  pred1 = np.where(h(x_test,w)>0.5, 2, 1)

  # model 2
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = np.random.randint(m_train2)
    for j in range(len(w)):
      w[j] = w[j] - (alpha)*np.dot(h(x_train2,w)[ind] - y_train2[ind],x_train2[ind,j])
  pred2 = np.where(h(x_test,w)>0.5, 3, 2)

  # model 3
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = np.random.randint(m_train3)
    for j in range(len(w)):
      w[j] = w[j] - (alpha)*np.dot(h(x_train3,w)[ind] - y_train3[ind],x_train3[ind,j])
  pred3 = np.where(h(x_test,w)>0.5, 3, 1)

  # final predictions
  pred = np.concatenate((np.array(pred1, ndmin=2).T,np.array(pred2, ndmin=2).T,np.array(pred3, ndmin=2).T), axis=1)
  final_pred,c = mode(pred, axis=1)

  # performance measures
  ind_accuracy, accuracy = performance(y_test, final_pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

mean_accuracy = np.mean(accuracy_vals)
mean_accuracy1 = np.mean(ind_accuracy1)
mean_accuracy2 = np.mean(ind_accuracy2)
mean_accuracy3 = np.mean(ind_accuracy3)

print("mean accuracy of class 1 = {}".format(mean_accuracy1))
print("mean accuracy of class 2 = {}".format(mean_accuracy3))
print("mean accuracy of class 3 = {}".format(mean_accuracy3))
print("mean accuracy of classifier = {}".format(mean_accuracy))

mean accuracy of class 1 = 0.8694444444444445
mean accuracy of class 2 = 0.8063492063492064
mean accuracy of class 3 = 0.8063492063492064
mean accuracy of classifier = 0.8285714285714285


In [73]:
# LOR + L2-Norm + SGD
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)

alpha = 0.65
T = 700
Lambda = 0.001

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(nf):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  m_train = len(y_train)
  m_test = len(y_test)

  # normalizing input data
  pp = np.amax(np.abs(x_train), axis=0)
  x_train = x_train/pp
  x_test = x_test/pp

  # data for one vs. one models
  x_train1 = np.delete(x_train, np.where(y_train==3), axis=0) # 1vs2
  x_train2 = np.delete(x_train, np.where(y_train==1), axis=0) # 2vs3
  x_train3 = np.delete(x_train, np.where(y_train==2), axis=0) # 1vs3

  y_train1 = np.delete(y_train, np.where(y_train==3), axis=0)
  y_train1 = np.where(y_train1==1, 0, 1) # class1=0, class2=1
  y_train2 = np.delete(y_train, np.where(y_train==1), axis=0)
  y_train2 = np.where(y_train2==2, 0, 1) # class2=0, class3=1
  y_train3 = np.delete(y_train, np.where(y_train==2), axis=0)
  y_train3 = np.where(y_train3==1, 0, 1) # class1=0, class3=1

  m_train1 = np.shape(x_train1)[0]
  m_train2 = np.shape(x_train2)[0]
  m_train3 = np.shape(x_train3)[0]

  # model 1
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = np.random.randint(m_train1)
    for j in range(len(w)):
      w[j] = (1-alpha*Lambda)*w[j] - (alpha)*np.dot(h(x_train1,w)[ind] - y_train1[ind],x_train1[ind,j])
  pred1 = np.where(h(x_test,w)>0.5, 2, 1)

  # model 2
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = np.random.randint(m_train2)
    for j in range(len(w)):
      w[j] = (1-alpha*Lambda)*w[j] - (alpha)*np.dot(h(x_train2,w)[ind] - y_train2[ind],x_train2[ind,j])
  pred2 = np.where(h(x_test,w)>0.5, 3, 2)

  # model 3
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = np.random.randint(m_train3)
    for j in range(len(w)):
      w[j] = (1-alpha*Lambda)*w[j] - (alpha)*np.dot(h(x_train3,w)[ind] - y_train3[ind],x_train3[ind,j])
  pred3 = np.where(h(x_test,w)>0.5, 3, 1)

  # final predictions
  pred = np.concatenate((np.array(pred1, ndmin=2).T,np.array(pred2, ndmin=2).T,np.array(pred3, ndmin=2).T), axis=1)
  final_pred,c = mode(pred, axis=1)

  # performance measures
  ind_accuracy, accuracy = performance(y_test, final_pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

mean_accuracy = np.mean(accuracy_vals)
mean_accuracy1 = np.mean(ind_accuracy1)
mean_accuracy2 = np.mean(ind_accuracy2)
mean_accuracy3 = np.mean(ind_accuracy3)

print("mean accuracy of class 1 = {}".format(mean_accuracy1))
print("mean accuracy of class 2 = {}".format(mean_accuracy3))
print("mean accuracy of class 3 = {}".format(mean_accuracy3))
print("mean accuracy of classifier = {}".format(mean_accuracy))

mean accuracy of class 1 = 0.7073529411764705
mean accuracy of class 2 = 0.8420634920634921
mean accuracy of class 3 = 0.8420634920634921
mean accuracy of classifier = 0.7952380952380953


In [82]:
# LOR + L1-Norm + SGD
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)

alpha = 0.65
T = 700
Lambda = 0.001

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(nf):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  m_train = len(y_train)
  m_test = len(y_test)

  # normalizing input data
  pp = np.amax(np.abs(x_train), axis=0)
  x_train = x_train/pp
  x_test = x_test/pp

  # data for one vs. one models
  x_train1 = np.delete(x_train, np.where(y_train==3), axis=0) # 1vs2
  x_train2 = np.delete(x_train, np.where(y_train==1), axis=0) # 2vs3
  x_train3 = np.delete(x_train, np.where(y_train==2), axis=0) # 1vs3

  y_train1 = np.delete(y_train, np.where(y_train==3), axis=0)
  y_train1 = np.where(y_train1==1, 0, 1) # class1=0, class2=1
  y_train2 = np.delete(y_train, np.where(y_train==1), axis=0)
  y_train2 = np.where(y_train2==2, 0, 1) # class2=0, class3=1
  y_train3 = np.delete(y_train, np.where(y_train==2), axis=0)
  y_train3 = np.where(y_train3==1, 0, 1) # class1=0, class3=1

  m_train1 = np.shape(x_train1)[0]
  m_train2 = np.shape(x_train2)[0]
  m_train3 = np.shape(x_train3)[0]

  # model 1
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = np.random.randint(m_train1)
    for j in range(len(w)):
      w[j] = w[j] - (alpha)*np.dot(h(x_train1,w)[ind] - y_train1[ind],x_train1[ind,j]) - (0.5*alpha*Lambda)*np.sign(w[j])
  pred1 = np.where(h(x_test,w)>0.5, 2, 1)

  # model 2
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = np.random.randint(m_train2)
    for j in range(len(w)):
      w[j] = w[j] - (alpha)*np.dot(h(x_train2,w)[ind] - y_train2[ind],x_train2[ind,j]) - (0.5*alpha*Lambda)*np.sign(w[j])
  pred2 = np.where(h(x_test,w)>0.5, 3, 2)

  # model 3
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = np.random.randint(m_train3)
    for j in range(len(w)):
      w[j] = w[j] - (alpha)*np.dot(h(x_train3,w)[ind] - y_train3[ind],x_train3[ind,j]) - (0.5*alpha*Lambda)*np.sign(w[j])
  pred3 = np.where(h(x_test,w)>0.5, 3, 1)

  # final predictions
  pred = np.concatenate((np.array(pred1, ndmin=2).T,np.array(pred2, ndmin=2).T,np.array(pred3, ndmin=2).T), axis=1)
  final_pred,c = mode(pred, axis=1)

  # performance measures
  ind_accuracy, accuracy = performance(y_test, final_pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

mean_accuracy = np.mean(accuracy_vals)
mean_accuracy1 = np.mean(ind_accuracy1)
mean_accuracy2 = np.mean(ind_accuracy2)
mean_accuracy3 = np.mean(ind_accuracy3)

print("mean accuracy of class 1 = {}".format(mean_accuracy1))
print("mean accuracy of class 2 = {}".format(mean_accuracy3))
print("mean accuracy of class 3 = {}".format(mean_accuracy3))
print("mean accuracy of classifier = {}".format(mean_accuracy))

mean accuracy of class 1 = 0.742483660130719
mean accuracy of class 2 = 0.9579365079365079
mean accuracy of class 3 = 0.9579365079365079
mean accuracy of classifier = 0.8


In [None]:
# LOR + MBGD
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)
rng = np.random.default_rng()

nb = 24 #batch size
alpha = 1
T = 500

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(nf):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  m_train = len(y_train)
  m_test = len(y_test)

  # normalizing input data
  pp = np.amax(np.abs(x_train), axis=0)
  x_train = x_train/pp
  x_test = x_test/pp

  # data for one vs. one models
  x_train1 = np.delete(x_train, np.where(y_train==3), axis=0) # 1vs2
  x_train2 = np.delete(x_train, np.where(y_train==1), axis=0) # 2vs3
  x_train3 = np.delete(x_train, np.where(y_train==2), axis=0) # 1vs3

  y_train1 = np.delete(y_train, np.where(y_train==3), axis=0)
  y_train1 = np.where(y_train1==1, 0, 1) # class1=0, class2=1
  y_train2 = np.delete(y_train, np.where(y_train==1), axis=0)
  y_train2 = np.where(y_train2==2, 0, 1) # class2=0, class3=1
  y_train3 = np.delete(y_train, np.where(y_train==2), axis=0)
  y_train3 = np.where(y_train3==1, 0, 1) # class1=0, class3=1

  m_train1 = np.shape(x_train1)[0]
  m_train2 = np.shape(x_train2)[0]
  m_train3 = np.shape(x_train3)[0]

  # model 1
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = rng.choice(m_train1, nb, replace=False)
    for j in range(len(w)):
      w[j] = w[j] - (alpha/nb)*np.dot(h(x_train1,w)[ind] - y_train1[ind],x_train1[ind,j])
  pred1 = np.where(h(x_test,w)>0.5, 2, 1)

  # model 2
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = rng.choice(m_train2, nb, replace=False)
    for j in range(len(w)):
      w[j] = w[j] - (alpha/nb)*np.dot(h(x_train2,w)[ind] - y_train2[ind],x_train2[ind,j])
  pred2 = np.where(h(x_test,w)>0.5, 3, 2)

  # model 3
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = rng.choice(m_train3, nb, replace=False)
    for j in range(len(w)):
      w[j] = w[j] - (alpha/nb)*np.dot(h(x_train3,w)[ind] - y_train3[ind],x_train3[ind,j])
  pred3 = np.where(h(x_test,w)>0.5, 3, 1)

  # final predictions
  pred = np.concatenate((np.array(pred1, ndmin=2).T,np.array(pred2, ndmin=2).T,np.array(pred3, ndmin=2).T), axis=1)
  final_pred,c = mode(pred, axis=1)

  # performance measures
  ind_accuracy, accuracy = performance(y_test, final_pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

mean_accuracy = np.mean(accuracy_vals)
mean_accuracy1 = np.mean(ind_accuracy1)
mean_accuracy2 = np.mean(ind_accuracy2)
mean_accuracy3 = np.mean(ind_accuracy3)

print("mean accuracy of class 1 = {}".format(mean_accuracy1))
print("mean accuracy of class 2 = {}".format(mean_accuracy3))
print("mean accuracy of class 3 = {}".format(mean_accuracy3))
print("mean accuracy of classifier = {}".format(mean_accuracy))

mean accuracy of class 1 = 0.8924585218702866
mean accuracy of class 2 = 0.8895658263305322
mean accuracy of class 3 = 0.8895658263305322
mean accuracy of classifier = 0.8952380952380953


In [None]:
# LOR + L2-Norm + MBGD
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)
rng = np.random.default_rng()

nb = 24 #batch size
alpha = 1
T = 500
Lambda = 0.001

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(nf):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  m_train = len(y_train)
  m_test = len(y_test)

  # normalizing input data
  pp = np.amax(np.abs(x_train), axis=0)
  x_train = x_train/pp
  x_test = x_test/pp

  # data for one vs. one models
  x_train1 = np.delete(x_train, np.where(y_train==3), axis=0) # 1vs2
  x_train2 = np.delete(x_train, np.where(y_train==1), axis=0) # 2vs3
  x_train3 = np.delete(x_train, np.where(y_train==2), axis=0) # 1vs3

  y_train1 = np.delete(y_train, np.where(y_train==3), axis=0)
  y_train1 = np.where(y_train1==1, 0, 1) # class1=0, class2=1
  y_train2 = np.delete(y_train, np.where(y_train==1), axis=0)
  y_train2 = np.where(y_train2==2, 0, 1) # class2=0, class3=1
  y_train3 = np.delete(y_train, np.where(y_train==2), axis=0)
  y_train3 = np.where(y_train3==1, 0, 1) # class1=0, class3=1

  m_train1 = np.shape(x_train1)[0]
  m_train2 = np.shape(x_train2)[0]
  m_train3 = np.shape(x_train3)[0]

  # model 1
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = rng.choice(m_train1, nb, replace=False)
    for j in range(len(w)):
      w[j] = (1-alpha*Lambda)*w[j] - (alpha/nb)*np.dot(h(x_train1,w)[ind] - y_train1[ind],x_train1[ind,j])
  pred1 = np.where(h(x_test,w)>0.5, 2, 1)

  # model 2
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = rng.choice(m_train2, nb, replace=False)
    for j in range(len(w)):
      w[j] = (1-alpha*Lambda)*w[j] - (alpha/nb)*np.dot(h(x_train2,w)[ind] - y_train2[ind],x_train2[ind,j])
  pred2 = np.where(h(x_test,w)>0.5, 3, 2)

  # model 3
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = rng.choice(m_train3, nb, replace=False)
    for j in range(len(w)):
      w[j] = (1-alpha*Lambda)*w[j] - (alpha/nb)*np.dot(h(x_train3,w)[ind] - y_train3[ind],x_train3[ind,j])
  pred3 = np.where(h(x_test,w)>0.5, 3, 1)

  # final predictions
  pred = np.concatenate((np.array(pred1, ndmin=2).T,np.array(pred2, ndmin=2).T,np.array(pred3, ndmin=2).T), axis=1)
  final_pred,c = mode(pred, axis=1)

  # performance measures
  ind_accuracy, accuracy = performance(y_test, final_pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

mean_accuracy = np.mean(accuracy_vals)
mean_accuracy1 = np.mean(ind_accuracy1)
mean_accuracy2 = np.mean(ind_accuracy2)
mean_accuracy3 = np.mean(ind_accuracy3)

print("mean accuracy of class 1 = {}".format(mean_accuracy1))
print("mean accuracy of class 2 = {}".format(mean_accuracy3))
print("mean accuracy of class 3 = {}".format(mean_accuracy3))
print("mean accuracy of classifier = {}".format(mean_accuracy))

mean accuracy of class 1 = 0.8989542483660131
mean accuracy of class 2 = 0.8770658263305322
mean accuracy of class 3 = 0.8770658263305322
mean accuracy of classifier = 0.9


In [None]:
# LOR + L1-Norm + MBGD
np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning)
rng = np.random.default_rng()

nb = 24 #batch size
alpha = 1
T = 500
Lambda = 0.001

accuracy_vals = [] # accuracy from all folds
ind_accuracy1 = [] # class1 accuracy from all folds
ind_accuracy2 = [] # class2 accuracy from all folds
ind_accuracy3 = [] # class3 accuracy from all folds

# 5 fold CV
for fold in range(nf):
  # test-train split
  x_test = x_subsets[fold]
  y_test = y_subsets[fold]

  x_train = np.concatenate(np.delete(x_subsets, fold, 0), axis=0)
  y_train = np.concatenate(np.delete(y_subsets, fold, 0), axis=0)

  m_train = len(y_train)
  m_test = len(y_test)

  # normalizing input data
  pp = np.amax(np.abs(x_train), axis=0)
  x_train = x_train/pp
  x_test = x_test/pp

  # data for one vs. one models
  x_train1 = np.delete(x_train, np.where(y_train==3), axis=0) # 1vs2
  x_train2 = np.delete(x_train, np.where(y_train==1), axis=0) # 2vs3
  x_train3 = np.delete(x_train, np.where(y_train==2), axis=0) # 1vs3

  y_train1 = np.delete(y_train, np.where(y_train==3), axis=0)
  y_train1 = np.where(y_train1==1, 0, 1) # class1=0, class2=1
  y_train2 = np.delete(y_train, np.where(y_train==1), axis=0)
  y_train2 = np.where(y_train2==2, 0, 1) # class2=0, class3=1
  y_train3 = np.delete(y_train, np.where(y_train==2), axis=0)
  y_train3 = np.where(y_train3==1, 0, 1) # class1=0, class3=1

  m_train1 = np.shape(x_train1)[0]
  m_train2 = np.shape(x_train2)[0]
  m_train3 = np.shape(x_train3)[0]

  # model 1
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = rng.choice(m_train1, nb, replace=False)
    for j in range(len(w)):
      w[j] = w[j] - (alpha/nb)*np.dot(h(x_train1,w)[ind] - y_train1[ind],x_train1[ind,j]) - (0.5*alpha*Lambda)*np.sign(w[j])
  pred1 = np.where(h(x_test,w)>0.5, 2, 1)

  # model 2
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = rng.choice(m_train2, nb, replace=False)
    for j in range(len(w)):
      w[j] = w[j] - (alpha/nb)*np.dot(h(x_train2,w)[ind] - y_train2[ind],x_train2[ind,j]) - (0.5*alpha*Lambda)*np.sign(w[j])
  pred2 = np.where(h(x_test,w)>0.5, 3, 2)

  # model 3
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = rng.choice(m_train3, nb, replace=False)
    for j in range(len(w)):
      w[j] = w[j] - (alpha/nb)*np.dot(h(x_train3,w)[ind] - y_train3[ind],x_train3[ind,j]) - (0.5*alpha*Lambda)*np.sign(w[j])
  pred3 = np.where(h(x_test,w)>0.5, 3, 1)

  # final predictions
  pred = np.concatenate((np.array(pred1, ndmin=2).T,np.array(pred2, ndmin=2).T,np.array(pred3, ndmin=2).T), axis=1)
  final_pred,c = mode(pred, axis=1)

  # performance measures
  ind_accuracy, accuracy = performance(y_test, final_pred)
  accuracy_vals.append(accuracy)
  ind_accuracy1.append(ind_accuracy[0])
  ind_accuracy2.append(ind_accuracy[1])
  ind_accuracy3.append(ind_accuracy[2])

mean_accuracy = np.mean(accuracy_vals)
mean_accuracy1 = np.mean(ind_accuracy1)
mean_accuracy2 = np.mean(ind_accuracy2)
mean_accuracy3 = np.mean(ind_accuracy3)

print("mean accuracy of class 1 = {}".format(mean_accuracy1))
print("mean accuracy of class 2 = {}".format(mean_accuracy3))
print("mean accuracy of class 3 = {}".format(mean_accuracy3))
print("mean accuracy of classifier = {}".format(mean_accuracy))

mean accuracy of class 1 = 0.9447058823529412
mean accuracy of class 2 = 0.8407021899668958
mean accuracy of class 3 = 0.8407021899668958
mean accuracy of classifier = 0.9095238095238095
