In [None]:
import numpy as np
from itertools import islice
from scipy.stats import mode

def h(x,w):
  """Logistic (sigmoid) hypothesis.

  Applies 1/(1 + exp(-z)) to the linear score z = np.dot(x, w) and returns
  the result (a scalar or an array, depending on what np.dot produces).
  """
  score = np.dot(x,w)
  denominator = 1 + np.exp(-1*score)
  return 1/denominator

def loglikelihood(y,h): # cost function
  """Base-10 Bernoulli log-likelihood of the 0/1 labels under the probabilities.

  Parameters
  ----------
  y : array of 0/1 targets.
  h : array of predicted probabilities, same shape as y.

  Returns
  -------
  Sum over samples of y*log10(h) + (1-y)*log10(1-h). Values closer to 0 mean
  a better fit. The base-10 logarithm only rescales the natural-log
  likelihood by a constant, so rankings between models are unaffected.
  """
  # Saturated probabilities (exactly 0 or 1) would give 0*log10(0) = nan or
  # -inf; clipping keeps the result finite so it can still be compared with
  # argmin/argmax.
  eps = 1e-15
  p = np.clip(h, eps, 1 - eps)
  return np.sum(y*np.log10(p) + (1-y)*np.log10(1-p))

def onevall(k,y):
  """One-vs-all label coding: 1 where y equals class k, 0 everywhere else."""
  is_class_k = np.asarray(y==k)
  return is_class_k.astype(int)

def performance(y, pred, n_classes=3):
  """Per-class and overall accuracy from true and predicted class labels.

  Parameters
  ----------
  y : array-like of true labels, coded 1..n_classes.
  pred : array-like of predicted labels, same number of entries as y. A
    column-shaped array (one label per row) is accepted and flattened.
  n_classes : number of classes (default 3).

  Returns
  -------
  ind_accuracy : list with one entry per class; entry c is the fraction of
    samples whose true class is c+1 that were predicted as c+1. It is nan
    when the class has no counted samples.
  accuracy : fraction of counted samples on the diagonal of the confusion
    matrix (nan when nothing was counted).

  Raises
  ------
  ValueError if y and pred do not hold the same number of labels.

  Samples whose true or predicted label lies outside 1..n_classes are not
  counted anywhere.
  """
  y_true = np.ravel(y)
  y_hat = np.ravel(pred)
  if y_true.shape != y_hat.shape:
    raise ValueError("y and pred must contain the same number of labels")

  classes = np.arange(1, n_classes+1)
  true_onehot = (y_true[:,None] == classes).astype(float)
  pred_onehot = (y_hat[:,None] == classes).astype(float)
  # confusion matrix: rows = true class, columns = predicted class
  m = true_onehot.T @ pred_onehot

  # 0/0 (class absent) deliberately yields nan; silence the numpy warning for it
  with np.errstate(divide='ignore', invalid='ignore'):
    ind_accuracy = list(np.diag(m)/np.sum(m, axis=1))
    accuracy = np.trace(m)/np.sum(m)
  return ind_accuracy, accuracy

In [None]:
# getting data from file
np.random.seed(0) # fixed seed so the shuffle below (and therefore the split) is repeatable
data = []
filename = '/content/drive/MyDrive/NNFL Assignments (Aug 2021)/Assignment 1/data_q6_q7.txt'
with open(filename) as file:
  for line in file:
    # tab-separated fields; drop empty/whitespace-only fields left by repeated or trailing tabs
    entries = [entry for entry in line.split('\t') if entry.strip()]
    if not entries:
      continue # blank line
    y = int(entries.pop()) # last field is the integer class label
    v = list(map(float,entries))
    v.append(y)
    data.append(v)

data = np.array(data)
data = np.concatenate((np.ones((data.shape[0],1)),data), axis=1) # prepending a bias column of ones
np.random.shuffle(data) # shuffling rows in place
x = data[:,:-1].copy() # bias + features
y = data[:,-1].copy() # class labels (stored as floats)

# train-test-valid split
m = len(y)
trainp = int(np.floor(0.7*m)) # 70% training
validp = int(np.floor(0.1*m)) # 10% validation

y_train = y[0:trainp]
y_valid = y[trainp:trainp+validp]
y_test = y[trainp+validp:] # remaining samples

x_train = x[0:trainp,:]
x_valid = x[trainp:trainp+validp,:]
x_test = x[trainp+validp:,:]

# normalizing every column by its largest absolute value in the training set
qq = np.amax(np.abs(x_train), axis=0)
qq[qq==0] = 1 # an all-zero training column would otherwise divide by zero
x_train = x_train/qq
x_valid = x_valid/qq
x_test = x_test/qq

# not normalizing output data for classification

m_train = len(y_train)
m_valid = len(y_valid)
m_test = len(y_test)


ONE vs. ALL CODING (hyperparameters chosen by trial & error)

In [None]:
# LOR + BGD
alpha = 1.5 # learning rate
T = 500 # number of iterations

w_models = []
loglikelihood_models = []
pred_models = []

# training 3 models and storing weights, loglikelihoods and predictions on test data
for k in range(3):
  w = np.zeros(np.shape(x)[1])
  y_train_coded = onevall(k+1, y_train) # 1 for class k+1, 0 for every other class
  y_test_coded = onevall(k+1, y_test) # not used below; left available for inspection

  for t in range(T):
    # coordinates are updated one after another, so every h() call already
    # sees the coordinates changed earlier in the same sweep
    for j in range(len(w)):
      w[j] = w[j] - (alpha/m_train)*np.dot(h(x_train,w) - y_train_coded,x_train[:,j])

  w_models.append(w)
  loglikelihood_models.append(loglikelihood(y_train_coded, h(x_train,w)))
  pred = np.where(h(x_test,w)>0.5, 1, 0) # prediction of each model on test data
  pred_models.append(pred)

# final predictions based on prediction of 3 models
pred_models = np.transpose(pred_models) # row i = votes of the 3 models for test sample i
loglikelihood_models = np.array(loglikelihood_models)
final_pred = np.zeros(m_test)
worst_model = np.argmin(loglikelihood_models)

for i in range(m_test):
  claimed = np.flatnonzero(pred_models[i]==1) # models that claim sample i
  if claimed.size>1:
    # several models claim the sample: keep the one with the highest training log-likelihood
    final_pred[i] = claimed[np.argmax(loglikelihood_models[claimed])]+1
  elif claimed.size==0:
    # no model claims the sample: fall back to the class of the worst-fitting model
    final_pred[i] = worst_model+1
  else:
    final_pred[i] = claimed[0]+1

# performance measures
ind_accuracy, accuracy = performance(y_test, final_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 0.8666666666666667
accuracy of class 2 = 0.9285714285714286
accuracy of class 3 = 0.8461538461538461
overall accuracy of classifier = 0.8809523809523809


In [None]:
# LOR + L2-Norm + BGD
alpha = 1 # learning rate
T = 500 # number of iterations
Lambda = 0.001 # L2 penalty strength

w_models = []
loglikelihood_models = []
pred_models = []

# training 3 models and storing weights, loglikelihoods and predictions on test data
for k in range(3):
  w = np.zeros(np.shape(x)[1])
  y_train_coded = onevall(k+1, y_train) # 1 for class k+1, 0 for every other class
  y_test_coded = onevall(k+1, y_test) # not used below; left available for inspection

  for t in range(T):
    # coordinates are updated one after another, so every h() call already
    # sees the coordinates changed earlier in the same sweep
    for j in range(len(w)):
      # (1-alpha*Lambda) shrinks the weight: the L2 penalty written as weight decay
      w[j] = (1-alpha*Lambda)*w[j] - (alpha/m_train)*np.dot(h(x_train,w) - y_train_coded,x_train[:,j])

  w_models.append(w)
  loglikelihood_models.append(loglikelihood(y_train_coded, h(x_train,w)))
  pred = np.where(h(x_test,w)>0.5, 1, 0) # prediction of each model on test data
  pred_models.append(pred)

# final predictions based on prediction of 3 models
pred_models = np.transpose(pred_models) # row i = votes of the 3 models for test sample i
loglikelihood_models = np.array(loglikelihood_models)
final_pred = np.zeros(m_test)
worst_model = np.argmin(loglikelihood_models)

for i in range(m_test):
  claimed = np.flatnonzero(pred_models[i]==1) # models that claim sample i
  if claimed.size>1:
    # several models claim the sample: keep the one with the highest training log-likelihood
    final_pred[i] = claimed[np.argmax(loglikelihood_models[claimed])]+1
  elif claimed.size==0:
    # no model claims the sample: fall back to the class of the worst-fitting model
    final_pred[i] = worst_model+1
  else:
    final_pred[i] = claimed[0]+1

# performance measures
ind_accuracy, accuracy = performance(y_test, final_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 0.9166666666666666
accuracy of class 2 = 0.8333333333333334
accuracy of class 3 = 1.0
overall accuracy of classifier = 0.9047619047619048


In [None]:
# LOR + L1-Norm + BGD
alpha = 1 # learning rate
T = 500 # number of iterations
Lambda = 0.001 # L1 penalty strength

w_models = []
loglikelihood_models = []
pred_models = []

# training 3 models and storing weights, loglikelihoods and predictions on test data
for k in range(3):
  w = np.zeros(np.shape(x)[1])
  y_train_coded = onevall(k+1, y_train) # 1 for class k+1, 0 for every other class
  y_test_coded = onevall(k+1, y_test) # not used below; left available for inspection

  for t in range(T):
    # coordinates are updated one after another, so every h() call already
    # sees the coordinates changed earlier in the same sweep
    for j in range(len(w)):
      # the sign(w[j]) term is the (sub)gradient of the L1 penalty
      w[j] = w[j] - (alpha/m_train)*np.dot(h(x_train,w) - y_train_coded,x_train[:,j]) - (0.5*alpha*Lambda)*np.sign(w[j])

  w_models.append(w)
  loglikelihood_models.append(loglikelihood(y_train_coded, h(x_train,w)))
  pred = np.where(h(x_test,w)>0.5, 1, 0) # prediction of each model on test data
  pred_models.append(pred)

# final predictions based on prediction of 3 models
pred_models = np.transpose(pred_models) # row i = votes of the 3 models for test sample i
loglikelihood_models = np.array(loglikelihood_models)
final_pred = np.zeros(m_test)
worst_model = np.argmin(loglikelihood_models)

for i in range(m_test):
  claimed = np.flatnonzero(pred_models[i]==1) # models that claim sample i
  if claimed.size>1:
    # several models claim the sample: keep the one with the highest training log-likelihood
    final_pred[i] = claimed[np.argmax(loglikelihood_models[claimed])]+1
  elif claimed.size==0:
    # no model claims the sample: fall back to the class of the worst-fitting model
    final_pred[i] = worst_model+1
  else:
    final_pred[i] = claimed[0]+1

# performance measures
ind_accuracy, accuracy = performance(y_test, final_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 0.9166666666666666
accuracy of class 2 = 0.8333333333333334
accuracy of class 3 = 1.0
overall accuracy of classifier = 0.9047619047619048


In [None]:
# LOR + SGD
alpha = 0.65 # learning rate
T = 700 # number of iterations (one random sample each)

w_models = []
loglikelihood_models = []
pred_models = []

# training 3 models and storing weights, loglikelihoods and predictions on test data
for k in range(3):
  w = np.zeros(np.shape(x)[1])
  y_train_coded = onevall(k+1, y_train) # 1 for class k+1, 0 for every other class
  y_test_coded = onevall(k+1, y_test) # not used below; left available for inspection

  for t in range(T):
    ind = np.random.randint(m_train) # one random training sample per iteration
    # coordinates are updated one after another, so every h() call already
    # sees the coordinates changed earlier in the same sweep
    for j in range(len(w)):
      # evaluate the hypothesis on the sampled row only, not on the whole training set
      w[j] = w[j] - alpha*(h(x_train[ind],w) - y_train_coded[ind])*x_train[ind,j]

  w_models.append(w)
  loglikelihood_models.append(loglikelihood(y_train_coded, h(x_train,w)))
  pred = np.where(h(x_test,w)>0.5, 1, 0) # prediction of each model on test data
  pred_models.append(pred)

# final predictions based on prediction of 3 models
pred_models = np.transpose(pred_models) # row i = votes of the 3 models for test sample i
loglikelihood_models = np.array(loglikelihood_models)
final_pred = np.zeros(m_test)
worst_model = np.argmin(loglikelihood_models)

for i in range(m_test):
  claimed = np.flatnonzero(pred_models[i]==1) # models that claim sample i
  if claimed.size>1:
    # several models claim the sample: keep the one with the highest training log-likelihood
    final_pred[i] = claimed[np.argmax(loglikelihood_models[claimed])]+1
  elif claimed.size==0:
    # no model claims the sample: fall back to the class of the worst-fitting model
    final_pred[i] = worst_model+1
  else:
    final_pred[i] = claimed[0]+1

# performance measures
ind_accuracy, accuracy = performance(y_test, final_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 1.0
accuracy of class 2 = 0.7142857142857143
accuracy of class 3 = 0.7692307692307693
overall accuracy of classifier = 0.8333333333333334


In [None]:
# LOR + L2-Norm + SGD
alpha = 0.65 # learning rate
T = 700 # number of iterations (one random sample each)
Lambda = 0.001 # L2 penalty strength

w_models = []
loglikelihood_models = []
pred_models = []

# training 3 models and storing weights, loglikelihoods and predictions on test data
for k in range(3):
  w = np.zeros(np.shape(x)[1])
  y_train_coded = onevall(k+1, y_train) # 1 for class k+1, 0 for every other class
  y_test_coded = onevall(k+1, y_test) # not used below; left available for inspection

  for t in range(T):
    ind = np.random.randint(m_train) # one random training sample per iteration
    # coordinates are updated one after another, so every h() call already
    # sees the coordinates changed earlier in the same sweep
    for j in range(len(w)):
      # hypothesis evaluated on the sampled row only; (1-alpha*Lambda) is the L2 weight decay
      w[j] = (1-alpha*Lambda)*w[j] - alpha*(h(x_train[ind],w) - y_train_coded[ind])*x_train[ind,j]

  w_models.append(w)
  loglikelihood_models.append(loglikelihood(y_train_coded, h(x_train,w)))
  pred = np.where(h(x_test,w)>0.5, 1, 0) # prediction of each model on test data
  pred_models.append(pred)

# final predictions based on prediction of 3 models
pred_models = np.transpose(pred_models) # row i = votes of the 3 models for test sample i
loglikelihood_models = np.array(loglikelihood_models)
final_pred = np.zeros(m_test)
worst_model = np.argmin(loglikelihood_models)

for i in range(m_test):
  claimed = np.flatnonzero(pred_models[i]==1) # models that claim sample i
  if claimed.size>1:
    # several models claim the sample: keep the one with the highest training log-likelihood
    final_pred[i] = claimed[np.argmax(loglikelihood_models[claimed])]+1
  elif claimed.size==0:
    # no model claims the sample: fall back to the class of the worst-fitting model
    final_pred[i] = worst_model+1
  else:
    final_pred[i] = claimed[0]+1

# performance measures
ind_accuracy, accuracy = performance(y_test, final_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 1.0
accuracy of class 2 = 0.7142857142857143
accuracy of class 3 = 0.7692307692307693
overall accuracy of classifier = 0.8333333333333334


In [None]:
# LOR + L1-Norm + SGD
alpha = 0.65 # learning rate
T = 700 # number of iterations (one random sample each)
Lambda = 0.01 # L1 penalty strength

w_models = []
loglikelihood_models = []
pred_models = []

# training 3 models and storing weights, loglikelihoods and predictions on test data
for k in range(3):
  w = np.zeros(np.shape(x)[1])
  y_train_coded = onevall(k+1, y_train) # 1 for class k+1, 0 for every other class
  y_test_coded = onevall(k+1, y_test) # not used below; left available for inspection

  for t in range(T):
    ind = np.random.randint(m_train) # one random training sample per iteration
    # coordinates are updated one after another, so every h() call already
    # sees the coordinates changed earlier in the same sweep
    for j in range(len(w)):
      # hypothesis evaluated on the sampled row only; sign(w[j]) is the L1 (sub)gradient
      w[j] = w[j] - alpha*(h(x_train[ind],w) - y_train_coded[ind])*x_train[ind,j] - (0.5*alpha*Lambda)*np.sign(w[j])

  w_models.append(w)
  loglikelihood_models.append(loglikelihood(y_train_coded, h(x_train,w)))
  pred = np.where(h(x_test,w)>0.5, 1, 0) # prediction of each model on test data
  pred_models.append(pred)

# final predictions based on prediction of 3 models
pred_models = np.transpose(pred_models) # row i = votes of the 3 models for test sample i
loglikelihood_models = np.array(loglikelihood_models)
final_pred = np.zeros(m_test)
worst_model = np.argmin(loglikelihood_models)

for i in range(m_test):
  claimed = np.flatnonzero(pred_models[i]==1) # models that claim sample i
  if claimed.size>1:
    # several models claim the sample: keep the one with the highest training log-likelihood
    final_pred[i] = claimed[np.argmax(loglikelihood_models[claimed])]+1
  elif claimed.size==0:
    # no model claims the sample: fall back to the class of the worst-fitting model
    final_pred[i] = worst_model+1
  else:
    final_pred[i] = claimed[0]+1

# performance measures
ind_accuracy, accuracy = performance(y_test, final_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 0.9333333333333333
accuracy of class 2 = 0.8571428571428571
accuracy of class 3 = 0.7692307692307693
overall accuracy of classifier = 0.8571428571428571


In [None]:
# LOR + MBGD
nb = 32 #batch size
rng = np.random.default_rng(0) # seeded so the sampled mini-batches are repeatable

alpha = 1 # learning rate
T = 600 # number of iterations (one mini-batch each)

w_models = []
loglikelihood_models = []
pred_models = []

# training 3 models and storing weights, loglikelihoods and predictions on test data
for k in range(3):
  w = np.zeros(np.shape(x)[1])
  y_train_coded = onevall(k+1, y_train) # 1 for class k+1, 0 for every other class
  y_test_coded = onevall(k+1, y_test) # not used below; left available for inspection

  for t in range(T):
    ind = rng.choice(m_train, nb, replace=False) # nb distinct row indices
    # coordinates are updated one after another, so every h() call already
    # sees the coordinates changed earlier in the same sweep
    for j in range(len(w)):
      # evaluate the hypothesis on the mini-batch rows only, not on the whole training set
      w[j] = w[j] - (alpha/nb)*np.dot(h(x_train[ind],w) - y_train_coded[ind],x_train[ind,j])

  w_models.append(w)
  loglikelihood_models.append(loglikelihood(y_train_coded, h(x_train,w)))
  pred = np.where(h(x_test,w)>0.5, 1, 0) # prediction of each model on test data
  pred_models.append(pred)

# final predictions based on prediction of 3 models
pred_models = np.transpose(pred_models) # row i = votes of the 3 models for test sample i
loglikelihood_models = np.array(loglikelihood_models)
final_pred = np.zeros(m_test)
worst_model = np.argmin(loglikelihood_models)

for i in range(m_test):
  claimed = np.flatnonzero(pred_models[i]==1) # models that claim sample i
  if claimed.size>1:
    # several models claim the sample: keep the one with the highest training log-likelihood
    final_pred[i] = claimed[np.argmax(loglikelihood_models[claimed])]+1
  elif claimed.size==0:
    # no model claims the sample: fall back to the class of the worst-fitting model
    final_pred[i] = worst_model+1
  else:
    final_pred[i] = claimed[0]+1

# performance measures
ind_accuracy, accuracy = performance(y_test, final_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 0.9166666666666666
accuracy of class 2 = 0.8333333333333334
accuracy of class 3 = 1.0
overall accuracy of classifier = 0.9047619047619048


In [None]:
# LOR + L2-Norm + MBGD
nb = 32 #batch size
rng = np.random.default_rng(0) # seeded so the sampled mini-batches are repeatable

alpha = 1 # learning rate
T = 600 # number of iterations (one mini-batch each)
Lambda = 0.001 # L2 penalty strength

w_models = []
loglikelihood_models = []
pred_models = []

# training 3 models and storing weights, loglikelihoods and predictions on test data
for k in range(3):
  w = np.zeros(np.shape(x)[1])
  y_train_coded = onevall(k+1, y_train) # 1 for class k+1, 0 for every other class
  y_test_coded = onevall(k+1, y_test) # not used below; left available for inspection

  for t in range(T):
    ind = rng.choice(m_train, nb, replace=False) # nb distinct row indices
    # coordinates are updated one after another, so every h() call already
    # sees the coordinates changed earlier in the same sweep
    for j in range(len(w)):
      # hypothesis evaluated on the mini-batch rows only; (1-alpha*Lambda) is the L2 weight decay
      w[j] = (1-alpha*Lambda)*w[j] - (alpha/nb)*np.dot(h(x_train[ind],w) - y_train_coded[ind],x_train[ind,j])

  w_models.append(w)
  loglikelihood_models.append(loglikelihood(y_train_coded, h(x_train,w)))
  pred = np.where(h(x_test,w)>0.5, 1, 0) # prediction of each model on test data
  pred_models.append(pred)

# final predictions based on prediction of 3 models
pred_models = np.transpose(pred_models) # row i = votes of the 3 models for test sample i
loglikelihood_models = np.array(loglikelihood_models)
final_pred = np.zeros(m_test)
worst_model = np.argmin(loglikelihood_models)

for i in range(m_test):
  claimed = np.flatnonzero(pred_models[i]==1) # models that claim sample i
  if claimed.size>1:
    # several models claim the sample: keep the one with the highest training log-likelihood
    final_pred[i] = claimed[np.argmax(loglikelihood_models[claimed])]+1
  elif claimed.size==0:
    # no model claims the sample: fall back to the class of the worst-fitting model
    final_pred[i] = worst_model+1
  else:
    final_pred[i] = claimed[0]+1

# performance measures
ind_accuracy, accuracy = performance(y_test, final_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 0.9166666666666666
accuracy of class 2 = 0.7222222222222222
accuracy of class 3 = 1.0
overall accuracy of classifier = 0.8571428571428571


In [None]:
# LOR + L1-Norm + MBGD
nb = 32 #batch size
rng = np.random.default_rng(0) # seeded so the sampled mini-batches are repeatable

alpha = 1 # learning rate
T = 600 # number of iterations (one mini-batch each)
Lambda = 0.001 # L1 penalty strength

w_models = []
loglikelihood_models = []
pred_models = []

# training 3 models and storing weights, loglikelihoods and predictions on test data
for k in range(3):
  w = np.zeros(np.shape(x)[1])
  y_train_coded = onevall(k+1, y_train) # 1 for class k+1, 0 for every other class
  y_test_coded = onevall(k+1, y_test) # not used below; left available for inspection

  for t in range(T):
    ind = rng.choice(m_train, nb, replace=False) # nb distinct row indices
    # coordinates are updated one after another, so every h() call already
    # sees the coordinates changed earlier in the same sweep
    for j in range(len(w)):
      # hypothesis evaluated on the mini-batch rows only; sign(w[j]) is the L1 (sub)gradient
      w[j] = w[j] - (alpha/nb)*np.dot(h(x_train[ind],w) - y_train_coded[ind],x_train[ind,j]) - (0.5*alpha*Lambda)*np.sign(w[j])

  w_models.append(w)
  loglikelihood_models.append(loglikelihood(y_train_coded, h(x_train,w)))
  pred = np.where(h(x_test,w)>0.5, 1, 0) # prediction of each model on test data
  pred_models.append(pred)

# final predictions based on prediction of 3 models
pred_models = np.transpose(pred_models) # row i = votes of the 3 models for test sample i
loglikelihood_models = np.array(loglikelihood_models)
final_pred = np.zeros(m_test)
worst_model = np.argmin(loglikelihood_models)

for i in range(m_test):
  claimed = np.flatnonzero(pred_models[i]==1) # models that claim sample i
  if claimed.size>1:
    # several models claim the sample: keep the one with the highest training log-likelihood
    final_pred[i] = claimed[np.argmax(loglikelihood_models[claimed])]+1
  elif claimed.size==0:
    # no model claims the sample: fall back to the class of the worst-fitting model
    final_pred[i] = worst_model+1
  else:
    final_pred[i] = claimed[0]+1

# performance measures
ind_accuracy, accuracy = performance(y_test, final_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 0.9166666666666666
accuracy of class 2 = 0.8333333333333334
accuracy of class 3 = 1.0
overall accuracy of classifier = 0.9047619047619048


ONE vs. ONE CODING (hyperparameters chosen by trial & error)

In [None]:
# data for models: each pairwise model trains on the samples of two classes only
keep_1v2 = y_train!=3 # drop class 3 -> 1vs2
keep_2v3 = y_train!=1 # drop class 1 -> 2vs3
keep_1v3 = y_train!=2 # drop class 2 -> 1vs3

x_train1 = x_train[keep_1v2] # 1vs2
x_train2 = x_train[keep_2v3] # 2vs3
x_train3 = x_train[keep_1v3] # 1vs3

# binary targets for each pair
y_train1 = np.where(y_train[keep_1v2]==1, 0, 1) # class1=0, class2=1
y_train2 = np.where(y_train[keep_2v3]==2, 0, 1) # class2=0, class3=1
y_train3 = np.where(y_train[keep_1v3]==1, 0, 1) # class1=0, class3=1

# number of training samples available to each pairwise model
m_train1, m_train2, m_train3 = (np.shape(subset)[0] for subset in (x_train1, x_train2, x_train3))

In [None]:
# LOR + BGD
T = 500
alpha = 1

# one entry per pairwise model:
# (training inputs, 0/1 targets, sample count, label if h>0.5, label otherwise)
pair_tasks = [
  (x_train1, y_train1, m_train1, 2, 1), # model 1
  (x_train2, y_train2, m_train2, 3, 2), # model 2
  (x_train3, y_train3, m_train3, 3, 1), # model 3
]

votes = []
for x_pair, y_pair, m_pair, label_hi, label_lo in pair_tasks:
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    for j in range(len(w)):
      grad_step = (alpha/m_pair)*np.dot(h(x_pair,w) - y_pair,x_pair[:,j])
      w[j] = w[j] - grad_step
  votes.append(np.where(h(x_test,w)>0.5, label_hi, label_lo))
pred1, pred2, pred3 = votes

# final predictions: one column per model, most frequent label per row
pred = np.column_stack(votes)
final_pred,c = mode(pred, axis=1)

# performance measures
ind_accuracy, accuracy = performance(y_test, final_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 0.6923076923076923
accuracy of class 2 = 1.0
accuracy of class 3 = 0.9230769230769231
overall accuracy of classifier = 0.8809523809523809


In [None]:
# LOR + L2-Norm + BGD
T = 500
alpha = 1
Lambda = 0.001

# one entry per pairwise model:
# (training inputs, 0/1 targets, sample count, label if h>0.5, label otherwise)
pair_tasks = [
  (x_train1, y_train1, m_train1, 2, 1), # model 1
  (x_train2, y_train2, m_train2, 3, 2), # model 2
  (x_train3, y_train3, m_train3, 3, 1), # model 3
]

votes = []
for x_pair, y_pair, m_pair, label_hi, label_lo in pair_tasks:
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    for j in range(len(w)):
      decayed = (1-alpha*Lambda)*w[j] # L2 penalty written as weight decay
      grad_step = (alpha/m_pair)*np.dot(h(x_pair,w) - y_pair,x_pair[:,j])
      w[j] = decayed - grad_step
  votes.append(np.where(h(x_test,w)>0.5, label_hi, label_lo))
pred1, pred2, pred3 = votes

# final predictions: one column per model, most frequent label per row
pred = np.column_stack(votes)
final_pred,c = mode(pred, axis=1)

# performance measures
ind_accuracy, accuracy = performance(y_test, final_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 0.6923076923076923
accuracy of class 2 = 1.0
accuracy of class 3 = 0.9230769230769231
overall accuracy of classifier = 0.8809523809523809


In [None]:
# LOR + L1-Norm + BGD
T = 500
alpha = 1
Lambda = 0.01

# one entry per pairwise model:
# (training inputs, 0/1 targets, sample count, label if h>0.5, label otherwise)
pair_tasks = [
  (x_train1, y_train1, m_train1, 2, 1), # model 1
  (x_train2, y_train2, m_train2, 3, 2), # model 2
  (x_train3, y_train3, m_train3, 3, 1), # model 3
]

votes = []
for x_pair, y_pair, m_pair, label_hi, label_lo in pair_tasks:
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    for j in range(len(w)):
      grad_step = (alpha/m_pair)*np.dot(h(x_pair,w) - y_pair,x_pair[:,j])
      l1_step = (0.5*alpha*Lambda)*np.sign(w[j]) # (sub)gradient of the L1 penalty
      w[j] = (w[j] - grad_step) - l1_step
  votes.append(np.where(h(x_test,w)>0.5, label_hi, label_lo))
pred1, pred2, pred3 = votes

# final predictions: one column per model, most frequent label per row
pred = np.column_stack(votes)
final_pred,c = mode(pred, axis=1)

# performance measures
ind_accuracy, accuracy = performance(y_test, final_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 0.6923076923076923
accuracy of class 2 = 1.0
accuracy of class 3 = 0.9230769230769231
overall accuracy of classifier = 0.8809523809523809


In [None]:
# LOR + SGD
T = 600 # number of iterations (one random sample each)
alpha = 1 # learning rate

# one entry per pairwise model:
# (training inputs, 0/1 targets, sample count, label if h>0.5, label otherwise)
pair_tasks = [
  (x_train1, y_train1, m_train1, 2, 1), # model 1
  (x_train2, y_train2, m_train2, 3, 2), # model 2
  (x_train3, y_train3, m_train3, 3, 1), # model 3
]

votes = []
for x_pair, y_pair, m_pair, label_hi, label_lo in pair_tasks:
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = np.random.randint(m_pair) # one random training sample per iteration
    # coordinates are updated one after another, so every h() call already
    # sees the coordinates changed earlier in the same sweep
    for j in range(len(w)):
      # evaluate the hypothesis on the sampled row only, not on the whole training set
      w[j] = w[j] - (alpha)*(h(x_pair[ind],w) - y_pair[ind])*x_pair[ind,j]
  votes.append(np.where(h(x_test,w)>0.5, label_hi, label_lo))
pred1, pred2, pred3 = votes

# final predictions: one column per model, most frequent label per row
# NOTE(review): when the three models vote for three different labels there is no
# majority; scipy's mode appears to return the smallest label then - confirm
pred = np.column_stack(votes)
final_pred,c = mode(pred, axis=1)

# performance measures
ind_accuracy, accuracy = performance(y_test, final_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 0.8461538461538461
accuracy of class 2 = 1.0
accuracy of class 3 = 0.9230769230769231
overall accuracy of classifier = 0.9285714285714286


In [None]:
# LOR + L2-Norm + SGD
T = 600 # number of iterations (one random sample each)
alpha = 1 # learning rate
Lambda = 0.0001 # L2 penalty strength

# one entry per pairwise model:
# (training inputs, 0/1 targets, sample count, label if h>0.5, label otherwise)
pair_tasks = [
  (x_train1, y_train1, m_train1, 2, 1), # model 1
  (x_train2, y_train2, m_train2, 3, 2), # model 2
  (x_train3, y_train3, m_train3, 3, 1), # model 3
]

votes = []
for x_pair, y_pair, m_pair, label_hi, label_lo in pair_tasks:
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = np.random.randint(m_pair) # one random training sample per iteration
    # coordinates are updated one after another, so every h() call already
    # sees the coordinates changed earlier in the same sweep
    for j in range(len(w)):
      # hypothesis evaluated on the sampled row only; (1-alpha*Lambda) is the L2 weight decay
      w[j] = (1-alpha*Lambda)*w[j] - (alpha)*(h(x_pair[ind],w) - y_pair[ind])*x_pair[ind,j]
  votes.append(np.where(h(x_test,w)>0.5, label_hi, label_lo))
pred1, pred2, pred3 = votes

# final predictions: one column per model, most frequent label per row
# NOTE(review): when the three models vote for three different labels there is no
# majority; scipy's mode appears to return the smallest label then - confirm
pred = np.column_stack(votes)
final_pred,c = mode(pred, axis=1)

# performance measures
ind_accuracy, accuracy = performance(y_test, final_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 0.5384615384615384
accuracy of class 2 = 1.0
accuracy of class 3 = 0.9230769230769231
overall accuracy of classifier = 0.8333333333333334


In [None]:
# LOR + L1-Norm + SGD
T = 600 # number of iterations (one random sample each)
alpha = 1 # learning rate
Lambda = 0.001 # L1 penalty strength

# one entry per pairwise model:
# (training inputs, 0/1 targets, sample count, label if h>0.5, label otherwise)
pair_tasks = [
  (x_train1, y_train1, m_train1, 2, 1), # model 1
  (x_train2, y_train2, m_train2, 3, 2), # model 2
  (x_train3, y_train3, m_train3, 3, 1), # model 3
]

votes = []
for x_pair, y_pair, m_pair, label_hi, label_lo in pair_tasks:
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = np.random.randint(m_pair) # one random training sample per iteration
    # coordinates are updated one after another, so every h() call already
    # sees the coordinates changed earlier in the same sweep
    for j in range(len(w)):
      # hypothesis evaluated on the sampled row only; sign(w[j]) is the L1 (sub)gradient
      w[j] = w[j] - (alpha)*(h(x_pair[ind],w) - y_pair[ind])*x_pair[ind,j] - (0.5*alpha*Lambda)*np.sign(w[j])
  votes.append(np.where(h(x_test,w)>0.5, label_hi, label_lo))
pred1, pred2, pred3 = votes

# final predictions: one column per model, most frequent label per row
# NOTE(review): when the three models vote for three different labels there is no
# majority; scipy's mode appears to return the smallest label then - confirm
pred = np.column_stack(votes)
final_pred,c = mode(pred, axis=1)

# performance measures
ind_accuracy, accuracy = performance(y_test, final_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 0.6153846153846154
accuracy of class 2 = 1.0
accuracy of class 3 = 1.0
overall accuracy of classifier = 0.8809523809523809


In [None]:
# LOR + MBGD
nb = 24 #batch size
rng = np.random.default_rng(0) # seeded so the sampled mini-batches are repeatable
T = 600 # number of iterations (one mini-batch each)
alpha = 1 # learning rate

# one entry per pairwise model:
# (training inputs, 0/1 targets, sample count, label if h>0.5, label otherwise)
pair_tasks = [
  (x_train1, y_train1, m_train1, 2, 1), # model 1
  (x_train2, y_train2, m_train2, 3, 2), # model 2
  (x_train3, y_train3, m_train3, 3, 1), # model 3
]

votes = []
for x_pair, y_pair, m_pair, label_hi, label_lo in pair_tasks:
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = rng.choice(m_pair, nb, replace=False) # nb distinct row indices
    # coordinates are updated one after another, so every h() call already
    # sees the coordinates changed earlier in the same sweep
    for j in range(len(w)):
      # evaluate the hypothesis on the mini-batch rows only, not on the whole training set
      w[j] = w[j] - (alpha/nb)*np.dot(h(x_pair[ind],w) - y_pair[ind],x_pair[ind,j])
  votes.append(np.where(h(x_test,w)>0.5, label_hi, label_lo))
pred1, pred2, pred3 = votes

# final predictions: one column per model, most frequent label per row
# NOTE(review): when the three models vote for three different labels there is no
# majority; scipy's mode appears to return the smallest label then - confirm
pred = np.column_stack(votes)
final_pred,c = mode(pred, axis=1)

# performance measures
ind_accuracy, accuracy = performance(y_test, final_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 0.7692307692307693
accuracy of class 2 = 1.0
accuracy of class 3 = 0.9230769230769231
overall accuracy of classifier = 0.9047619047619048


In [None]:
# LOR + L2-Norm + MBGD
nb = 24 #batch size
rng = np.random.default_rng(0) # seeded so the sampled mini-batches are repeatable
T = 600 # number of iterations (one mini-batch each)
alpha = 1 # learning rate
Lambda = 0.001 # L2 penalty strength

# one entry per pairwise model:
# (training inputs, 0/1 targets, sample count, label if h>0.5, label otherwise)
pair_tasks = [
  (x_train1, y_train1, m_train1, 2, 1), # model 1
  (x_train2, y_train2, m_train2, 3, 2), # model 2
  (x_train3, y_train3, m_train3, 3, 1), # model 3
]

votes = []
for x_pair, y_pair, m_pair, label_hi, label_lo in pair_tasks:
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = rng.choice(m_pair, nb, replace=False) # nb distinct row indices
    # coordinates are updated one after another, so every h() call already
    # sees the coordinates changed earlier in the same sweep
    for j in range(len(w)):
      # hypothesis evaluated on the mini-batch rows only; (1-alpha*Lambda) is the L2 weight decay
      w[j] = (1-alpha*Lambda)*w[j] - (alpha/nb)*np.dot(h(x_pair[ind],w) - y_pair[ind],x_pair[ind,j])
  votes.append(np.where(h(x_test,w)>0.5, label_hi, label_lo))
pred1, pred2, pred3 = votes

# final predictions: one column per model, most frequent label per row
# NOTE(review): when the three models vote for three different labels there is no
# majority; scipy's mode appears to return the smallest label then - confirm
pred = np.column_stack(votes)
final_pred,c = mode(pred, axis=1)

# performance measures
ind_accuracy, accuracy = performance(y_test, final_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 0.6153846153846154
accuracy of class 2 = 1.0
accuracy of class 3 = 0.9230769230769231
overall accuracy of classifier = 0.8571428571428571


In [None]:
# LOR + L1-Norm + MBGD
nb = 24 #batch size
rng = np.random.default_rng(0) # seeded so the sampled mini-batches are repeatable
T = 600 # number of iterations (one mini-batch each)
alpha = 1 # learning rate
# L1 penalty strength. Previously not set in this cell, so it silently reused
# whatever value an earlier cell had left behind; 0.001 is the value the
# preceding cell assigns when the notebook is run top to bottom.
Lambda = 0.001

# one entry per pairwise model:
# (training inputs, 0/1 targets, sample count, label if h>0.5, label otherwise)
pair_tasks = [
  (x_train1, y_train1, m_train1, 2, 1), # model 1
  (x_train2, y_train2, m_train2, 3, 2), # model 2
  (x_train3, y_train3, m_train3, 3, 1), # model 3
]

votes = []
for x_pair, y_pair, m_pair, label_hi, label_lo in pair_tasks:
  w = np.zeros(np.shape(x)[1])
  for t in range(T):
    ind = rng.choice(m_pair, nb, replace=False) # nb distinct row indices
    # coordinates are updated one after another, so every h() call already
    # sees the coordinates changed earlier in the same sweep
    for j in range(len(w)):
      # hypothesis evaluated on the mini-batch rows only; sign(w[j]) is the L1 (sub)gradient
      w[j] = w[j] - (alpha/nb)*np.dot(h(x_pair[ind],w) - y_pair[ind],x_pair[ind,j]) - (0.5*alpha*Lambda)*np.sign(w[j])
  votes.append(np.where(h(x_test,w)>0.5, label_hi, label_lo))
pred1, pred2, pred3 = votes

# final predictions: one column per model, most frequent label per row
# NOTE(review): when the three models vote for three different labels there is no
# majority; scipy's mode appears to return the smallest label then - confirm
pred = np.column_stack(votes)
final_pred,c = mode(pred, axis=1)

# performance measures
ind_accuracy, accuracy = performance(y_test, final_pred)
print("accuracy of class 1 = {}".format(ind_accuracy[0]))
print("accuracy of class 2 = {}".format(ind_accuracy[1]))
print("accuracy of class 3 = {}".format(ind_accuracy[2]))
print("overall accuracy of classifier = {}".format(accuracy))

accuracy of class 1 = 0.7692307692307693
accuracy of class 2 = 1.0
accuracy of class 3 = 0.9230769230769231
overall accuracy of classifier = 0.9047619047619048
