### ***6***

# Multiclass Logistic Regression + BGD

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%reset-f

In [3]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt 
import math
import copy

In [4]:
path = '/content/drive/MyDrive/ML/data_q6_q7.xlsx' 
df = pd.read_excel(path)
insts = df.to_numpy()
m= len(insts[:,0]) #No of instinces
ones = np.ones((m,1))
insts = np.append(ones,insts,axis=1)
n = len(insts[0,:])-1 #no of features including f0 n=31
for i in range(1,n,1):
  insts[:,i] = (insts[:,i] - insts[:,i].mean())/insts[:,i].std() #Normalize the data
insts_mb = copy.deepcopy(insts)
insts_s = copy.deepcopy(insts)
np.random.shuffle(insts)
insts_tr = insts[0:int(m*0.7),:]
insts_te = insts[int(m*0.7):int(m*0.9),:]
insts_val = insts[int(m*0.9):m,:]

In [5]:
# hypothesis for logistic regression which is the prediction 
def hypothesis(w,insts,m,n):
  z = np.dot(insts[:,0:n],w.T)
  h = 1/(1+np.exp(-z.astype(np.float32))) #working
  return h

# cost function for logistic regression
def cost(h,insts,m):
  c = 0
  for i in range(m):
    c_te = (-insts_tr[i,-1]*np.log(h[i]) - (1- insts_tr[i,-1])*np.log(1 - h[i]))/m
    if not(np.isnan(c_te)) and not(math.isinf(c_te)):
      c = c+ c_te
  return c
# update of weight values for logistic regression
def update(w,alpha,insts,h,m,n):
  d = np.zeros(n)
  for i in range(n):
    d[i] = np.dot( (h[0:m]- insts[0:m,-1]),insts[0:m,i] )
  for i in range(n):
    w[i] = w[i] - alpha*d[i]

  return w
def update_stotastic(w,alpha,insts,h,index,n):
  d = np.zeros(n)
  for i in range(n):
    d[i] = (h[index]- insts[index,-1])*insts[index,i] 
  for i in range(n):
    w[i] = w[i] - alpha*d[i]

  return w

def performance(mat,m):
  for i in range(3):
    ia = mat[i,i]/(mat[i,0]+mat[i,1]+mat[i,2])
    print('accuracy for group',end='')
    print(i+1, end=': ')
    print(ia)
  print('overall accuracy:',end= ' ')
  print((mat[0,0]+mat[1,1]+mat[2,2])/m)

  

In [6]:
opt_weights = [] # stores the weight values for all the models
opt_cost = [] # stores the final optimal cost value at the end of each group

In [7]:
for i in range(1,4):
  train = insts_tr.copy()
  val = insts_val.copy()
  train[:,-1] = (train[:,-1]==i).astype(int) # assignes the value 1 if the level is i
  val[:,-1] = (val[:,-1]==i).astype(int)# assignes the value 0 if the level is not i
  m_tr = len(train[:,0])
  m_val = len(val[:,0])
  itr = 600
  alpha = np.linspace(0.001,0.01,100)
  b_min = 10000
  c = np.ones(len(alpha))
  w_opt = np.zeros(n)  #stores the optimal weight value
  min=0
  for j in range(len(alpha)):
    w = np.random.rand(n)
    j_list = np.ones(itr)
    for k in range(itr):
      h = hypothesis(w,train,m_tr,n)
      w = update(w,alpha[j],train,h,m_tr,n)
    h = hypothesis(w,val,m_val,n)
    c[j] = cost(h,val,m_val)
    if c[j] < b_min:
      min = j
      b_min = c[j]
      w_opt = w  
  h = hypothesis(w_opt,train,m_tr,n)
  opt_cost.append(cost(h,train,m_tr)) # cal the final cost value
  opt_weights.append(w_opt) # append the optimal weight values for every group

In [8]:
m_te = len(insts_te[:,0])
y_acu = insts_te[:,-1].astype(int)
y_pre = np.zeros(len(insts_te)).astype(int)
y_cost = np.zeros(len(insts_te))
i = 1
for w in opt_weights:
  h_te = hypothesis(w,insts_te,m_te,n)
  y_pe = np.round(h_te)
  for j in range(len(y_pe)):
    if y_pe[j]==1 and y_cost[j]==0:
      y_pre[j] = i
      y_cost[j] = opt_cost[i-1]
    if y_pe[j] == 1 and y_cost[j] !=0:
      if y_cost[j]>opt_cost[i-1]:
        y_pre[j] = i
        y_cost[j] = opt_cost[i-1]
  i+=1

  

In [9]:
confusion_matrix = np.zeros(9).reshape(3,3)

for i in range(m_te):
  if y_pre[i] == y_acu[i]:
    idx = y_acu[i]
    confusion_matrix[idx-1,idx-1] +=1
  else:
    confusion_matrix[y_acu[i]-1,y_pre[i]-1] +=1
confusion_matrix

array([[11.,  0.,  1.],
       [ 1., 13.,  0.],
       [ 0.,  0., 16.]])

In [10]:
performance(confusion_matrix,m_te)

accuracy for group1: 0.9166666666666666
accuracy for group2: 0.9285714285714286
accuracy for group3: 1.0
overall accuracy: 0.9523809523809523


# Multiclass Logistic regression + MBG

In [11]:
np.random.shuffle(insts_mb)
insts_tr = insts_mb[0:int(m*0.7),:]
insts_te = insts_mb[int(m*0.7):int(m*0.9),:]
insts_val = insts_mb[int(m*0.9):m,:]

In [12]:
opt_weights_mb = [] # stores the weight values for all the models
opt_cost_mb = [] # stores the final optimal cost value at the end of each group

In [13]:
for i in range(1,4):
  train = insts_tr.copy()
  val = insts_val.copy()
  train[:,-1] = (train[:,-1]==i).astype(int) # assignes the value 1 if the level is i
  val[:,-1] = (val[:,-1]==i).astype(int)# assignes the value 0 if the level is not i
  m_tr = len(train[:,0])
  m_val = len(val[:,0])
  itr = 600
  alpha = np.linspace(0.001,0.01,100)
  b_min = 10000
  c = np.ones(len(alpha))
  w_opt = np.zeros(n)  #stores the optimal weight value
  min=0
  m_batch = 20
  for j in range(len(alpha)):
    w = np.random.rand(n)
    j_list = np.ones(itr)
    for k in range(itr):
      np.random.shuffle(train)
      h = hypothesis(w,train,m_tr,n)
      w = update(w,alpha[j],train,h,m_batch,n)
    h = hypothesis(w,val,m_val,n)
    c[j] = cost(h,val,m_val)
    if c[j] < b_min:
      min = j
      b_min = c[j]
      w_opt = w  
  h = hypothesis(w_opt,train,m_tr,n)
  opt_cost_mb.append(cost(h,train,m_tr)) # cal the final cost value
  opt_weights_mb.append(w_opt) # append the optimal weight values for every group
m_te = len(insts_te[:,0])
y_acu = insts_te[:,-1].astype(int)
y_pre = np.zeros(len(insts_te)).astype(int)
y_cost = np.zeros(len(insts_te))
i = 1
for w in opt_weights_mb:
  h_te = hypothesis(w,insts_te,m_te,n)
  y_pe = np.round(h_te)
  for j in range(len(y_pe)):
    if y_pe[j]==1 and y_cost[j]==0:
      y_pre[j] = i
      y_cost[j] = opt_cost_mb[i-1]
    if y_pe[j] == 1 and y_cost[j] !=0:
      if y_cost[j]>opt_cost_mb[i-1]:
        y_pre[j] = i
        y_cost[j] = opt_cost_mb[i-1]
  i+=1
confusion_matrix = np.zeros(9).reshape(3,3)

for i in range(m_te):
  if y_pre[i] == y_acu[i]:
    idx = y_acu[i]
    confusion_matrix[idx-1,idx-1] +=1
  else:
    confusion_matrix[y_acu[i]-1,y_pre[i]-1] +=1
performance(confusion_matrix,m_te)
  

accuracy for group1: 0.6875
accuracy for group2: 1.0
accuracy for group3: 0.9230769230769231
overall accuracy: 0.8571428571428571


# Multiclass Logistic regression + SGD

In [14]:
np.random.shuffle(insts_s)
insts_tr = insts_s[0:int(m*0.7),:]
insts_te = insts_s[int(m*0.7):int(m*0.9),:]
insts_val = insts_s[int(m*0.9):m,:]

In [15]:
opt_weights_s = [] # stores the weight values for all the models
opt_cost_s = [] # stores the final optimal cost value at the end of each group

In [16]:

for i in range(1,4):
  train = insts_tr.copy()
  val = insts_val.copy()
  train[:,-1] = (train[:,-1]==i).astype(int) # assignes the value 1 if the level is i
  val[:,-1] = (val[:,-1]==i).astype(int)# assignes the value 0 if the level is not i
  m_tr = len(train[:,0])
  m_val = len(val[:,0])
  itr = 1000
  alpha = np.linspace(0.01,0.1,100)
  b_min = 10000
  c = np.ones(len(alpha))
  w_opt = np.zeros(n)  #stores the optimal weight value
  min=0
  for j in range(len(alpha)):
    w = np.random.rand(n)
    j_list = np.ones(itr)
    for k in range(itr):
      index = np.random.randint(0,m_tr)
      h = hypothesis(w,train,m_tr,n)
      w = update_stotastic(w,alpha[j],train,h,index,n)
    h = hypothesis(w,val,m_val,n)
    c[j] = cost(h,val,m_val)
    if c[j] < b_min:
      min = j
      b_min = c[j]
      w_opt = w  
  h = hypothesis(w_opt,train,m_tr,n)
  opt_cost_s.append(cost(h,train,m_tr)) # cal the final cost value
  opt_weights_s.append(w_opt) # append the optimal weight values for every group
m_te = len(insts_te[:,0])
y_acu = insts_te[:,-1].astype(int)
y_pre = np.zeros(len(insts_te)).astype(int)
y_cost = np.zeros(len(insts_te))
i = 1
for w in opt_weights_s:
  h_te = hypothesis(w,insts_te,m_te,n)
  y_pe = np.round(h_te)
  for j in range(len(y_pe)):
    if y_pe[j]==1 and y_cost[j]==0:
      y_pre[j] = i
      y_cost[j] = opt_cost_s[i-1]
    if y_pe[j] == 1 and y_cost[j] !=0:
      if y_cost[j]>opt_cost_s[i-1]:
        y_pre[j] = i
        y_cost[j] = opt_cost_s[i-1]
  i+=1
confusion_matrix = np.zeros(9).reshape(3,3)

for i in range(m_te):
  if y_pre[i] == y_acu[i]:
    idx = y_acu[i]
    confusion_matrix[idx-1,idx-1] +=1
  else:
    confusion_matrix[y_acu[i]-1,y_pre[i]-1] +=1
performance(confusion_matrix,m_te)
  

accuracy for group1: 0.5333333333333333
accuracy for group2: 1.0
accuracy for group3: 1.0
overall accuracy: 0.8333333333333334


# Multiclass Logistic regression + BGD + L2-NORM

In [17]:
%reset-f

In [18]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt 
import math
import copy

In [19]:
path = '/content/drive/MyDrive/ML/data_q6_q7.xlsx' 
df = pd.read_excel(path)
insts = df.to_numpy()
m= len(insts[:,0]) #No of instinces
ones = np.ones((m,1))
insts = np.append(ones,insts,axis=1)
n = len(insts[0,:])-1 #no of features including f0 n=31
for i in range(1,n,1):
  insts[:,i] = (insts[:,i] - insts[:,i].mean())/insts[:,i].std() #Normalize the data
insts_mb = copy.deepcopy(insts)
insts_s = copy.deepcopy(insts)
np.random.shuffle(insts)
insts_tr = insts[0:int(m*0.7),:]
insts_te = insts[int(m*0.7):int(m*0.9),:]
insts_val = insts[int(m*0.9):m,:]

In [20]:
# hypothesis for logistic regression which is the prediction 
def hypothesis(w,insts,m,n):
  z = np.dot(insts[:,0:n],w.T)
  h = 1/(1+np.exp(-z.astype(np.float32))) #working
  return h

# cost function for logistic regression
def cost(h,insts,lamb,m,w):
  c = 0
  for i in range(m):
    c_te = (-insts_tr[i,-1]*np.log(h[i]) - (1- insts_tr[i,-1])*np.log(1 - h[i]))/m
    if not(np.isnan(c_te)) and not(math.isinf(c_te)):
      c = c+ c_te
  return c + 0.5*lamb*(np.dot(w,w))
# update of weight values for logistic regression
def update(w,alpha,lamb,insts,h,m,n):
  d = np.zeros(n)
  for i in range(n):
    d[i] = np.dot( (h[0:m]- insts[0:m,-1]),insts[0:m,i] )
  for i in range(n):
    w[i] = w[i]*(1-alpha*lamb) - alpha*d[i]

  return w
def update_stotastic(w,alpha,lamb,insts,h,index,n):
  d = np.zeros(n)
  for i in range(n):
    d[i] = (h[index]- insts[index,-1])*insts[index,i] 
  for i in range(n):
    w[i] = w[i]*(1-alpha*lamb) - alpha*d[i]

  return w

def performance(mat,m):
  ind_acc = np.ones(3)
  for i in range(3):
    ind_acc[i] = mat[i,i]/(mat[i,0]+mat[i,1]+mat[i,2])
    print('accuracy for group',end='')
    print(i+1, end=': ')
    print(ind_acc[i])
  print('overall accuracy:',end= ' ')
  print((mat[0,0]+mat[1,1]+mat[2,2])/m)

  

In [21]:
opt_weights = [] # stores the weight values for all the models
opt_cost = [] # stores the final optimal cost value at the end of each group

In [22]:
for i in range(1,4,1):
  train = insts_tr.copy()
  val = insts_val.copy()
  train[:,-1] = (train[:,-1]==i).astype(int) # assignes the value 1 if the level is i
  val[:,-1] = (val[:,-1]==i).astype(int)# assignes the value 0 if the level is not i
  m_tr = len(train[:,0])
  m_val = len(val[:,0])
  itr = 600
  alpha = np.linspace(0.001,0.01,100)
  lamb = np.linspace(0,100,10)
  b_min = 10000
  c = np.ones(len(lamb))
  w_opt = np.zeros(n)  #stores the optimal weight value
  min=0
  for j in range(len(lamb)):
    for a in alpha:
      w = np.random.rand(n)
      j_list = np.ones(itr)
      for k in range(itr):
        h = hypothesis(w,train,m_tr,n)
        w = update(w,a,lamb[j],train,h,m_tr,n)
      h = hypothesis(w,val,m_val,n)
      c[j] = cost(h,val,lamb[j],m_val,w)
      if c[j] < b_min:
        min = j
        b_min = c[j]
        w_opt = w  
  h = hypothesis(w_opt,train,m_tr,n)
  opt_cost.append(cost(h,train,lamb[min],m_tr,w_opt)) # cal the final cost value
  opt_weights.append(w_opt) # append the optimal weight values for every group

m_te = len(insts_te[:,0])
y_acu = insts_te[:,-1].astype(int)
y_pre = np.zeros(len(insts_te)).astype(int)
y_cost = np.zeros(len(insts_te))
i = 1
for w in opt_weights:
  h_te = hypothesis(w,insts_te,m_te,n)
  y_pe = np.round(h_te)
  for j in range(len(y_pe)):
    if y_pe[j]==1 and y_cost[j]==0:
      y_pre[j] = i
      y_cost[j] = opt_cost[i-1]
    if y_pe[j] == 1 and y_cost[j] !=0:
      if y_cost[j]>opt_cost[i-1]:
        y_pre[j] = i
        y_cost[j] = opt_cost[i-1]
  i+=1
confusion_matrix = np.zeros(9).reshape(3,3)

for i in range(m_te):
  if y_pre[i] == y_acu[i]:
    idx = y_acu[i]
    confusion_matrix[idx-1,idx-1] +=1
  else:
    confusion_matrix[y_acu[i]-1,y_pre[i]-1] +=1
performance(confusion_matrix,m_te)

accuracy for group1: 0.625
accuracy for group2: 0.9375
accuracy for group3: 0.8888888888888888
overall accuracy: 0.8571428571428571


# Multiclass Logistic regression + MBG + L2-NORM

In [23]:
np.random.shuffle(insts_mb)
insts_tr = insts_mb[0:int(m*0.7),:]
insts_te = insts_mb[int(m*0.7):int(m*0.9),:]
insts_val = insts_mb[int(m*0.9):m,:]

In [24]:
opt_weights = [] # stores the weight values for all the models
opt_cost = [] # stores the final optimal cost value at the end of each group

In [25]:
for i in range(1,4):
  train = insts_tr.copy()
  val = insts_val.copy()
  train[:,-1] = (train[:,-1]==i).astype(int) # assignes the value 1 if the level is i
  val[:,-1] = (val[:,-1]==i).astype(int)# assignes the value 0 if the level is not i
  m_tr = len(train[:,0])
  m_val = len(val[:,0])
  itr = 1000
  alpha = np.linspace(0.001,0.01,10)
  lamb = 10
  b_min = 10000
  c = np.ones(len(alpha))
  w_opt = np.zeros(n)  #stores the optimal weight value
  min=0
  m_batch = 20

  for j in range(len(alpha)):
    w = np.random.rand(n)
    j_list = np.ones(itr)
    for k in range(itr):
      np.random.shuffle(train)
      h = hypothesis(w,train,m_tr,n)
      w = update(w,alpha[j],lamb,train,h,m_batch,n)
    h = hypothesis(w,val,m_val,n)
    c[j] = cost(h,val,lamb,m_val,w)
    if c[j] < b_min:
      min = j
      b_min = c[j]
      w_opt = w  
  h = hypothesis(w_opt,train,m_tr,n)
  opt_cost.append(cost(h,train,lamb,m_tr,w_opt)) # cal the final cost value
  opt_weights.append(w_opt) # append the optimal weight values for every group

m_te = len(insts_te[:,0])
y_acu = insts_te[:,-1].astype(int)
y_pre = np.zeros(len(insts_te)).astype(int)
y_cost = np.zeros(len(insts_te))
i = 1
for w in opt_weights:
  h_te = hypothesis(w,insts_te,m_te,n)
  y_pe = np.round(h_te)
  for j in range(len(y_pe)):
    if y_pe[j]==1 and y_cost[j]==0:
      y_pre[j] = i
      y_cost[j] = opt_cost[i-1]
    if y_pe[j] == 1 and y_cost[j] !=0:
      if y_cost[j]>opt_cost[i-1]:
        y_pre[j] = i
        y_cost[j] = opt_cost[i-1]
  i+=1
confusion_matrix = np.zeros(9).reshape(3,3)

for i in range(m_te):
  if y_pre[i] == y_acu[i]:
    idx = y_acu[i]
    confusion_matrix[idx-1,idx-1] +=1
  else:
    confusion_matrix[y_acu[i]-1,y_pre[i]-1] +=1
performance(confusion_matrix,m_te)

accuracy for group1: 0.8823529411764706
accuracy for group2: 1.0
accuracy for group3: 0.8235294117647058
overall accuracy: 0.8809523809523809


# Multiclass Logistic regression + SGD + L2-NORM

In [32]:
%reset-f
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt 
import math
import copy
path = '/content/drive/MyDrive/ML/data_q6_q7.xlsx' 
df = pd.read_excel(path)
insts = df.to_numpy()
m= len(insts[:,0]) #No of instinces
ones = np.ones((m,1))
insts = np.append(ones,insts,axis=1)
n = len(insts[0,:])-1 #no of features including f0 n=31
for i in range(1,n,1):
  insts[:,i] = (insts[:,i] - insts[:,i].mean())/insts[:,i].std() #Normalize the data
np.random.shuffle(insts)
insts_tr = insts[0:int(m*0.7),:]
insts_te = insts[int(m*0.7):int(m*0.9),:]
insts_val = insts[int(m*0.9):m,:]
# hypothesis for logistic regression which is the prediction 
def hypothesis(w,insts,m,n):
  z = np.dot(insts[:,0:n],w.T)
  h = 1/(1+np.exp(-z.astype(np.float32))) #working
  return h

# cost function for logistic regression
def cost(h,insts,lamb,m,w):
  c = 0
  for i in range(m):
    c_te = (-insts_tr[i,-1]*np.log(h[i]) - (1- insts_tr[i,-1])*np.log(1 - h[i]))/m
    if not(np.isnan(c_te)) and not(math.isinf(c_te)):
      c = c+ c_te
  return c + 0.5*lamb*(np.dot(w,w))
# update of weight values for logistic regression
def update(w,alpha,lamb,insts,h,m,n):
  d = np.zeros(n)
  for i in range(n):
    d[i] = np.dot( (h[0:m]- insts[0:m,-1]),insts[0:m,i] )
  for i in range(n):
    w[i] = w[i]*(1-alpha*lamb) - alpha*d[i]

  return w
def update_stotastic(w,alpha,lamb,insts,h,index,n):
  d = np.zeros(n)
  for i in range(n):
    d[i] = (h[index]- insts[index,-1])*insts[index,i] 
  for i in range(n):
    w[i] = w[i]*(1-alpha*lamb) - alpha*d[i]

  return w

def performance(mat,m):
  ind_acc = np.ones(3)
  for i in range(3):
    ind_acc[i] = mat[i,i]/(mat[i,0]+mat[i,1]+mat[i,2])
    print('accuracy for group',end='')
    print(i+1, end=': ')
    print(ind_acc[i])
  print('overall accuracy:',end= ' ')
  print((mat[0,0]+mat[1,1]+mat[2,2])/m)

  
opt_weights_s = [] # stores the weight values for all the models
opt_cost_s = [] # stores the final optimal cost value at the end of each group
for i in range(1,4):
  train = copy.deepcopy(insts_tr)
  val = copy.deepcopy(insts_val)
  train[:,-1] = (train[:,-1]==i).astype(int) # assignes the value 1 if the level is i
  val[:,-1] = (val[:,-1]==i).astype(int)# assignes the value 0 if the level is not i
  m_tr = len(train[:,0])
  m_val = len(val[:,0])
  itr = np.linspace(100,1000,10).astype(int)
  alpha = np.linspace(0.0001,0.001,10)
  lamb = 10
  b_min = 10000
  c = np.ones(len(alpha))
  w_opt = np.zeros(n)  #stores the optimal weight value
  min=0
  

  for j in range(len(alpha)):
    for it in itr:
      w = np.random.rand(n)
      
      for k in range(it):
        index = np.random.randint(m_tr)
        h = hypothesis(w,train,m_tr,n)
        w = update_stotastic(w,alpha[j],lamb,train,h,index,n)
      h = hypothesis(w,val,m_val,n)
      c[j] = cost(h,val,lamb,m_val,w)
      if c[j] < b_min:
        min = j
        b_min = c[j]
        w_opt = w  
  h = hypothesis(w_opt,train,m_tr,n)
  opt_cost_s.append(cost(h,train,lamb,m_tr,w_opt)) # cal the final cost value
  opt_weights_s.append(w_opt) # append the optimal weight values for every group

m_te = len(insts_te[:,0])
y_acu = insts_te[:,-1].astype(int)
y_pre = np.zeros(len(insts_te)).astype(int)
y_cost = np.zeros(len(insts_te))
i = 1
for w in opt_weights_s:
  h_te = hypothesis(w,insts_te,m_te,n)
  y_pe = np.round(h_te)
  for j in range(len(y_pe)):
    if y_pe[j]==1 and y_cost[j]==0:
      y_pre[j] = i
      y_cost[j] = opt_cost_s[i-1]
    if y_pe[j] == 1 and y_cost[j] !=0:
      if y_cost[j]>opt_cost_s[i-1]:
        y_pre[j] = i
        y_cost[j] = opt_cost_s[i-1]
  i+=1
y_actual = pd.Series(y_acu, name='Actual')
y_pred = pd.Series(y_pre, name='Predicted')
confmat = pd.crosstab(y_actual, y_pred)
confmat = np.asarray(confmat)

performance(confmat,m_te)

accuracy for group1: 0.36363636363636365
accuracy for group2: 0.08333333333333333
accuracy for group3: 0.8947368421052632
overall accuracy: 0.5238095238095238


# Multiclass Logistic regression + BGD + L1-NORM

In [33]:
%reset-f

In [34]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt 
import math
import copy

In [35]:
path = '/content/drive/MyDrive/ML/data_q6_q7.xlsx' 
df = pd.read_excel(path)
insts = df.to_numpy()
m= len(insts[:,0]) #No of instinces
ones = np.ones((m,1))
insts = np.append(ones,insts,axis=1)
n = len(insts[0,:])-1 #no of features including f0 n=31
for i in range(1,n,1):
  insts[:,i] = (insts[:,i] - insts[:,i].mean())/insts[:,i].std() #Normalize the data
insts_mb = copy.deepcopy(insts)
insts_s = copy.deepcopy(insts)
np.random.shuffle(insts)
insts_tr = insts[0:int(m*0.7),:]
insts_te = insts[int(m*0.7):int(m*0.9),:]
insts_val = insts[int(m*0.9):m,:]

In [36]:
# hypothesis for logistic regression which is the prediction 
def hypothesis(w,insts,m,n):
  z = np.dot(insts[:,0:n],w.T)
  h = 1/(1+np.exp(-z.astype(np.float32))) #working
  return h

# cost function for logistic regression
def cost(h,insts,lamb,m,w):
  c = 0
  for i in range(m):
    c_te = (-insts_tr[i,-1]*np.log(h[i]) - (1- insts_tr[i,-1])*np.log(1 - h[i]))/m
    if not(np.isnan(c_te)) and not(math.isinf(c_te)):
      c = c+ c_te
  w_abs = np.absolute(w)
  w_abs = np.sum(w_abs)
  return c/m + 0.5*lamb*(w_abs)
# update of weight values for logistic regression
def update(w,alpha,lamb,insts,h,m,n):
  d = np.zeros(n)
  for i in range(n):
    d[i] = np.dot( (h[0:m]- insts[0:m,-1]),insts[0:m,i] )
  for i in range(n):
    w[i] = (w[i]-alpha*lamb*np.sign(w[i])) - alpha*d[i]

  return w
def update_stotastic(w,alpha,lamb,insts,h,index,n):
  d = np.zeros(n)
  for i in range(n):
    d[i] = (h[index]- insts[index,-1])*insts[index,i] 
  for i in range(n):
    w[i] = (w[i]-alpha*lamb*np.sign(w[i])) - alpha*d[i]

  return w

def performance(mat,m):
  for i in range(3):
    ia = mat[i,i]/(mat[i,0]+mat[i,1]+mat[i,2])
    print('accuracy for group',end='')
    print(i+1, end=': ')
    print(ia)
  print('overall accuracy:',end= ' ')
  print((mat[0,0]+mat[1,1]+mat[2,2])/m)

In [37]:
opt_weights = [] # stores the weight values for all the models
opt_cost = [] # stores the final optimal cost value at the end of each group

In [38]:
for i in range(1,4):
  train = insts_tr.copy()
  val = insts_val.copy()
  train[:,-1] = (train[:,-1]==i).astype(int) # assignes the value 1 if the level is i
  val[:,-1] = (val[:,-1]==i).astype(int)# assignes the value 0 if the level is not i
  m_tr = len(train[:,0])
  m_val = len(val[:,0])
  itr = 1000
  alpha = np.linspace(0.001,0.01,10)
  lamb = np.linspace(0,100,10)
  b_min = 10000
  c = np.ones(len(lamb))
  w_opt = np.zeros(n)  #stores the optimal weight value
  min=0
  for j in range(len(lamb)):
    for a in alpha:
      w = np.random.rand(n)
      j_list = np.ones(itr)
      for k in range(itr):
        h = hypothesis(w,train,m_tr,n)
        w = update(w,a,lamb[j],train,h,m_tr,n)
      h = hypothesis(w,val,m_val,n)
      c[j] = cost(h,val,lamb[j],m_val,w)
      if c[j] < b_min:
        min = j
        b_min = c[j]
        w_opt = w  
  h = hypothesis(w_opt,train,m_tr,n)
  opt_cost.append(cost(h,train,lamb[min],m_tr,w_opt)) # cal the final cost value
  opt_weights.append(w_opt) # append the optimal weight values for every group

m_te = len(insts_te[:,0])
y_acu = insts_te[:,-1].astype(int)
y_pre = np.zeros(len(insts_te)).astype(int)
y_cost = np.zeros(len(insts_te))
i = 1
for w in opt_weights:
  h_te = hypothesis(w,insts_te,m_te,n)
  y_pe = np.round(h_te)
  for j in range(len(y_pe)):
    if y_pe[j]==1 and y_cost[j]==0:
      y_pre[j] = i
      y_cost[j] = opt_cost[i-1]
    if y_pe[j] == 1 and y_cost[j] !=0:
      if y_cost[j]>opt_cost[i-1]:
        y_pre[j] = i
        y_cost[j] = opt_cost[i-1]
  i+=1
confusion_matrix = np.zeros(9).reshape(3,3)

for i in range(m_te):
  if y_pre[i] == y_acu[i]:
    idx = y_acu[i]
    confusion_matrix[idx-1,idx-1] +=1
  else:
    confusion_matrix[y_acu[i]-1,y_pre[i]-1] +=1
performance(confusion_matrix,m_te)

accuracy for group1: 0.8666666666666667
accuracy for group2: 1.0
accuracy for group3: 0.9230769230769231
overall accuracy: 0.9285714285714286


# Multiclass Logistic regression + MBG + L1-NORM

In [48]:
np.random.shuffle(insts_mb)
insts_tr = insts_mb[0:int(m*0.7),:]
insts_te = insts_mb[int(m*0.7):int(m*0.9),:]
insts_val = insts_mb[int(m*0.9):m,:]

In [49]:
opt_weights = [] # stores the weight values for all the models
opt_cost = [] # stores the final optimal cost value at the end of each group

In [50]:
for i in range(1,4):
  train = insts_tr.copy()
  val = insts_val.copy()
  train[:,-1] = (train[:,-1]==i).astype(int) # assignes the value 1 if the level is i
  val[:,-1] = (val[:,-1]==i).astype(int)# assignes the value 0 if the level is not i
  m_tr = len(train[:,0])
  m_val = len(val[:,0])
  itr = 1000
  alpha = np.linspace(0.001,0.01,10)
  lamb = 10
  b_min = 10000
  c = np.ones(len(alpha))
  w_opt = np.zeros(n)  #stores the optimal weight value
  min=0
  m_batch = 20

  for j in range(len(alpha)):
    w = np.random.rand(n)
    j_list = np.ones(itr)
    for k in range(itr):
      np.random.shuffle(train)
      h = hypothesis(w,train,m_tr,n)
      w = update(w,alpha[j],lamb,train,h,m_batch,n)
    h = hypothesis(w,val,m_val,n)
    c[j] = cost(h,val,lamb,m_val,w)
    if c[j] < b_min:
      min = j
      b_min = c[j]
      w_opt = w  
  h = hypothesis(w_opt,train,m_tr,n)
  opt_cost.append(cost(h,train,lamb,m_tr,w_opt)) # cal the final cost value
  opt_weights.append(w_opt) # append the optimal weight values for every group

m_te = len(insts_te[:,0])
y_acu = insts_te[:,-1].astype(int)
y_pre = np.zeros(len(insts_te)).astype(int)
y_cost = np.zeros(len(insts_te))
i = 1
for w in opt_weights:
  h_te = hypothesis(w,insts_te,m_te,n)
  y_pe = np.round(h_te)
  for j in range(len(y_pe)):
    if y_pe[j]==1 and y_cost[j]==0:
      y_pre[j] = i
      y_cost[j] = opt_cost[i-1]
    if y_pe[j] == 1 and y_cost[j] !=0:
      if y_cost[j]>opt_cost[i-1]:
        y_pre[j] = i
        y_cost[j] = opt_cost[i-1]
  i+=1
confusion_matrix = np.zeros(9).reshape(3,3)

for i in range(m_te):
  if y_pre[i] == y_acu[i]:
    idx = y_acu[i]
    confusion_matrix[idx-1,idx-1] +=1
  else:
    confusion_matrix[y_acu[i]-1,y_pre[i]-1] +=1
performance(confusion_matrix,m_te)

accuracy for group1: 0.0
accuracy for group2: 1.0
accuracy for group3: 1.0
overall accuracy: 0.6666666666666666


# Multiclass Logistic regression + SGD + L1-NORM

In [91]:
np.random.shuffle(insts_s)
insts_tr = insts_s[0:int(m*0.7),:]
insts_te = insts_s[int(m*0.7):int(m*0.9),:]
insts_val = insts_s[int(m*0.9):m,:]

In [92]:
opt_weights_s = [] # stores the weight values for all the models
opt_cost_s = [] # stores the final optimal cost value at the end of each group

In [93]:
for i in range(1,4):
  train = insts_tr.copy()
  val = insts_val.copy()
  train[:,-1] = (train[:,-1]==i).astype(int) # assignes the value 1 if the level is i
  val[:,-1] = (val[:,-1]==i).astype(int)# assignes the value 0 if the level is not i
  m_tr = len(train[:,0])
  m_val = len(val[:,0])
  itr = 1000
  alpha = np.linspace(0.0001,0.001,10)
  lamb = 10
  b_min = 10000
  c = np.ones(len(alpha))
  w_opt = np.zeros(n)  #stores the optimal weight value
  min=0
  

  for j in range(len(alpha)):
    w = np.random.rand(n)
    j_list = np.ones(itr)
    for k in range(itr):
      index = np.random.randint(m_tr)
      h = hypothesis(w,train,m_tr,n)
      w = update_stotastic(w,alpha[j],lamb,train,h,index,n)
    h = hypothesis(w,val,m_val,n)
    c[j] = cost(h,val,lamb,m_val,w)
    if c[j] < b_min:
      min = j
      b_min = c[j]
      w_opt = w  
  h = hypothesis(w_opt,train,m_tr,n)
  opt_cost_s.append(cost(h,train,lamb,m_tr,w_opt)) # cal the final cost value
  opt_weights_s.append(w_opt) # append the optimal weight values for every group

m_te = len(insts_te[:,0])
y_acu = insts_te[:,-1].astype(int)
y_pre = np.zeros(len(insts_te)).astype(int)
y_cost = np.zeros(len(insts_te))
i = 1
for w in opt_weights_s:
  h_te = hypothesis(w,insts_te,m_te,n)
  y_pe = np.round(h_te)
  for j in range(len(y_pe)):
    if y_pe[j]==1 and y_cost[j]==0:
      y_pre[j] = i
      y_cost[j] = opt_cost_s[i-1]
    if y_pe[j] == 1 and y_cost[j] !=0:
      if y_cost[j]>opt_cost_s[i-1]:
        y_pre[j] = i
        y_cost[j] = opt_cost_s[i-1]
  i+=1
confusion_matrix = np.zeros(9).reshape(3,3)

for i in range(m_te):
  if y_pre[i] == y_acu[i]:
    idx = y_acu[i]
    confusion_matrix[idx-1,idx-1] +=1
  else:
    confusion_matrix[y_acu[i]-1,y_pre[i]-1] +=1
performance(confusion_matrix,m_te)

accuracy for group1: 0.15384615384615385
accuracy for group2: 0.9166666666666666
accuracy for group3: 0.9411764705882353
overall accuracy: 0.6904761904761905
