In [None]:
import numpy as np
import pandas as pd

def h(x,w):
  """Linear hypothesis: the dot product of the inputs ``x`` with the weights ``w``.

  Both arguments are handed to ``np.dot`` unchanged, so ``x`` may be a single
  feature vector or a matrix with one sample per row.
  """
  prediction = np.dot(x,w)
  return prediction

def mse(y,h):
  """Mean squared error between targets ``y`` and predictions ``h``.

  The squared differences are summed and scaled by ``1/len(y)``.
  """
  residual = y-h
  n = len(y)
  return (1/n)*np.sum(np.square(residual))

def mae(y,h):
  """Mean absolute error between targets ``y`` and predictions ``h``.

  The absolute differences are summed and scaled by ``1/len(y)``.
  """
  residual = y-h
  n = len(y)
  return (1/n)*np.sum(np.abs(residual))

def cc(y,h):
  """Correlation coefficient (Pearson's r) between targets ``y`` and predictions ``h``.

  Computed as the sum of products of the mean-centred values divided by the
  square root of the product of their summed squares.
  """
  y_centred = y-np.mean(y)
  h_centred = h-np.mean(h)
  co_variation = np.sum(np.multiply(y_centred, h_centred))
  spread = np.sqrt(np.sum(np.square(y_centred))*np.sum(np.square(h_centred)))
  return co_variation / spread

In [None]:
# Load the spreadsheet: the 'Output' column is the target, every other column is a feature.
data = pd.read_excel("/content/drive/MyDrive/NNFL Assignments (Aug 2021)/Assignment 1/data_q2_q3.xlsx")
y = np.array(data.pop('Output'), ndmin=1).T # output values; pop() also removes the column from data
data.insert(0, "x0", pd.Series(np.ones(len(y)))) # leading column of ones in the feature matrix
x = np.array(data)

# Sequential split in file order: first 70% training, next 10% validation, remainder test.
# NOTE(review): rows are not shuffled before splitting. The outputs recorded in this
# notebook show a negative test CC (about -0.80) next to a positive training CC;
# if the rows are ordered this split may be the cause - confirm.
m = len(y)
trainp = int(np.floor(0.7*m)) # 70% training
validp = int(np.floor(0.1*m)) # 10% validation
cuts = [trainp, trainp+validp]

y_train, y_valid, y_test = np.split(y, cuts)
x_train, x_valid, x_test = np.split(x, cuts, axis=0)

# Normalise with statistics taken from the training rows only:
# each feature column by its largest absolute training value ...
qq = np.amax(np.abs(x_train), axis=0)
x_train, x_valid, x_test = x_train/qq, x_valid/qq, x_test/qq

# ... and the target by its largest absolute training value.
pp = np.amax(np.abs(y_train))
y_train, y_valid, y_test = y_train/pp, y_valid/pp, y_test/pp

# sample counts per split
m_train, m_valid, m_test = len(y_train), len(y_valid), len(y_test)

In [None]:
# grid search for Ridge regression + BGD (author's recorded runtime: ~4m)
# One model is trained from zero weights for every (T, alpha, Lambda) combination
# and its validation MSE is stored; the combination with the smallest value is reported.
alpha_grid = list(np.linspace(0.1,1,50)) # learning rates
Lambda_grid = list(np.linspace(0.1,0.01,50)) # ridge penalties (descending order)
T_grid = list(np.linspace(300,600,7)) # iteration counts (floats, cast to int when used)

n_weights = x_train.shape[1] # one weight per column of x_train instead of a hard-coded 5

mse_vals = np.zeros((len(T_grid),len(alpha_grid),len(Lambda_grid))) # validation mse for all models

# enumerate() supplies each grid position directly, rather than searching the
# lists for a float value with list.index() after every model.
for iT, T in enumerate(T_grid):
  for ia, alpha in enumerate(alpha_grid):
    for iL, Lambda in enumerate(Lambda_grid):
      w = np.zeros(n_weights) #zero weights
      for t in range(int(T)):
        # NOTE(review): w[j] is overwritten in place, so the prediction used for
        # coordinate j already contains the new w[0..j-1] (a coordinate-by-coordinate
        # sweep, not a simultaneous gradient step). Kept as-is on purpose: the values
        # this search reports are reused by the training cell under the same rule.
        for j in range(len(w)):
          w[j] = (1-alpha*Lambda)*w[j] - (alpha/m_train)*np.dot(h(x_train,w)-y_train,x_train[:,j])
      mse_vals[iT, ia, iL] = mse(y_valid, h(x_valid,w))

# index of minimum mse
index = np.unravel_index(np.argmin(mse_vals, axis=None), mse_vals.shape)

print("minimum validation mse = {}".format(mse_vals[index]))
print("index = {}".format(index))
print("Optimal T value = {}\nOptimal alpha value = {}\nOptimal Lambda value = {}".format(T_grid[index[0]],alpha_grid[index[1]],Lambda_grid[index[2]]))

minimum validation mse = 0.021961949905642493
index = (0, 48, 49)
Optimal T value = 300.0
Optimal alpha value = 0.9816326530612246
Optimal Lambda value = 0.01


In [None]:
# Ridge regression + BGD
# Final fit with the hyperparameters reported by the BGD grid search.
rng = np.random.default_rng(0) # fixed seed so a re-run starts from the same weights
w = rng.random(x_train.shape[1]) #random weights in [0, 1), one per column of x_train
T = 300
alpha = 0.9816326530612246
Lambda = 0.01

for t in range(T):
  # NOTE(review): w[j] is updated in place, so each coordinate sees the already
  # updated w[0..j-1]. Kept as-is because alpha was tuned under this exact rule.
  for j in range(len(w)):
    w[j] = (1-alpha*Lambda)*w[j] - (alpha/m_train)*np.dot(h(x_train,w)-y_train,x_train[:,j])
#training complete

#results
# predictions are computed once per split and shared by the three metrics
pred_train = h(x_train,w)
pred_test = h(x_test,w)

mse_train = mse(y_train, pred_train)
mae_train = mae(y_train, pred_train)
cc_train = cc(y_train, pred_train)

mse_test = mse(y_test, pred_test)
mae_test = mae(y_test, pred_test)
cc_test = cc(y_test, pred_test)

# NOTE(review): the output recorded for the original cell shows a negative test CC
# (about -0.80) next to a positive training CC; worth confirming whether the
# unshuffled sequential split is responsible.
print("MSE for training data = {}".format(mse_train))
print("MAE for training data = {}".format(mae_train))
print("CC for training data = {}".format(cc_train))
print()
print("MSE for test data = {}".format(mse_test))
print("MAE for test data = {}".format(mae_test))
print("CC for test data = {}".format(cc_test))

MSE for training data = 0.014692593685467943
MAE for training data = 0.102702204871611
CC for training data = 0.8552343583994862

MSE for test data = 0.04303030794700614
MAE for test data = 0.17971579460290427
CC for test data = -0.8016368701776732


In [None]:
# grid search for Ridge regression + SGD
# (author's recorded runtime ~3m, measured when every step predicted the whole training set)
# One model is trained from zero weights for every (T, alpha, Lambda) combination
# and its validation MSE is stored; the combination with the smallest value is reported.
rng = np.random.default_rng(0) # fixed seed so the sampled rows, and thus the result, are reproducible
alpha_grid = list(np.linspace(0.01,0.1,50)) # learning rates
Lambda_grid = list(np.linspace(0.001,0.1,50)) # ridge penalties
T_grid = list(np.linspace(450,750,7)) # iteration counts (floats, cast to int when used)

n_weights = x_train.shape[1] # one weight per column of x_train instead of a hard-coded 5

mse_vals = np.zeros((len(T_grid),len(alpha_grid),len(Lambda_grid))) # validation mse for all models

# enumerate() supplies each grid position directly, rather than searching the
# lists for a float value with list.index() after every model.
for iT, T in enumerate(T_grid):
  for ia, alpha in enumerate(alpha_grid):
    for iL, Lambda in enumerate(Lambda_grid):
      w = np.zeros(n_weights)
      for t in range(int(T)):
        ind = rng.integers(m_train) # one random training row per step
        x_i, y_i = x_train[ind], y_train[ind]
        # Only the sampled row is predicted; predicting all of x_train and then
        # indexing one element did the same work m_train times over.
        # w[j] is still updated in place, coordinate by coordinate, as before.
        for j in range(len(w)):
          w[j] = (1-alpha*Lambda)*w[j] - alpha*(h(x_i,w)-y_i)*x_i[j]
      mse_vals[iT, ia, iL] = mse(y_valid, h(x_valid,w))

# index of minimum mse
index = np.unravel_index(np.argmin(mse_vals, axis=None), mse_vals.shape)

print("minimum validation mse = {}".format(mse_vals[index]))
print("index = {}".format(index))
print("Optimal T value = {}\nOptimal alpha value = {}\nOptimal Lambda value = {}".format(T_grid[index[0]],alpha_grid[index[1]],Lambda_grid[index[2]]))

minimum validation mse = 0.0016680546066898136
index = (6, 43, 0)
Optimal T value = 750.0
Optimal alpha value = 0.08897959183673469
Optimal Lambda value = 0.001


In [None]:
# Ridge regression + SGD
# Final fit with the hyperparameters reported by the SGD grid search.
rng = np.random.default_rng(0) # fixed seed: same initial weights and same sampled rows on every run
w = rng.random(x_train.shape[1]) #random weights in [0, 1), one per column of x_train
T = 750
alpha = 0.08897959183673469
Lambda = 0.001

for t in range(T):
  ind = rng.integers(m_train) # one random training row per step
  x_i, y_i = x_train[ind], y_train[ind]
  # Only the sampled row is predicted instead of the whole training set.
  # w[j] is still updated in place, coordinate by coordinate, as before.
  for j in range(len(w)):
    w[j] = (1-alpha*Lambda)*w[j] - alpha*(h(x_i,w)-y_i)*x_i[j]
#training complete

#results
# predictions are computed once per split and shared by the three metrics
pred_train = h(x_train,w)
pred_test = h(x_test,w)

mse_train = mse(y_train, pred_train)
mae_train = mae(y_train, pred_train)
cc_train = cc(y_train, pred_train)

mse_test = mse(y_test, pred_test)
mae_test = mae(y_test, pred_test)
cc_test = cc(y_test, pred_test)

# NOTE(review): the output recorded for the original cell shows a negative test CC
# (about -0.80) next to a positive training CC; worth confirming whether the
# unshuffled sequential split is responsible.
print("MSE for training data = {}".format(mse_train))
print("MAE for training data = {}".format(mae_train))
print("CC for training data = {}".format(cc_train))
print()
print("MSE for test data = {}".format(mse_test))
print("MAE for test data = {}".format(mae_test))
print("CC for test data = {}".format(cc_test))

MSE for training data = 0.014935088090831918
MAE for training data = 0.10417160982805188
CC for training data = 0.8501132986721834

MSE for test data = 0.048317986199372616
MAE for test data = 0.18995248833386663
CC for test data = -0.803665983848084


In [None]:
# grid search for Ridge regression + MBGD
# (author's recorded runtime ~1h 21m, measured with the per-sample loop that
# predicted the whole training set for every batch element and coordinate)
# One model is trained from zero weights for every (T, alpha, Lambda) combination
# and its validation MSE is stored; the combination with the smallest value is reported.
nb = 32 #batch size
rng = np.random.default_rng(0) # fixed seed so the sampled batches, and thus the result, are reproducible
alpha_grid = list(np.linspace(0.1,1,50)) # learning rates
Lambda_grid = list(np.linspace(0.001,0.01,50)) # ridge penalties
T_grid = list(np.linspace(300,600,7)) # iteration counts (floats, cast to int when used)

n_weights = x_train.shape[1] # one weight per column of x_train instead of a hard-coded 5

mse_vals = np.zeros((len(T_grid),len(alpha_grid),len(Lambda_grid))) # validation mse for all models

# enumerate() supplies each grid position directly, rather than searching the
# lists for a float value with list.index() after every model.
for iT, T in enumerate(T_grid):
  for ia, alpha in enumerate(alpha_grid):
    for iL, Lambda in enumerate(Lambda_grid):
      w = np.zeros(n_weights)
      for t in range(int(T)):
        # The batch is drawn with nb (previously a literal 32) so the sample
        # count always matches the alpha/nb scaling of the step.
        ind = rng.choice(m_train, nb, replace=False)
        x_b, y_b = x_train[ind], y_train[ind]
        # Only the batch rows are predicted; the sum over the batch is the same
        # quantity as before. w[j] is still updated in place, coordinate by coordinate.
        for j in range(len(w)):
          w[j] = (1-alpha*Lambda)*w[j] - (alpha/nb)*np.dot(h(x_b,w)-y_b,x_b[:,j])
      mse_vals[iT, ia, iL] = mse(y_valid, h(x_valid,w))

# index of minimum mse
index = np.unravel_index(np.argmin(mse_vals, axis=None), mse_vals.shape)

print("minimum validation mse = {}".format(mse_vals[index]))
print("index = {}".format(index))
print("Optimal T value = {}\nOptimal alpha value = {}\nOptimal Lambda value = {}".format(T_grid[index[0]],alpha_grid[index[1]],Lambda_grid[index[2]]))

minimum validation mse = 0.005029911886075502
index = (3, 44, 1)
Optimal T value = 450.0
Optimal alpha value = 0.9081632653061225
Optimal Lambda value = 0.0011836734693877551


In [None]:
# Ridge regression + MBGD
# Final fit with the hyperparameters reported by the MBGD grid search.
nb = 32 #batch size
rng = np.random.default_rng(0) # fixed seed: same initial weights and same batches on every run
w = rng.random(x_train.shape[1]) #random weights in [0, 1), one per column of x_train
alpha = 0.9081632653061225
T = 450
Lambda = 0.0011836734693877551

for t in range(T):
  ind = rng.choice(m_train, nb, replace=False) # nb distinct training rows per step
  x_b, y_b = x_train[ind], y_train[ind]
  # Only the batch rows are predicted, instead of the whole training set once per
  # batch element. w[j] is still updated in place, coordinate by coordinate.
  for j in range(len(w)):
    w[j] = (1-alpha*Lambda)*w[j] - (alpha/nb)*np.dot(h(x_b,w)-y_b,x_b[:,j])
#training complete

#results
# predictions are computed once per split and shared by the three metrics
pred_train = h(x_train,w)
pred_test = h(x_test,w)

mse_train = mse(y_train, pred_train)
mae_train = mae(y_train, pred_train)
cc_train = cc(y_train, pred_train)

mse_test = mse(y_test, pred_test)
mae_test = mae(y_test, pred_test)
cc_test = cc(y_test, pred_test)

# NOTE(review): the output recorded for the original cell shows a negative test CC
# (about -0.80) next to a positive training CC; worth confirming whether the
# unshuffled sequential split is responsible.
print("MSE for training data = {}".format(mse_train))
print("MAE for training data = {}".format(mae_train))
print("CC for training data = {}".format(cc_train))
print()
print("MSE for test data = {}".format(mse_test))
print("MAE for test data = {}".format(mae_test))
print("CC for test data = {}".format(cc_test))

MSE for training data = 0.013320631843608821
MAE for training data = 0.09043692958704735
CC for training data = 0.8639528133301712

MSE for test data = 0.046548600095886995
MAE for test data = 0.18841642961198085
CC for test data = -0.802003445477648
