## Importing Libraries

In [451]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import math
from mpl_toolkits.mplot3d import Axes3D
import random

### Setting up seed

In [452]:
random.seed(3116)

### Utilities functions for logistic regression

In [453]:
def _sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [454]:
def y_prediction(X_data, beta):
    beta = np.dot(X_data, beta)
    y_predict = _sigmoid(beta)
    return y_predict

In [455]:
def logloss_function_new(X_data, Y_data, beta): 
    y_hat = y_prediction(X_data, beta)
    
    l = (Y_data * np.log(y_hat)) + ((1-Y_data)*np.log(1-y_hat))
    l = sum(l) 
    l= -1*l
    return np.array(l)[0][0]


In [456]:
def learn_logreg_GA(X_data, Y_data, X_test, Y_test, lr, lamda, nBatches, itrs):
    betas = np.zeros((X_data.shape[1], 1)) 
    listTestAccr = [] 
    listTestloss = [] 
    for k in range(itrs): 
        oldLoss = logloss_function_new(X_data, Y_data, betas)
        
        for s in range(math.ceil(len(X_data)/nBatches)): 
            y_hat = y_prediction(X_data[s*nBatches:(s+1)*nBatches],betas)
            
            betas = (1 - 2*lr*lamda)*betas - lr * (-2 * np.dot(X_data[s*nBatches:(s+1)*nBatches].T,
                                                               Y_data.T[s*nBatches:(s+1)*nBatches]-y_hat))
        
        y_hat_test = y_prediction(X_test,betas)
        predicted_labels = [1 if x >= .5 else 0 for x in y_hat_test]
        no_true = np.count_nonzero(predicted_labels == Y_test)
        no_false = Y_test.shape[1] - no_true
        test_accuracy = no_true/(no_true + no_false)
  
        newLoss = logloss_function_new(X_data, Y_data, betas) 
    
        listTestAccr.append(test_accuracy)        
        listTestloss.append(logloss_function_new(X_test, Y_test, betas))
        

        if np.abs(newLoss - oldLoss) < 0.00000001: 
            break
        
    return listTestloss, listTestAccr

### Function to split dataset into train,test,validation set as mentioned in exercise along with normalization

In [457]:
def split(df,targetcol):
    X = df.drop(targetcol,axis=1)
    Y = df[targetcol]  
    X_copy = X.copy()
    
    Xtrain = X_copy.sample(frac=0.70, random_state=0)
    
    Xtest = X_copy.drop(Xtrain.index)
    Xtest_copy = Xtest.copy()
    Xtest = Xtest_copy.sample(frac=0.50, random_state=0)
    
    Xvalidate = Xtest_copy.drop(Xtest.index)
    
    
    norm_Xtrain = np.linalg.norm(Xtrain, axis = 1, keepdims = True)
    Xtrain = Xtrain / norm_Xtrain
    norm_Xtest = np.linalg.norm(Xtest, axis = 1, keepdims = True)
    Xtest = Xtest / norm_Xtest
    norm_Xval = np.linalg.norm(Xvalidate, axis = 1, keepdims = True)
    Xval = Xvalidate / norm_Xval
    
    
    Y_copy = Y.copy()
    
    Ytrain = Y_copy.sample(frac=0.70, random_state=0)
    
    Ytest = Y_copy.drop(Ytrain.index)
    Ytest_copy = Ytest.copy()
    Ytest = Ytest_copy.sample(frac=0.50, random_state=0)
    
    Yvalidate = Ytest_copy.drop(Ytest.index)
    
    Ytrain = np.matrix(Ytrain)
    Ytest = np.matrix(Ytest)
    Yval = np.matrix(Yvalidate)
    
    return Xtrain, Ytrain, Xval, Yval, Xtest, Ytest

### Loading dataset

In [458]:
data1 = pd.read_csv('bank.csv', delimiter= ';', header=0)

data1 = data1.drop('duration', axis = 1) # dropping coloumn of duration because in exercise 1 backward selection, removing this coloumn gave us minimum aic metric.

data1['y'] = data1['y'].map({'yes': 1, 'no': 0}) #changing label to numeric 

data1 = pd.get_dummies(data1)

data1.insert(loc=0,column='bias',value=np.ones(len(data1))) #inserting bias to dataset

In [459]:
Xtrain, Ytrain, Xval, Yval, Xtest, Ytest = split(data1,"y")

In [460]:
print (Xtrain.shape)
print (Xtest.shape)
print (Xval.shape)

print (Ytrain.shape)
print (Ytest.shape)
print (Yval.shape)

(3165, 51)
(678, 51)
(678, 51)
(1, 3165)
(1, 678)
(1, 678)


### Function to get random hyperparameter configurations

In [461]:
def get_random_hyperparameter_configuration(n, L):
    
    lambdA = np.arange(1e-5 ,1e-2, (1e-2 - 1e-5)/n)
    batch_size = np.random.randint(10, L, size=n, dtype=int)
    alpha = np.arange(1e-5,1e-2, (1e-2 - 1e-5)/n)
    hyperparameter_configuration = [alpha, lambdA, batch_size]
    hyperparameter_configuration = np.vstack(hyperparameter_configuration).T
    return hyperparameter_configuration

### Function to calculate validation loss and Accuracy

In [462]:
def val_loss(Xtrain, Ytrain, Xval, Yval, hyperparameter, r):
    l_test = []
    for h in range(np.shape(hyperparameter)[0]):
        loss_t, accu_t = learn_logreg_GA(Xtrain, Ytrain, Xval, Yval, hyperparameter[h,0], hyperparameter[h,1], int(hyperparameter[h,2]), r)
        l_test.append(loss_t)
    return l_test

### Function that uses Hyperband Algorithm for tuning the hyperparameters of Logistic Regression

In [463]:
def hyperband(Xtrain, Ytrain, Xval, Yval, max_iter, eta):
    
    smax = math.floor(np.abs(np.log(max_iter)/np.log(eta))) # number of unique executions of Successive Halving (minus one)
    
    B = max_iter  * (smax+1) # total number of iterations (without reuse) per execution of Succesive Halving (n,r)
    
    # Begin Finite Horizon Hyperband outlerloop
    for s in range(smax,0,-1):
        n = math.ceil((B*eta*s)/(max_iter*(s+1)))  # initial number of configurations
        r = int(max_iter/eta**s)  # initial number of iterations to run configurations for
        print("s: ", s)
        T = get_random_hyperparameter_configuration(n,100) # Begin Finite Horizon Successive Halving with (n,r)     
        for i in range(s):
            # Run each of the n_i configs for r_i iterations and keep best n_i/eta
            n_i = math.floor(n/eta**i)
            r_i = r*eta**i
            L= val_loss(Xtrain, Ytrain, Xval, Yval, T, r_i)
            t_k = math.ceil(n_i/eta)
            combined_s = np.append(T, L, axis=1)
            sorted_s = combined_s[np.argsort(combined_s[:,-1])]
            T  = sorted_s[0:t_k, 0:3]
        # End Finite Horizon Successive Halving with (n,r)
    return T

### Setting up HyperBand Algorithm hyperparameters

In [464]:
max_iter = 81  # maximum iterations/epochs per configuration
eta = 3 # defines downsampling rate (default=3)

### Running HyperBand Algorithm to get optimize hyperparameters

In [465]:
s = hyperband(Xtrain, Ytrain, Xval, Yval,max_iter,eta)

s:  4
s:  3
s:  2
s:  1


In [466]:
s[0]

array([1.25875e-03, 1.25875e-03, 9.60000e+01])

### Using optimize paramters to run logistic regression and get accuracy and loss on test set

In [470]:
loss_t, accu_t = learn_logreg_GA(Xtrain, Ytrain, Xtest, Ytest, 0.00125875,0.00125875, 96 , 100)

### Test Loss

In [471]:
loss_t[-1]

306.1218031669333

### Test Accuracy

In [472]:
accu_t[-1]

0.8303834808259587