## Grid Search

In [None]:
import numpy as np
import pandas as pd

import itertools

In [None]:
class LogisticRegression:
    """Binary logistic regression trained with full-batch or mini-batch gradient steps.

    The coefficients are learned by repeatedly stepping along
    ``X.T @ (y - sigmoid(X @ betas))``; an intercept column of ones is
    prepended to the features automatically by both ``fit`` and ``predict``.
    After ``fit`` the coefficient column vector (intercept first) is stored
    in ``self._b``.
    """

    def fit(self, X, y, descent = "stochastic", n_iter = 100000, learning_rate = 0.1, lossConvergence = 0.01, batch_size = 32):
        """Fit the coefficients and store them in ``self._b``.

        Parameters
        ----------
        X : feature table (pandas DataFrame or 2-D array), one row per sample.
        y : labels (pandas Series or array), one entry per row of ``X``.
        descent : ``"stochastic"`` selects mini-batch updates; any other
            value selects full-batch gradient descent.
        n_iter : maximum number of update steps.
        learning_rate : step size multiplier.
        lossConvergence : training stops early once the loss of the current
            (mini-)batch falls below this value.
        batch_size : rows drawn per step (stochastic descent only).
        """
        # Build the design matrix from the X that was passed in, not from a
        # notebook-level global, so the model works on any data set.
        X = self._design_matrix(X)
        self._X = X
        self._y = y

        if descent == "stochastic":
            self._b = self._stochastic_gradient_descent(X,
                                                        y,
                                                        n_iter = n_iter,
                                                        learning_rate = learning_rate,
                                                        lossConvergence = lossConvergence,
                                                        batch_size = batch_size)
        else:
            # lossConvergence is forwarded so the caller's threshold is
            # honoured for full-batch descent as well.
            self._b = self._gradient_descent(X,
                                             y,
                                             n_iter = n_iter,
                                             learning_rate = learning_rate,
                                             lossConvergence = lossConvergence)

    @staticmethod
    def _design_matrix(X):
        """Return ``X`` as an ndarray with a leading column of ones (the intercept)."""
        values = X.values if hasattr(X, "values") else np.asarray(X)
        intercept_col = np.ones((values.shape[0], 1))
        return np.hstack((intercept_col, values))

    @staticmethod
    def _as_column(y):
        """Return the labels as an ``(n, 1)`` ndarray."""
        return np.asarray(y).reshape(-1, 1)

    # Stochastic Gradient Descent Algorithm for Logistic Regression
    def _stochastic_gradient_descent(self, X, y, n_iter = 10000, learning_rate = 0.01, lossConvergence = .001, batch_size = 128):
        """Run mini-batch updates on design matrix ``X`` and return the coefficients.

        Each step draws ``batch_size`` rows without replacement (a fresh
        shuffle per step), and stops early when the loss on that batch is
        below ``lossConvergence``.
        """
        y = self._as_column(y)
        # Every coefficient starts at 0.1.
        betas = np.zeros(X.shape[1]).reshape(X.shape[1],1) + 0.1
        for _ in range(n_iter):
            X_index = np.arange(X.shape[0])
            np.random.shuffle(X_index)
            batch_index = X_index[:batch_size]

            X_batch = X[batch_index,:]
            y_batch = y[batch_index,:]

            yhat = self._sigmoid(np.dot(X_batch, betas))
            gradient = np.dot(X_batch.T,(y_batch - yhat))
            if self._loss(yhat, y_batch) < lossConvergence:
                break
            # NOTE(review): the gradient is scaled by the number of columns
            # (X.shape[1]), not by the batch size. Kept as-is because the
            # learning rates in use were chosen against this scaling.
            betas +=  learning_rate * (gradient/X.shape[1])
        return betas

    # Gradient Descent Algorithm for Logistic Regression
    def _gradient_descent(self, X,y, n_iter = 10000, learning_rate = 0.01, lossConvergence = .001):
        """Run full-batch updates on design matrix ``X`` and return the coefficients.

        Returns only the coefficient column vector, matching
        ``_stochastic_gradient_descent``, so ``fit`` can store it directly.
        """
        y = self._as_column(y)
        betas = np.zeros(X.shape[1]).reshape(X.shape[1],1) + 0.1
        for _ in range(n_iter):
            yhat = self._sigmoid(np.dot(X, betas))
            gradient = np.dot(X.T,(y - yhat))
            if self._loss(yhat, y) < lossConvergence:
                break
            # NOTE(review): same column-count scaling as the stochastic variant.
            betas +=  learning_rate * (gradient/X.shape[1])
        return betas

    def _loss(self, yhat, y):
        """Return the mean of ``-log(yhat)`` where ``y == 1`` and ``-log(1 - yhat)`` where ``y == 0``."""
        loss_vals = yhat.copy()
        loss_vals[y == 1] = -np.log(yhat[y==1])
        loss_vals[y == 0] = -np.log(1 - yhat[y==0])
        return(loss_vals.mean())

    def _sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` applied element-wise."""
        return 1 / (1 + np.exp(-z))

    def predict(self, X):
        """Return predicted probabilities for ``X`` as an ``(n_samples, 1)`` array.

        ``X`` must have the same feature columns, in the same order, as the
        data passed to ``fit``; the intercept column is added here.
        """
        X = self._design_matrix(X)
        return self._sigmoid(np.dot(X, self._b))
    

In [None]:
class ConfusionMatrix:
    """Confusion-matrix rates for binary predictions thresholded at 0.5.

    Attributes
    ----------
    true_positive, false_positive, false_negative, true_negative :
        Each count divided by the number of samples, rounded to 2 decimals.
    df :
        2x2 DataFrame laid out as ``[[TP, FP], [FN, TN]]`` with both the row
        and the column labels set to ``"Positive"`` / ``"Negative"``.
    """

    def __init__(self, yhat, y):
        """Compute the rates from predicted scores ``yhat`` and 0/1 labels ``y``.

        Both inputs are flattened, so column vectors, Series and
        single-column DataFrames are all accepted.

        Raises
        ------
        ValueError
            If ``yhat`` and ``y`` do not contain the same number of values.
        """
        scores = np.asarray(yhat).ravel()
        labels = np.asarray(y).ravel()
        if scores.shape[0] != labels.shape[0]:
            raise ValueError(
                "yhat and y must have the same number of elements "
                "(got %d and %d)" % (scores.shape[0], labels.shape[0])
            )

        # Two explicit masks (rather than one mask and its negation) so that
        # NaN scores fall into neither class and are simply not counted.
        predicted_positive = scores >= 0.5
        predicted_negative = scores < 0.5
        actual_positive = labels == 1
        actual_negative = labels == 0

        true_positive_num = int(np.count_nonzero(predicted_positive & actual_positive))
        false_positive_num = int(np.count_nonzero(predicted_positive & actual_negative))
        true_negative_num = int(np.count_nonzero(predicted_negative & actual_negative))
        false_negative_num = int(np.count_nonzero(predicted_negative & actual_positive))

        t = labels.shape[0]

        def rate(count):
            # With no samples every rate is reported as 0.0 instead of
            # dividing by zero.
            return np.round(count / t, 2) if t else 0.0

        self.true_positive = rate(true_positive_num)
        self.false_positive = rate(false_positive_num)
        self.false_negative = rate(false_negative_num)
        self.true_negative = rate(true_negative_num)

        axis_labels = ["Positive", "Negative"]
        self.df = pd.DataFrame(
            [[self.true_positive, self.false_positive],
             [self.false_negative, self.true_negative]],
            index=axis_labels,
            columns=axis_labels,
        )

In [None]:
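# Uses stochastic gradient descent at the moment.
# Assumes the following variables are defined in the notebook's global scope:

# X_train_subset_tfidf - tfidf train set
# X_test_subset_tfidf - tfidf test set
# y_train - training labels; its "Sentiment" column is used for fitting

# y - matrix of 12,500 0s and 12,500 1s (compared against the test-set predictions)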

def logistic_regression_grid_search(n_iter_list, learning_rate_list, lossConvergence_list, batch_size_list):
    """Fit and score one stochastic logistic regression per hyperparameter combination.

    Every element of the Cartesian product of the four lists is tried in
    turn. For each one the model is fitted on the global
    ``X_train_subset_tfidf`` / ``y_train["Sentiment"]``, predictions are made
    for the global ``X_test_subset_tfidf``, and a ``ConfusionMatrix`` is
    built against the global ``y``. Progress and results are printed.

    Returns a list of ``(combination, confusion_matrix)`` tuples, where
    ``combination`` is ``(n_iter, learning_rate, lossConvergence, batch_size)``.
    """
    # A single model object is reused; each fit() overwrites its coefficients.
    model = LogisticRegression()
    results = []

    report_labels = ("n_iter: ", "learning rate: ", "lossConvergence: ", "batch size: ")
    fit_keywords = ("n_iter", "learning_rate", "lossConvergence", "batch_size")

    for combination in itertools.product(n_iter_list, learning_rate_list, lossConvergence_list, batch_size_list):
        print("Running combination:")
        for label, value in zip(report_labels, combination):
            print(label, value)

        hyperparameters = dict(zip(fit_keywords, combination))
        model.fit(X_train_subset_tfidf, y_train["Sentiment"], descent="stochastic", **hyperparameters)

        test_scores = model.predict(X_test_subset_tfidf)

        # NOTE(review): scored against the global `y`; presumably these are
        # the test-set labels - confirm against the cell that defines it.
        matrix = ConfusionMatrix(test_scores, y)

        print("Data Frame:")
        print(matrix.df)

        print("Accuracy:", matrix.true_positive + matrix.true_negative)

        print("\n")

        # Keep the hyperparameters alongside the matrix they produced.
        results.append((combination, matrix))

    return results

In [None]:
# Hyperparameter grid: every combination of the values below is tried.
n_iter_list = [
    50000,
    100000,
    200000,
]
learning_rate_list = [
    1,
]
lossConvergence_list = [
    0.01,
    0.05,
]
batch_size_list = [
    512,
    1024,
    2048,
]

In [None]:
# Run the full sweep; each entry is a (combination, confusion matrix) tuple.
unique_regressions = logistic_regression_grid_search(
    n_iter_list=n_iter_list,
    learning_rate_list=learning_rate_list,
    lossConvergence_list=lossConvergence_list,
    batch_size_list=batch_size_list,
)