## Data Processing

In [6]:
# Load packages
import pandas as pd
import numpy as np
import math

In [7]:
# Read the raw dataset from disk

initialCSV = pd.read_csv('example.csv')

# Sizes for an ordered 80% / 20% split (the training share is rounded down,
# the testing share is whatever remains)

num_rows = initialCSV.shape[0]
first80p = math.floor(num_rows * 0.8)
last20p = num_rows - first80p

# Leading rows form the training set: every column except 'col3' is a feature,
# and 'col3' alone is kept as a one-column label frame

x_train = initialCSV.iloc[:first80p].drop(columns = ['col3'])
y_train = initialCSV.iloc[:first80p][['col3']]

# Trailing rows form the testing set, separated the same way

x_test = initialCSV.iloc[first80p:].drop(columns = ['col3'])
y_test = initialCSV.iloc[first80p:][['col3']]

In [None]:
# Fit the standardization statistics on the training features only:
# per-column mean (mu) and population standard deviation (sigma, ddof = 0),
# each stored as a 1 x d row so it broadcasts across the rows of a frame

d = x_train.shape[1]
mu = x_train.mean(axis = 0).to_numpy().reshape(1, d)
sigma = x_train.std(axis = 0, ddof = 0).to_numpy().reshape(1, d)

# Standardize the training features; the 1E-6 added to sigma keeps a
# constant column from dividing by zero

x_train = (x_train - mu) / (sigma + 1E-6)

# Standardize the testing features with the *training* statistics

x_test = (x_test - mu) / (sigma + 1E-6)

# Report the per-column statistics of the transformed testing features

print('Test Mean = ', x_test.mean(axis = 0), sep = '\n')

print('Test Standard Deviation = ', x_test.std(axis = 0, ddof = 0), sep = '\n')

## Logistic Regression

The objective function is $Q(w; X, y) = \frac{1}{n} \sum_{i = 1}^n \log
\Big(1 + \exp \big(-y_i \; x_i^T \; w \big) \Big) + \frac{\lambda}{2} \|w\|_2^2$

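When $\lambda = 0\;$, the model is standard (unregularized) logistic regression. When $\lambda > 0\;$, the model becomes $\ell_2$-regularized logistic regression. Here $\log$ denotes the natural logarithm, and this form of the loss assumes the labels are encoded as $y_i \in \{-1, +1\}$.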

In [29]:
# Calculate the value of the objective function (i.e. loss)

def objective(w, x , y, lmd):
    """Regularized logistic loss, averaged over all n samples.

    Q(w; X, y) = (1/n) * sum_i ln(1 + exp(-y_i * x_i^T w)) + (lmd / 2) * ||w||_2^2

    Inputs:
             weight (w)   -->  d x 1 matrix
             data (x)     -->  n x d matrix
             label (y)    -->  n x 1 matrix
             lmd (scalar) -->  regularization parameter

    Output: the loss. The penalty term is np.square(w).sum(), so the result
            is a NumPy scalar when w is an ndarray and a one-element pandas
            Series when w is a one-column DataFrame.

    Raises: ValueError if x has no rows.
    """
    features = np.asarray(x, dtype = float)
    labels = np.asarray(y, dtype = float).reshape(-1, 1)
    coeffs = np.asarray(w, dtype = float).reshape(-1, 1)

    n = features.shape[0]
    if n == 0:
        raise ValueError('objective() needs at least one sample')

    # margin_i = y_i * x_i^T w, computed for every row at once so that each
    # sample contributes its own term to the average
    margins = labels * np.dot(features, coeffs)

    # logaddexp(0, -m) equals ln(1 + exp(-m)) (natural log, matching the
    # gradient formula) and does not overflow for large |m|
    data_loss = np.logaddexp(0.0, -margins).mean()

    return data_loss + 0.5 * lmd * np.square(w).sum()

## Gradient Descent

The gradient at $w$ for regularized logistic regression is $g = - \frac{1}{n} \sum_{i = 1}^n \frac{y_i\;x_i}{1\;+\;\exp (\;y_i\;x_i^T\;w\;)} + \lambda\;w$

In [27]:
# Calculate the gradient at w

def gradient(w, x, y, lmd):
    """Gradient of the regularized logistic loss at w.

    g = -(1/n) * sum_i  y_i * x_i / (1 + exp(y_i * x_i^T w))  +  lmd * w

    Inputs:
             weight (w)   -->  d x 1 matrix
             data (x)     -->  n x d matrix
             label (y)    -->  n x 1 matrix
             lmd (scalar) -->  regularization parameter

    Output: gradient (g) as a d x 1 matrix. It is computed as
            (lmd * w) - <d x 1 ndarray>, so it has the same container type
            as w (a DataFrame when w is a DataFrame).

    Raises: ValueError if x has no rows.
    """
    features = np.asarray(x, dtype = float)
    labels = np.asarray(y, dtype = float).reshape(-1, 1)
    coeffs = np.asarray(w, dtype = float).reshape(-1, 1)

    n = features.shape[0]
    if n == 0:
        raise ValueError('gradient() needs at least one sample')

    # margin_i = y_i * x_i^T w for every row, so each sample contributes
    # its own term to the sum
    margins = labels * np.dot(features, coeffs)

    # 1 / (1 + exp(m)) written as exp(-ln(1 + exp(m))): for large margins
    # this underflows to 0 instead of overflowing inside exp
    damping = np.exp(-np.logaddexp(0.0, margins))

    # X^T (y * damping) sums y_i * x_i / (1 + exp(m_i)) over the samples
    data_term = np.dot(features.T, labels * damping) / n

    return (lmd * w) - data_term

In [40]:
# Gradient Descent for logistic regression
# Optimal weights will be obtained iteratively 

def gradient_descent(x, y, lmd, learning_rate, w, max_epochs = 100):
    """Run full-batch gradient descent on the regularized logistic loss.

    Inputs:
             data (x)      -->  n x d matrix
             label (y)     -->  n x 1 matrix
             lmd (scalar)  -->  regularization parameter
             learning_rate -->  scalar
             weights (w)   -->  d x 1 matrix (initial)
             max_epochs    -->  integer

    Outputs:
             weights (w)   -->  d x 1 matrix (final)
             objvals       -->  a record of each epoch's objective value,
                                evaluated after that epoch's update
    """
    objvals = []

    for _ in range(max_epochs):

        # One descent step along the negative gradient
        w = w - learning_rate * gradient(w, x, y, lmd)

        # objective() yields a one-element Series for DataFrame weights and a
        # plain scalar for ndarray weights; flattening accepts both
        loss = objective(w, x, y, lmd)
        objvals.append(np.asarray(loss).ravel()[0])

    return w, objvals

## Training

Run gradient descent to obtain the fitted weights together with the objective value recorded after each epoch.

In [50]:
# Logistic Regression
# One randomly initialized weight per feature column of x_train

weights = np.random.randn(x_train.shape[1], 1)
weights = pd.DataFrame(weights, columns = ['weights'])

# lmd = 0 (no regularization), learning rate 0.1, 100 epochs
# logreg is the tuple (final weights, per-epoch objective values)
logreg = gradient_descent(x_train, y_train, 0, 0.1, weights, 100)

In [53]:
# Regularized Logistic Regression
# One randomly initialized weight per feature column of x_train

weightsR = np.random.randn(x_train.shape[1], 1)
weightsR = pd.DataFrame(weightsR, columns = ['weights'])

# lmd = 0.5, learning rate 0.1, 100 epochs
# reglogreg is the tuple (final weights, per-epoch objective values)
reglogreg = gradient_descent(x_train, y_train, 0.5, 0.1, weightsR, 100)

## Testing

In [54]:
# Predict the class label (or the raw linear score) for each row of X

def predict(w, X, as_labels = False):
    """Score the rows of X with the linear model w.

    Inputs:
             weights (w)  -->  d x 1 matrix
             data (X)     -->  m x d matrix
             as_labels    -->  when False (default) return the raw linear
                               scores X w; when True return class labels

    Output: predictions (f) as an m x 1 matrix. With as_labels = True every
            entry is +1 where the score is >= 0 and -1 otherwise; this
            assumes labels are encoded as -1 / +1, the convention implied by
            the logistic loss used in this notebook.
    """
    scores = np.dot(X, w)

    if as_labels:
        # The sign of the linear score is the decision rule; ties go to +1
        return np.where(scores >= 0, 1, -1)

    return scores

In [None]:
# Evaluate the training error

train_pred_GD_N = predict(logreg[0], x_train)
train_pred_GD_R = predict(reglogreg[0], x_train)

# Mean squared difference between the predictions and the training labels.
# The labels are converted to an ndarray first so np.mean reduces over every
# entry and always yields one scalar; with a DataFrame operand np.mean defers
# to DataFrame.mean, whose result may be a per-column Series or a scalar
# depending on the pandas version.
train_MSE_GD_N = np.mean((train_pred_GD_N - y_train.to_numpy()) ** 2)
train_MSE_GD_R = np.mean((train_pred_GD_R - y_train.to_numpy()) ** 2)

print(train_MSE_GD_N, '\t', train_MSE_GD_R) 

In [None]:
# Evaluate the testing error

test_pred_GD_N = predict(logreg[0], x_test)
test_pred_GD_R = predict(reglogreg[0], x_test)

# Mean squared difference between the predictions and the testing labels.
# The labels are converted to an ndarray first so np.mean reduces over every
# entry and always yields one scalar; with a DataFrame operand np.mean defers
# to DataFrame.mean, whose result may be a per-column Series or a scalar
# depending on the pandas version.
test_MSE_GD_N = np.mean((test_pred_GD_N - y_test.to_numpy()) ** 2)
test_MSE_GD_R = np.mean((test_pred_GD_R - y_test.to_numpy()) ** 2)

print(test_MSE_GD_N, '\t', test_MSE_GD_R) 