In [1]:
import pandas as pd
import numpy as np

In [2]:
# Use the logistic regression (multi class; implemented from scratch) training algorithm and plot the training 
# and test errors.

tra_df = pd.read_csv('optdigits.tra', header=None, sep=',')
tes_df = pd.read_csv('optdigits.tes', header=None, sep=',')
tra_lbl = tra_df.pop(64)
tes_lbl = tes_df.pop(64)

In [10]:
def standardScaler(feature_array):
    total_cols = feature_array.shape[1] # total number of columns 
    for i in range(total_cols): # iterating through each column
        feature_col = feature_array[i]
        mean = feature_col.mean() # mean stores mean value for the column
        std = feature_col.std() # std stores standard deviation value for the column
        if(std==float(0)):
            std=1
        feature_array[i] = (feature_array[i] - mean) / std
        
def sigmoid(z):
    return 1.0/(1 + np.exp(-z))


def loss(y, y_hat):
    loss = -np.mean(y*(np.log(y_hat)) - (1-y)*np.log(1-y_hat))
    return loss


def gradients(X, y, y_hat,w=None,Lambda=None):
    
    # X --> Input.
    # y --> true/target value.
    # y_hat --> hypothesis/predictions.
    # w --> weights (parameter).
    # b --> bias (parameter).
    
    # m-> number of training examples.
    m = X.shape[0]
    
    # Gradient of loss w.r.t weights.
    if(Lambda==None):
        dw = (1/m)*np.dot(X.T, (y_hat - y))
    else:
        dw = (1/m)*(np.dot(X.T, (y_hat - y)) + Lambda*w)
    
    # Gradient of loss w.r.t bias.
    db = (1/m)*np.sum((y_hat - y)) 
    
    return dw, db

def train(X, y, bs, epochs, lr,Lambda=None):
    
    # X --> Input.
    # y --> true/target value.
    # bs --> Batch Size.
    # epochs --> Number of iterations.
    # lr --> Learning rate.
        
    # m-> number of training examples
    # n-> number of features 
    m, n = X.shape
    
    # Initializing weights and bias to zeros.
    w = np.zeros((n,1))
    b = 0
    
    # Reshaping y.
    y = y.reshape(m,1)
    
    # Normalizing the inputs.
    
    # Empty list to store losses.
    losses = []
    
    # Training loop.
    for epoch in range(epochs):
        for i in range((m-1)//bs + 1):
            
            # Defining batches. SGD.
            start_i = i*bs
            end_i = start_i + bs
            xb = X[start_i:end_i]
            yb = y[start_i:end_i]
            
            # Calculating hypothesis/prediction.
            y_hat = sigmoid(np.dot(xb, w) + b)
            
            # Getting the gradients of loss w.r.t parameters.
            dw, db = gradients(xb, yb, y_hat,w,Lambda)
            
            # Updating the parameters.
            w -= lr*dw
            b -= lr*db
        
        # Calculating loss and appending it in the list.
        l = loss(y, sigmoid(np.dot(X, w) + b))
        losses.append(l)
        
    # returning weights, bias and losses(List).
    return w, b, losses

def predict(X,w,b):
    
    # Calculating presictions/y_hat.
    preds = sigmoid(np.dot(X, w) + b)
    
    # Empty List to store predictions.
    pred_class = []
    # if y_hat >= 0.5 --> round up to 1
    # if y_hat < 0.5 --> round up to 1
    pred_class = [1 if i > 0.5 else 0 for i in preds]
    return np.array(pred_class),np.array(preds.reshape(1,-1)[0])

def convert_labels(train_label,test_label):
    n_class = np.unique(train_label)
    train_labels,test_labels = [],[]
    for i in n_class:
        train_labels.append(np.where(train_label == i, 1, 0))
        test_labels.append(np.where(test_label == i, 1, 0))
    return train_labels,test_labels
        

In [4]:
# Scaling the Training and test Dataset
standardScaler(tra_df)
standardScaler(tes_df)

# Building label data sets for each class to build 0-1 Logistic Regression models.
train_labels,test_labels = convert_labels(tra_lbl,tes_lbl)

In [5]:
# Training 
model_w,model_b,model_l = [],[],[]
for i in np.unique(tra_lbl):
    w, b, l = train(tra_df, train_labels[i], bs=100, epochs=1000, lr=0.01)
    model_w.append(w)
    model_b.append(b)
    model_l.append(l)
    


In [6]:
#Prediction
train_preds,test_preds = pd.DataFrame(),pd.DataFrame()
for i in np.unique(tra_lbl):
    _,train_preds[i] = predict(tra_df,model_w[i],model_b[i])
    _,test_preds[i] = predict(tes_df,model_w[i],model_b[i])

In [7]:
#Train error and Test error
from sklearn.metrics import accuracy_score

train_accuracy=accuracy_score(tra_lbl, train_preds.idxmax(axis=1))*100
test_accuracy=accuracy_score(tes_lbl, test_preds.idxmax(axis=1))*100
print("Train Accuracy: ",train_accuracy)
print("Test Accuracy: ", test_accuracy)

Train Accuracy:  97.12267852471881
Test Accuracy:  94.54646633277684


In [20]:
# Use the logistic regression classifier with regularization so that you also penalize large weights (λ∥w∥2). 
# Plot the average training and test errors for at least 5 different values of regularization parameter λ.

reg_values = [0.0001,0.001,0.1]

for val in reg_values:
    model_w,model_b,model_l = [],[],[]
    for i in np.unique(tra_lbl):
        w, b, l = train(tra_df, train_labels[i], bs=500, epochs=5000, lr=0.5,Lambda=val)
        model_w.append(w)
        model_b.append(b)
        model_l.append(l)

    train_preds,test_preds = pd.DataFrame(),pd.DataFrame()
    for i in np.unique(tra_lbl):
        _,train_preds[i] = predict(tra_df,model_w[i],model_b[i])
        _,test_preds[i] = predict(tes_df,model_w[i],model_b[i])

    train_accuracy=accuracy_score(tra_lbl, train_preds.idxmax(axis=1))*100
    test_accuracy=accuracy_score(tes_lbl, test_preds.idxmax(axis=1))*100
    print("Train Accuracy: ",train_accuracy)
    print("Test Accuracy: ", test_accuracy)
    print("------------------------------",val)


  app.launch_new_instance()
  app.launch_new_instance()


Train Accuracy:  98.5874967303165
Test Accuracy:  94.65776293823038
------------------------------ 0.0001
Train Accuracy:  98.5874967303165
Test Accuracy:  94.65776293823038
------------------------------ 0.001
Train Accuracy:  98.29976458278838
Test Accuracy:  94.88035614913744
------------------------------ 0.1
