Part 1 - Defining a Class for Logistic Regression

In [1]:
import numpy as np
import pandas as pd

In [2]:
class LogitRegression() :
    """Binary logistic regression trained with plain batch gradient descent.

    The model prepends a column of ones to the feature matrix, so ``w[0]`` is
    the intercept and ``w[1:]`` are the per-feature coefficients.

    Parameters
    ----------
    learning_rate : float
        Step size applied to the gradient on every iteration.
    iterations : int
        Number of full-batch gradient steps performed by ``fit``.

    Attributes
    ----------
    w : numpy.ndarray of shape (n_features + 1,)
        Learned weights; only exists after ``fit`` has been called.
    """

    def __init__( self, learning_rate, iterations) :
        self.learning_rate = learning_rate
        self.iterations = iterations

    def p(self, X):
        """Return the sigmoid of ``X @ self.w`` for each row of ``X``.

        ``X`` must already contain the leading column of ones (``fit`` and
        ``predict`` add it before calling this method).
        """
        z = np.asarray(X, dtype=float) @ self.w
        # sigmoid(z) = exp(-log(1 + exp(-z))).  logaddexp evaluates the inner
        # log without ever forming exp(-z), so large negative z no longer
        # triggers an overflow warning the way 1 / (1 + exp(-z)) does.
        return np.exp(-np.logaddexp(0.0, -z))

    def fit(self, X, y) :
        """Learn the weights from features ``X`` and 0/1 labels ``y``.

        Parameters
        ----------
        X : array-like of shape (m, n)
            Feature matrix without an intercept column.
        y : array-like with m elements
            Labels; any shape that flattens to length m, e.g. (m,) or (m, 1).

        Returns
        -------
        LogitRegression
            ``self``, so calls can be chained.

        Raises
        ------
        ValueError
            If ``y`` does not contain exactly one label per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1)
        m, n = X.shape
        if y.shape[0] != m:
            raise ValueError(
                f"X has {m} rows but y has {y.shape[0]} labels"
            )
        X = np.hstack([np.ones((m, 1)), X])
        self.w = np.zeros(n + 1)
        for _ in range(self.iterations):
            # Gradient of the summed (not averaged) log-loss, so the
            # effective step grows with the number of training rows.
            gradient = (self.p(X) - y) @ X
            self.w = self.w - self.learning_rate * gradient
        return self

    def predict(self, X) :
        """Return a 0/1 label for each row of ``X``.

        A row is labelled 1 only when its predicted probability is strictly
        greater than 0.5.
        """
        X = np.asarray(X, dtype=float)
        m = X.shape[0]
        X = np.hstack([np.ones((m, 1)), X])
        y_hat = np.where(self.p(X) > 0.5, 1, 0)
        return y_hat

Part 2 - Loading and Processing the Dataset

In [3]:
from sklearn.preprocessing import minmax_scale
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [5]:
# Read the diabetes table: every column except the last is a feature,
# and the last column is the label.
diabetes_df = pd.read_csv("./datasets/diabetes.csv")

# NOTE: scaling runs on the full table before the split, so the per-column
# min/max are computed from rows that later end up in the test set as well.
feature_values = diabetes_df.iloc[:, :-1].values
X = minmax_scale(feature_values)
y = diabetes_df.iloc[:, -1].values

# Hold out one third of the rows for testing; the fixed seed keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=1/3,
    random_state=6,
)

Part 3 - Comparing Models

In [6]:
# Models under comparison: index 0 is the from-scratch implementation,
# index 1 is scikit-learn's reference estimator with default settings.
models = []
models.append(LogitRegression(learning_rate=.1, iterations=1000))
models.append(LogisticRegression())

# Train every candidate on the same training split.
for model in models:
    model.fit(X_train, y_train)
    
def compute_accuracy(model,X_test,y_test):
    """Return the percentage of rows in ``X_test`` that ``model`` labels correctly.

    Parameters
    ----------
    model : object
        Any fitted estimator exposing ``predict(X)``.
    X_test : array-like of shape (m, n)
        Features passed straight to ``model.predict``.
    y_test : array-like with m elements
        True labels; may be shaped (m,) or (m, 1).

    Returns
    -------
    float
        Accuracy in the range 0-100.

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of labels.
    """
    # Flatten both sides: comparing an (m,) prediction with an (m, 1) label
    # column would otherwise broadcast to an (m, m) matrix and silently
    # report a meaningless accuracy.
    y_hat = np.asarray(model.predict(X_test)).reshape(-1)
    y_true = np.asarray(y_test).reshape(-1)
    if y_hat.shape != y_true.shape:
        raise ValueError(
            f"got {y_hat.shape[0]} predictions for {y_true.shape[0]} labels"
        )
    return (y_hat == y_true).mean() * 100

# Report the test-set accuracy (in percent) of each fitted model,
# from-scratch implementation first, scikit-learn second.
custom_accuracy = compute_accuracy(models[0], X_test, y_test)
print("Accuracy on test set by our implementation of Logistic Reg model :", custom_accuracy)

sklearn_accuracy = compute_accuracy(models[1], X_test, y_test)
print("Accuracy on test set by sklearn model :", sklearn_accuracy)

Accuracy on test set by our implementation of Logistic Reg model : 74.609375
Accuracy on test set by sklearn model : 78.515625
