In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification, make_gaussian_quantiles, make_moons, make_blobs
import matplotlib.pyplot as plt
import time
from sklearn.linear_model import LogisticRegression
%load_ext line_profiler

In [2]:
# Easiest case: two Gaussian blobs, expected to be easy to separate.
# `samp` is the sample count shared by every synthetic dataset in this notebook.
samp = 100_000
X1, y1 = make_blobs(n_samples=samp, n_features=2, centers=2, random_state=42)

In [3]:
# Slightly harder case: two interleaving half-moons with heavy noise (0.75).
X2, y2 = make_moons(n_samples=samp, noise=0.75, random_state=42)

In [4]:
# Extremely hard case: both classes come from Gaussian quantiles, so a linear
# decision boundary is expected to do poorly here.
X3, y3 = make_gaussian_quantiles(
    n_samples=samp,
    n_features=2,
    n_classes=2,
    random_state=42,
)

## Algorithm

In [5]:
def train(X, y, bs, epochs, lr):
    """Fit a binary logistic-regression model with mini-batch gradient descent.

    The features are standardized column-wise (zero mean, unit variance)
    before training, so the returned parameters apply to standardized
    inputs rather than to the raw ``X``.

    Parameters
    ----------
    X : ndarray of shape (m, n)
        Feature matrix. It is not modified; a standardized copy is used.
    y : ndarray with m elements
        Binary targets (0 or 1). Reshaped internally to ``(m, 1)``.
    bs : int
        Mini-batch size. Batches are consecutive row slices in the given
        order (no shuffling); the last batch may be smaller.
    epochs : int
        Number of full passes over the data.
    lr : float
        Learning rate applied to the mean gradient of each batch.

    Returns
    -------
    w : ndarray of shape (n, 1)
        Learned weights.
    b : float
        Learned bias.
    losses : list of float
        Mean binary cross-entropy over the full data set, one entry per epoch.
    """
    n_samples, n_features = X.shape

    # Weights and bias start at zero.
    w = np.zeros((n_features, 1))
    b = 0.0

    # Column vector so that it lines up with the (batch, 1) predictions.
    y = y.reshape(n_samples, 1)

    # Standardize once. Constant columns have zero std; dividing by 1 instead
    # maps them to all-zeros rather than NaN.
    mean = X.mean(axis=0)
    std = X.std(axis=0)
    std = np.where(std == 0, 1.0, std)
    X = (X - mean) / std

    losses = []
    n_batches = (n_samples - 1) // bs + 1

    for _ in range(epochs):
        for i in range(n_batches):
            start_i = i * bs
            end_i = start_i + bs
            xb = X[start_i:end_i]
            yb = y[start_i:end_i]

            # Sigmoid of the linear score.
            y_hat = 1.0 / (1 + np.exp(-(np.dot(xb, w) + b)))

            # Mean gradient of the cross-entropy loss over this batch.
            m1 = xb.shape[0]
            err = y_hat - yb
            w -= lr * (1 / m1) * np.dot(xb.T, err)
            b -= lr * (1 / m1) * np.sum(err)

        # Epoch loss: mean of log(1 + exp(z)) - y*z, which equals the binary
        # cross-entropy of sigmoid(z) and is numerically stable via logaddexp.
        z = np.dot(X, w) + b
        losses.append(float(np.mean(np.logaddexp(0, z) - y * z)))

    return w, b, losses

In [6]:
def predict(X, w, b):
    """Predict binary class labels with a fitted logistic-regression model.

    The inputs are standardized column-wise using the mean and standard
    deviation of the ``X`` passed in here (not statistics saved from
    training), so results are consistent with training only when ``X`` is
    distributed like the training data.

    Parameters
    ----------
    X : ndarray of shape (m, n)
        Feature matrix. It is not modified.
    w : ndarray of shape (n, 1)
        Model weights.
    b : float
        Model bias.

    Returns
    -------
    ndarray of shape (m,)
        1 where the predicted probability is strictly greater than 0.5,
        otherwise 0.
    """
    m, _ = X.shape

    # Standardize once. Constant columns have zero std; dividing by 1 instead
    # maps them to all-zeros rather than NaN.
    mean = X.mean(axis=0)
    std = X.std(axis=0)
    std = np.where(std == 0, 1.0, std)
    X = (X - mean) / std

    # Predicted probabilities: sigmoid of the linear score.
    preds = 1.0 / (1 + np.exp(-(np.dot(X, w) + b)))

    # probability > 0.5 --> class 1, otherwise class 0 (vectorized threshold).
    return (preds > 0.5).astype(int).reshape(m)

In [7]:
def accuracy(y, y_hat):
    """Return the fraction of entries of ``y_hat`` that equal ``y``."""
    n_correct = np.sum(y == y_hat)
    n_total = len(y)
    return n_correct / n_total

In [8]:
def compare(X, y, bs=100, epochs=1000, lr=0.001):
    """Benchmark the hand-written logistic regression against scikit-learn.

    Both models are fitted and scored on the same ``(X, y)``; the wall-clock
    time and training-set accuracy of each are printed, followed by the
    absolute difference between the two accuracies.

    Parameters
    ----------
    X : ndarray of shape (m, n)
        Feature matrix.
    y : ndarray of shape (m,)
        Binary targets.
    bs : int, default 100
        Mini-batch size passed to ``train``.
    epochs : int, default 1000
        Number of epochs passed to ``train``.
    lr : float, default 0.001
        Learning rate passed to ``train``.

    Returns
    -------
    ndarray
        Weights learned by ``train`` (the bias is not returned).
    """
    # Our implementation: time fit + predict + scoring together.
    # perf_counter is monotonic, unlike time.time(), so the interval cannot be
    # distorted by system clock adjustments.
    ours_start = time.perf_counter()
    w, b, _ = train(X, y, bs=bs, epochs=epochs, lr=lr)
    pred = predict(X, w, b)
    acc = accuracy(y, pred)
    ours_end = time.perf_counter()
    print(f'Time to run our logistic regression: {ours_end - ours_start} s')
    print(f'Accuracy of our logistic regression: {acc}')

    # scikit-learn reference: time fit + scoring together.
    sk_start = time.perf_counter()
    sk_model = LogisticRegression()
    sk_model.fit(X, y)
    sk_acc = sk_model.score(X, y)
    sk_end = time.perf_counter()
    print(f'Time to run Sklearn implementation {sk_end - sk_start} s')
    print(f'Accuracy of Sklearn implementation: {sk_acc}')

    print(f'\nDifference in accuracies: {np.abs(acc-sk_acc)}')

    return w

In [9]:
# Benchmark on the blobs dataset; keeps only the weights returned by compare().
w1 = compare(X1, y1)

Time to run our logistic regression: 25.197702646255493 s
Accuracy of our logistic regression: 1.0
Time to run Sklearn implementation 0.09368681907653809 s
Accuracy of Sklearn implementation: 1.0

Difference in accuracies: 0.0


In [10]:
# Benchmark on the noisy moons dataset; keeps only the weights returned by compare().
w2 = compare(X2, y2)

Time to run our logistic regression: 25.206674337387085 s
Accuracy of our logistic regression: 0.75368
Time to run Sklearn implementation 0.03488302230834961 s
Accuracy of Sklearn implementation: 0.75368

Difference in accuracies: 0.0


In [11]:
# Benchmark on the Gaussian-quantiles dataset; keeps only the weights returned by compare().
w3 = compare(X3, y3)

Time to run our logistic regression: 25.31231999397278 s
Accuracy of our logistic regression: 0.50117
Time to run Sklearn implementation 0.024916887283325195 s
Accuracy of Sklearn implementation: 0.50358

Difference in accuracies: 0.0024100000000000232
