In [1]:
from numba import jit, float64, int32
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification, make_gaussian_quantiles, make_moons, make_blobs
import matplotlib.pyplot as plt
import time
from sklearn.linear_model import LogisticRegression

In [2]:
samp = 100000

#easy to separate
X1, y1 = make_blobs(n_samples = samp, centers = 2, n_features=2, random_state=42)

#slightly harder to separate
X2, y2 = make_moons(n_samples = samp, noise = .75, random_state = 42)

#extreme hard case
X3, y3 = make_gaussian_quantiles(n_samples = samp, n_features=2, n_classes=2, random_state = 42)

@jit
def sigmoid(z):
    return 1.0/(1 + np.exp(-z))

@jit
def loss(w, X, y):
    margin = np.dot(X, w)
    l_if_pos = -np.logaddexp(0, -margin) * y
    l_if_neg = -np.logaddexp(0, margin) * (1 - y)
    
    l = -(l_if_pos + l_if_neg)
    
    return np.sum(l)

@jit
def gradients(X, y, y_hat):
    m = X.shape[0]
    d = X.shape[1]
    
    # Gradient of loss w.r.t weights
    dw = (1/m)*np.dot(X.T, (y_hat - y))
    
    # Gradient of loss w.r.t bias
    db = (1/m)*np.sum((y_hat - y)) 
    
    return dw, db

@jit
def normalize(X): 
    m = X.shape[0]
    n = X.shape[1]
    X = (X - X.mean(axis=0))/X.std(axis=0)
    
    return X

@jit
def train(X, y, bs, epochs, lr):
    m = X.shape[0]
    n = X.shape[1]
    
    # Initializing weights and bias to zeros.
    w = np.zeros((n,1))
    b = 0
    
    # Reshape y.
    y = y.reshape(m,1)
    
    # Normalize inputs
    x = normalize(X)
    
    # Store losses
    losses = []
    
    # Train
    start_i = -1
    end_i = -1
    xb = np.zeros(bs)
    yb = np.zeros(bs)
    l = -1
    y_hat = -1
    dw = np.zeros(n)
    db = -1
    for epoch in range(epochs):
        for i in range((m-1)//bs + 1):
            
            # Defining batches for SGD (this can be changed)
            start_i = i*bs
            end_i = start_i + bs
            xb = x[start_i:end_i]
            yb = y[start_i:end_i]
            
            # Predict
            y_hat = sigmoid(np.dot(xb, w) + b)
            
            # Calculate gradients
            dw, db = gradients(xb, yb, y_hat)
            
            # Update params
            w -= lr*dw
            b -= lr*db
        
        # Calc loss
        l = loss(w, x, y)
        losses.append(l)
        
    return w, b, losses

@jit
def predict(X, w, b):
    
    # X --> Input.
    
    # Normalizing the inputs.
    x = normalize(X)
    
    # Calculating presictions/y_hat.
    preds = sigmoid(np.dot(X, w) + b)
    
    # if y_hat >= 0.5 --> round up to 1
    # if y_hat < 0.5 --> round up to 1
    pred_class = [1 if i > 0.5 else 0 for i in preds]
    
    return np.array(pred_class)

@jit
def accuracy(y, y_hat):
    accuracy = np.sum(y == y_hat) / len(y)
    return accuracy

@jit
def compare(X, y):
    # Training 
    start1 = time.time()
    w, b, l = train(X, y, bs=100, epochs=1000, lr=0.001)
    pred = predict(X, w, b)
    acc = accuracy(y, pred)
    end1 = time.time()
    print(f'Time to run our logistic regression: {end1 - start1} s')
    print(f'Accuracy of our logistic regression: {acc}')
    
    start2 = time.time()
    lr = LogisticRegression()
    lr.fit(X, y)
    sk_acc = lr.score(X, y)
    end2 = time.time()
    print(f'Time to run Sklearn implementation {end2 - start2} s')
    print(f'Accuracy of Sklearn implementation: {sk_acc}')
    
    print(f'\nDifference in accuracies: {np.abs(acc-sk_acc)}')
    return w

In [3]:
w1 = compare(X1, y1)

Compilation is falling back to object mode WITH looplifting enabled because Function "compare" failed type inference due to: [1mUntyped global name 'LogisticRegression':[0m [1m[1mCannot determine Numba type of <class 'type'>[0m
[1m
File "../../../../../../../../../../tmp/ipykernel_18074/3098343392.py", line 133:[0m
[1m<source missing, REPL/exec in use?>[0m
[0m[0m
  @jit
[1m
File "../../../../../../../../../../tmp/ipykernel_18074/3098343392.py", line 121:[0m
[1m<source missing, REPL/exec in use?>[0m
[0m
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit https://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit
[1m
File "../../../../../../../../../../tmp/ipykernel_18074/3098343392.py", line 121:[0m
[1m<source missing, REPL/exec in use?>[0m
[0m
Compilation is falling back to object mode WITH

Time to run our logistic regression: 22.55413007736206 s
Accuracy of our logistic regression: 0.98624
Time to run Sklearn implementation 0.1589205265045166 s
Accuracy of Sklearn implementation: 1.0

Difference in accuracies: 0.013759999999999994


In [4]:
w2 = compare(X2, y2)

Time to run our logistic regression: 21.147231101989746 s
Accuracy of our logistic regression: 0.74851
Time to run Sklearn implementation 0.052706003189086914 s
Accuracy of Sklearn implementation: 0.75368

Difference in accuracies: 0.005170000000000008


In [5]:
w3 = compare(X3, y3)

Time to run our logistic regression: 19.772876739501953 s
Accuracy of our logistic regression: 0.49984
Time to run Sklearn implementation 0.04307103157043457 s
Accuracy of Sklearn implementation: 0.50358

Difference in accuracies: 0.003740000000000021
