In [1]:
%load_ext cython

In [2]:
%%cython

import numpy as np
cimport numpy as np
import pandas as pd
from sklearn.datasets import make_classification, make_gaussian_quantiles, make_moons, make_blobs
import matplotlib.pyplot as plt
import time
from sklearn.linear_model import LogisticRegression

# Number of samples generated for every synthetic benchmark dataset.
cdef int samp = 100000

# Dataset 1 - two Gaussian blobs: easy to separate.
X1, y1 = make_blobs(
    n_samples=samp,
    n_features=2,
    centers=2,
    random_state=42,
)

# Dataset 2 - two interleaving half-moons with heavy noise: slightly harder
# to separate.
X2, y2 = make_moons(
    n_samples=samp,
    noise=.75,
    random_state=42,
)

# Dataset 3 - Gaussian quantiles: the extreme hard case for a linear model.
X3, y3 = make_gaussian_quantiles(
    n_samples=samp,
    n_features=2,
    n_classes=2,
    random_state=42,
)

def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``.

    Parameters
    ----------
    z : scalar or array-like
        Raw scores (logits). Converted to a float64 array internally, so
        Python numbers, lists and ndarrays of any shape are accepted.

    Returns
    -------
    np.float64 or np.ndarray
        Values in [0, 1] with the same shape as ``z``.
    """
    # The parameter is deliberately untyped: every caller passes an ndarray of
    # logits, which a C ``int`` parameter would reject with a TypeError.
    z = np.asarray(z, dtype=np.float64)

    # exp() is only ever applied to a non-positive number, so it cannot
    # overflow:  z >= 0 -> 1 / (1 + e^-z),   z < 0 -> e^z / (1 + e^z).
    e = np.exp(-np.abs(z))
    return np.where(z >= 0, 1.0, e) / (1.0 + e)

def loss(np.ndarray w, np.ndarray X, np.ndarray y):
    """Total (summed, not averaged) binary cross-entropy of a linear model.

    The score of each row is ``np.dot(X, w)``; this function takes no bias
    term. ``np.logaddexp`` is used to evaluate ``log(sigmoid(s))`` and
    ``log(1 - sigmoid(s))`` without forming the sigmoid explicitly.

    Parameters
    ----------
    w : np.ndarray
        Weight vector/column.
    X : np.ndarray
        Feature matrix, one sample per row.
    y : np.ndarray
        0/1 labels, broadcast-compatible with ``np.dot(X, w)``.

    Returns
    -------
    Sum of the per-sample negative log-likelihoods.
    """
    cdef np.ndarray scores = np.dot(X, w)

    # log(sigmoid(s)) == -log(1 + e^-s);  log(1 - sigmoid(s)) == -log(1 + e^s)
    cdef np.ndarray log_p = -np.logaddexp(0, -scores)
    cdef np.ndarray log_not_p = -np.logaddexp(0, scores)

    # Each sample contributes only the term selected by its label.
    cdef np.ndarray per_sample = -(log_p * y + log_not_p * (1 - y))
    return np.sum(per_sample)

def gradients(np.ndarray X, np.ndarray y, np.ndarray y_hat):
    """Mean gradient of the log-loss with respect to the weights and the bias.

    Parameters
    ----------
    X : np.ndarray
        Batch of inputs, one sample per row.
    y : np.ndarray
        True labels for the batch, same shape as ``y_hat``.
    y_hat : np.ndarray
        Predicted probabilities for the batch.

    Returns
    -------
    dw : np.ndarray
        ``X.T @ (y_hat - y) / m``
    db : float
        ``sum(y_hat - y) / m``

    Raises
    ------
    ZeroDivisionError
        If ``X`` has zero rows (Cython's default ``cdivision=False`` checks
        the divisor).
    """
    cdef Py_ssize_t m = X.shape[0]

    # Force floating-point division. ``1/m`` on C integers is *integer*
    # division under language_level=2 (the default before Cython 3), which
    # evaluates to 0 for m > 1 and silently zeroes every gradient.
    cdef double inv_m = 1.0 / m

    cdef np.ndarray residual = y_hat - y

    # Gradient of loss w.r.t. weights
    cdef np.ndarray dw = inv_m * np.dot(X.T, residual)

    # Gradient of loss w.r.t. bias (double, not float, to keep full precision)
    cdef double db = inv_m * np.sum(residual)

    return dw, db

def normalize(np.ndarray X):
    """Standardize every column of ``X`` to zero mean and unit variance.

    Parameters
    ----------
    X : np.ndarray
        Input data; statistics are computed along axis 0.

    Returns
    -------
    np.ndarray
        ``(X - mean) / std`` computed per column. The input is not modified.
        Columns with zero standard deviation come back as all zeros.
    """
    mu = X.mean(axis=0)
    sigma = X.std(axis=0)

    # A constant column has sigma == 0; dividing by it would fill the column
    # with NaN and poison every later dot product. Dividing by 1 instead
    # leaves the (already zero) centered values untouched.
    sigma = np.where(sigma == 0, 1.0, sigma)

    return (X - mu) / sigma

def train(np.ndarray X, np.ndarray y, int bs, int epochs, float lr):
    """Fit logistic-regression parameters with mini-batch gradient descent.

    The inputs are standardized with ``normalize`` and then swept in fixed,
    contiguous batches of ``bs`` rows (no shuffling) for ``epochs`` passes.
    Weights and bias start at zero.

    Parameters
    ----------
    X : np.ndarray, shape (m, n)
        Training inputs.
    y : np.ndarray
        ``m`` labels (0/1); reshaped internally to ``(m, 1)``.
    bs : int
        Batch size; must be positive. The last batch may be shorter.
    epochs : int
        Number of full passes over the data.
    lr : float
        Learning rate.

    Returns
    -------
    w : np.ndarray, shape (n, 1)
        Learned weights.
    b : float
        Learned bias.
    losses : list of float
        Summed log-loss over the whole training set after each epoch,
        evaluated with both the weights and the bias.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D or ``bs`` is not positive.
    """
    cdef Py_ssize_t m, n, n_batches, i, start_i, end_i
    cdef int epoch
    # double rather than float: single precision needlessly truncates the
    # bias and the reported losses.
    cdef double b = 0.0
    cdef double db, epoch_loss
    # Batch predictions are arrays, so y_hat must be an ndarray, not a C float.
    cdef np.ndarray w, x, x_aug, xb, yb, y_hat, dw

    # Validate before touching X.shape[1] or dividing by bs.
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (m, n)")
    if bs <= 0:
        raise ValueError("bs must be a positive integer")

    m = X.shape[0]
    n = X.shape[1]

    # Initializing weights to zeros (bias is initialized above).
    w = np.zeros((n, 1))

    # Reshape y to a column so it lines up with the (batch, 1) predictions.
    y = y.reshape(m, 1)

    # Normalize inputs
    x = normalize(X)

    # loss() has no bias argument. Appending a column of ones to x and the
    # bias to w makes np.dot(x_aug, [w; b]) == np.dot(x, w) + b, so the
    # recorded loss describes the model that is actually being trained.
    x_aug = np.hstack((x, np.ones((m, 1))))

    # Store losses
    losses = []

    # Ceil(m / bs) batches; evaluates to 0 when m == 0.
    n_batches = (m - 1) // bs + 1

    for epoch in range(epochs):
        for i in range(n_batches):

            # Defining batches for SGD (this can be changed)
            start_i = i * bs
            end_i = start_i + bs
            xb = x[start_i:end_i]
            yb = y[start_i:end_i]

            # Predict
            y_hat = sigmoid(np.dot(xb, w) + b)

            # Calculate gradients
            dw, db = gradients(xb, yb, y_hat)

            # Update params
            w -= lr * dw
            b -= lr * db

        # Calc loss on the full training set at the end of the epoch
        epoch_loss = loss(np.vstack((w, [[b]])), x_aug, y)
        losses.append(epoch_loss)

    return w, b, losses

def predict(np.ndarray X, np.ndarray w, float b):
    """Predict 0/1 class labels for ``X`` with a trained linear model.

    Parameters
    ----------
    X : np.ndarray
        Input samples, one per row.
    w : np.ndarray
        Weights, as returned by ``train``.
    b : float
        Bias, as returned by ``train``.

    Returns
    -------
    np.ndarray
        Flat integer array with one label per sample: 1 where the predicted
        probability is greater than 0.5, otherwise 0.
    """
    # Normalizing the inputs, exactly as train() does before fitting.
    # NOTE(review): the mean/std come from the X passed here, not from the
    # training set; that only matches training when both are the same data.
    cdef np.ndarray x = normalize(X)

    # Calculating predictions/y_hat. The *normalized* x must be scored: the
    # weights were learned on normalized inputs, so raw X gives wrong logits.
    cdef np.ndarray preds = sigmoid(np.dot(x, w) + b)

    # if y_hat >  0.5 --> class 1
    # if y_hat <= 0.5 --> class 0
    return (preds > 0.5).astype(int).ravel()

def accuracy(np.ndarray y, np.ndarray y_hat):
    """Fraction of predictions that equal the true labels.

    Parameters
    ----------
    y : np.ndarray
        True labels.
    y_hat : np.ndarray
        Predicted labels, with the same number of elements as ``y``.

    Returns
    -------
    float
        Value in [0, 1].

    Raises
    ------
    ValueError
        If ``y`` and ``y_hat`` hold a different number of elements.
    """
    # Flatten both sides first. Comparing an (m,) array with an (m, 1) array
    # would broadcast to an (m, m) matrix and yield a meaningless score that
    # can even exceed 1.
    cdef np.ndarray truth = y.ravel()
    cdef np.ndarray guess = y_hat.ravel()
    # double, not float: single precision turns e.g. 0.85 into 0.85000002...
    cdef double score

    if truth.shape[0] != guess.shape[0]:
        raise ValueError("y and y_hat must contain the same number of elements")

    score = np.mean(truth == guess)
    return score

def compare(np.ndarray X, np.ndarray y, int bs=100, int epochs=1000, float lr=0.001):
    """Benchmark the hand-written logistic regression against scikit-learn.

    Trains and scores both models on the same data, then prints the wall-clock
    time and training-set accuracy of each, plus the absolute accuracy gap.

    Parameters
    ----------
    X : np.ndarray
        Input samples, one per row.
    y : np.ndarray
        0/1 labels.
    bs : int, default 100
        Batch size forwarded to ``train``.
    epochs : int, default 1000
        Number of epochs forwarded to ``train``.
    lr : float, default 0.001
        Learning rate forwarded to ``train``.

    Returns
    -------
    np.ndarray
        Weights learned by the hand-written implementation.
    """
    # double keeps the printed accuracies exact; a C float would display
    # values such as 0.8500000238418579.
    cdef double acc, sk_acc

    # Our implementation. perf_counter() is the clock intended for measuring
    # elapsed intervals; time.time() can jump if the system clock is adjusted.
    start1 = time.perf_counter()
    w, b, _losses = train(X, y, bs=bs, epochs=epochs, lr=lr)
    pred = predict(X, w, b)
    acc = accuracy(y, pred)
    end1 = time.perf_counter()
    print(f'Time to run our logistic regression: {end1 - start1} s')
    print(f'Accuracy of our logistic regression: {acc}')

    # scikit-learn reference, fitted with its default settings.
    start2 = time.perf_counter()
    sk_model = LogisticRegression()
    sk_model.fit(X, y)
    sk_acc = sk_model.score(X, y)
    end2 = time.perf_counter()
    print(f'Time to run Sklearn implementation {end2 - start2} s')
    print(f'Accuracy of Sklearn implementation: {sk_acc}')

    print(f'\nDifference in accuracies: {np.abs(acc-sk_acc)}')

    return w

In [3]:
# Benchmark each synthetic dataset in turn and keep the weights learned by the
# hand-written model for each one.
w1, w2, w3 = [
    compare(features, labels)
    for features, labels in ((X1, y1), (X2, y2), (X3, y3))
]

NameError: name 'compare' is not defined