In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification, make_gaussian_quantiles, make_moons, make_blobs
import matplotlib.pyplot as plt
import time
from sklearn.linear_model import LogisticRegression
%load_ext line_profiler

In [2]:
#easy to separate
samp = 100000
X1, y1 = make_blobs(n_samples = samp, centers = 2, n_features=2, random_state=42)                                                                  

In [3]:
#slightly harder to separate
X2, y2 = make_moons(n_samples = samp, noise = .75, random_state = 42)

In [4]:
#extreme hard case
X3, y3 = make_gaussian_quantiles(n_samples = samp, n_features=2, n_classes=2, random_state = 42)

## Algorithm

In [5]:
def train(X, y, bs, epochs, lr):
    m2, n2 = X.shape
    
    # Initializing weights and bias to zeros.
    w = np.zeros((n2,1))
    b = 0
    
    # Reshape y.
    y = y.reshape(m2,1)
    
    # Normalize inputs
    #x = normalize(X)
    #____________________change 1
    m, n = X.shape
    for i in range(n):
        X = (X - X.mean(axis=0))/X.std(axis=0)
    #______________________________
    
    
    # Store losses
    losses = []
    
    # Train
    for epoch in range(epochs):
        for i in range((m2-1)//bs + 1):
            
            # Defining batches for SGD (this can be changed)
            start_i = i*bs
            end_i = start_i + bs
            xb = X[start_i:end_i]
            yb = y[start_i:end_i]
            
            # Predict
            #y_hat = sigmoid(np.dot(xb, w) + b)
            #____________________change 4
            y_hat =1.0/(1 + np.exp(-(np.dot(xb, w) + b)))
            #_____________________
            
            
            # Calculate gradients
            #dw, db = gradients(xb, yb, y_hat)
            #____________________________change 2
            m1 = xb.shape[0]
            # Gradient of loss w.r.t weights
            #dw = (1/m1)*np.dot(xb.T,(y_hat - yb))
            # Gradient of loss w.r.t bias
            #db = (1/m1)*np.sum((y_hat - yb)) 
            #______________________________
            # Update params
            w -= lr*(1/m1)*np.dot(xb.T,(y_hat - yb))
            b -= lr*(1/m1)*np.sum((y_hat - yb)) 
        
        # Calc loss
        #l = loss(x, y, w)
        #_________________________change 3
        margin = np.dot(X, w)
        losses.append(y * -np.logaddexp(0, np.exp(margin) + (1 - y) * (1 + np.logaddexp(0, np.exp(margin)))))
        #_________________________
    return w, b, losses

In [6]:
def predict(X,w,b):
    
    # X --> Input.
    
    # Normalizing the inputs.
    #x = normalize(X)
    #_____________________change 6
    m, n = X.shape
    for i in range(n):
        X = (X - X.mean(axis=0))/X.std(axis=0)
    #_______________________
    
    # Calculating presictions/y_hat.
    #preds = sigmoid(np.dot(X, w) + b)
    #______________________change 5
    preds = 1.0/(1 + np.exp(-(np.dot(X, w) + b)))
    #_______________________
    
    # if y_hat >= 0.5 --> round up to 1
    # if y_hat < 0.5 --> round up to 1
    pred_class = [1 if i > 0.5 else 0 for i in preds]
    
    return np.array(pred_class)

In [7]:
def accuracy(y, y_hat):
    accuracy = np.sum(y == y_hat) / len(y)
    return accuracy

In [8]:
def compare(X, y):
    # Training 
    start1 = time.time()
    w, b, l = train(X, y, bs=100, epochs=1000, lr=0.001)
    pred = predict(X,w,b)
    acc = accuracy(y, pred)
    end1 = time.time()
    print(f'Time to run our logistic regression: {end1 - start1} s')
    print(f'Accuracy of our logistic regression: {acc}')
    
    
    start2 = time.time()
    lr = LogisticRegression()
    lr.fit(X, y)
    sk_acc = lr.score(X, y)
    end2 = time.time()
    print(f'Time to run Sklearn implementation {end2 - start2} s')
    print(f'Accuracy of Sklearn implementation: {sk_acc}')
    
    print(f'\nDifference in accuracies: {np.abs(acc-sk_acc)}')
    
    return w

In [9]:
w1 = compare(X1, y1)

Time to run our logistic regression: 26.51914882659912 s
Accuracy of our logistic regression: 1.0
Time to run Sklearn implementation 0.13305425643920898 s
Accuracy of Sklearn implementation: 1.0

Difference in accuracies: 0.0


In [11]:
%lprun -f compare w1 = compare(X1, y1)

Time to run our logistic regression: 33.31061625480652 s
Accuracy of our logistic regression: 1.0
Time to run Sklearn implementation 0.11560893058776855 s
Accuracy of Sklearn implementation: 1.0

Difference in accuracies: 0.0


Timer unit: 1e-07 s

Total time: 33.4267 s
File: <ipython-input-8-1fde78965852>
Function: compare at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     1                                           def compare(X, y):
     2                                               # Training 
     3         1         18.0     18.0      0.0      start1 = time.time()
     4         1  331732798.0 331732798.0     99.2      w, b, l = train(X, y, bs=100, epochs=1000, lr=0.001)
     5         1    1365596.0 1365596.0      0.4      pred = predict(X,w,b)
     6         1       2057.0   2057.0      0.0      acc = accuracy(y, pred)
     7         1         25.0     25.0      0.0      end1 = time.time()
     8         1       1138.0   1138.0      0.0      print(f'Time to run our logistic regression: {end1 - start1} s')
     9         1        219.0    219.0      0.0      print(f'Accuracy of our logistic regression: {acc}')
    10                                               
    11                                               
    12         1          8.0      8.0      0.0      start2 = time.time()
    13         1        238.0    238.0      0.0      lr = LogisticRegression()
    14         1    1112627.0 1112627.0      0.3      lr.fit(X, y)
    15         1      50576.0  50576.0      0.0      sk_acc = lr.score(X, y)
    16         1         20.0     20.0      0.0      end2 = time.time()
    17         1        751.0    751.0      0.0      print(f'Time to run Sklearn implementation {end2 - start2} s')
    18         1        220.0    220.0      0.0      print(f'Accuracy of Sklearn implementation: {sk_acc}')
    19                                               
    20         1        241.0    241.0      0.0      print(f'\nDifference in accuracies: {np.abs(acc-sk_acc)}')
    21                                               
    22         1          6.0      6.0      0.0      return w

In [12]:
w2 = compare(X2, y2)

Time to run our logistic regression: 21.972463369369507 s
Accuracy of our logistic regression: 0.75368
Time to run Sklearn implementation 0.03289008140563965 s
Accuracy of Sklearn implementation: 0.75368

Difference in accuracies: 0.0


In [13]:
%lprun -f compare w2 = compare(X2, y2)

Time to run our logistic regression: 32.77240824699402 s
Accuracy of our logistic regression: 0.75368
Time to run Sklearn implementation 0.04485058784484863 s
Accuracy of Sklearn implementation: 0.75368

Difference in accuracies: 0.0


Timer unit: 1e-07 s

Total time: 32.8175 s
File: <ipython-input-8-1fde78965852>
Function: compare at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     1                                           def compare(X, y):
     2                                               # Training 
     3         1         28.0     28.0      0.0      start1 = time.time()
     4         1  326258853.0 326258853.0     99.4      w, b, l = train(X, y, bs=100, epochs=1000, lr=0.001)
     5         1    1466862.0 1466862.0      0.4      pred = predict(X,w,b)
     6         1       2818.0   2818.0      0.0      acc = accuracy(y, pred)
     7         1         25.0     25.0      0.0      end1 = time.time()
     8         1       1443.0   1443.0      0.0      print(f'Time to run our logistic regression: {end1 - start1} s')
     9         1        254.0    254.0      0.0      print(f'Accuracy of our logistic regression: {acc}')
    10                                               
    11                                               
    12         1          8.0      8.0      0.0      start2 = time.time()
    13         1        219.0    219.0      0.0      lr = LogisticRegression()
    14         1     367734.0 367734.0      0.1      lr.fit(X, y)
    15         1      75070.0  75070.0      0.0      sk_acc = lr.score(X, y)
    16         1         31.0     31.0      0.0      end2 = time.time()
    17         1        861.0    861.0      0.0      print(f'Time to run Sklearn implementation {end2 - start2} s')
    18         1        233.0    233.0      0.0      print(f'Accuracy of Sklearn implementation: {sk_acc}')
    19                                               
    20         1        264.0    264.0      0.0      print(f'\nDifference in accuracies: {np.abs(acc-sk_acc)}')
    21                                               
    22         1          5.0      5.0      0.0      return w

In [14]:
w3 = compare(X3, y3)

Time to run our logistic regression: 21.294788122177124 s
Accuracy of our logistic regression: 0.50117
Time to run Sklearn implementation 0.023920059204101562 s
Accuracy of Sklearn implementation: 0.50358

Difference in accuracies: 0.0024100000000000232


In [15]:
%lprun -f compare w3 = compare(X3, y3)

Time to run our logistic regression: 34.31549835205078 s
Accuracy of our logistic regression: 0.50117
Time to run Sklearn implementation 0.030896663665771484 s
Accuracy of Sklearn implementation: 0.50358

Difference in accuracies: 0.0024100000000000232


Timer unit: 1e-07 s

Total time: 34.3466 s
File: <ipython-input-8-1fde78965852>
Function: compare at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     1                                           def compare(X, y):
     2                                               # Training 
     3         1         36.0     36.0      0.0      start1 = time.time()
     4         1  341765201.0 341765201.0     99.5      w, b, l = train(X, y, bs=100, epochs=1000, lr=0.001)
     5         1    1388000.0 1388000.0      0.4      pred = predict(X,w,b)
     6         1       2111.0   2111.0      0.0      acc = accuracy(y, pred)
     7         1         28.0     28.0      0.0      end1 = time.time()
     8         1       1205.0   1205.0      0.0      print(f'Time to run our logistic regression: {end1 - start1} s')
     9         1        245.0    245.0      0.0      print(f'Accuracy of our logistic regression: {acc}')
    10                                               
    11                                               
    12         1          9.0      9.0      0.0      start2 = time.time()
    13         1        364.0    364.0      0.0      lr = LogisticRegression()
    14         1     252927.0 252927.0      0.1      lr.fit(X, y)
    15         1      54592.0  54592.0      0.0      sk_acc = lr.score(X, y)
    16         1         16.0     16.0      0.0      end2 = time.time()
    17         1        736.0    736.0      0.0      print(f'Time to run Sklearn implementation {end2 - start2} s')
    18         1        224.0    224.0      0.0      print(f'Accuracy of Sklearn implementation: {sk_acc}')
    19                                               
    20         1        245.0    245.0      0.0      print(f'\nDifference in accuracies: {np.abs(acc-sk_acc)}')
    21                                               
    22         1          4.0      4.0      0.0      return w