In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification, make_gaussian_quantiles, make_moons, make_blobs
import matplotlib.pyplot as plt
import time
from sklearn.linear_model import LogisticRegression
from logistic_regression_original import LogisticRegressionOriginal
from logistic_regression_optimized import LogisticRegressionOptimized
import jax.numpy as jnp

## Synthetic data sets generated with scikit-learn

In [2]:
# Number of points drawn for every synthetic data set below.
samp = 100000

# Easy case: two 2-D blobs.
X1, y1 = make_blobs(
    n_samples=samp,
    n_features=2,
    centers=2,
    random_state=42,
)

# Slightly harder case: two half-moons with heavy noise.
X2, y2 = make_moons(n_samples=samp, noise=0.75, random_state=42)

# Extreme case: two classes from make_gaussian_quantiles; the models
# compared in this notebook score roughly 50% on it.
X3, y3 = make_gaussian_quantiles(
    n_samples=samp,
    n_features=2,
    n_classes=2,
    random_state=42,
)

In [3]:
def _unpenalized_sklearn_model():
    """Build the unregularised scikit-learn ``LogisticRegression`` baseline.

    scikit-learn 1.2 deprecated the string ``penalty='none'`` in favour of
    ``penalty=None`` and later releases reject the string, whereas releases
    older than 1.2 only accept the string. The spelling is therefore chosen
    from the installed version; if the version cannot be parsed the original
    ``'none'`` spelling is kept.
    """
    import re
    import sklearn

    version = re.match(r'(\d+)\.(\d+)', sklearn.__version__)
    if version and tuple(map(int, version.groups())) >= (1, 2):
        return LogisticRegression(penalty=None)
    return LogisticRegression(penalty='none')


def compare(X, y, model_type):
    """Benchmark one of our logistic regressions against scikit-learn.

    Trains the selected in-house model on ``(X, y)``, then fits an
    unpenalised scikit-learn ``LogisticRegression`` on the same data, and
    prints run time and training-set accuracy for both, plus the absolute
    accuracy gap.

    Each reported time covers fitting, predicting on ``X`` and computing the
    accuracy; model construction is excluded.

    Parameters
    ----------
    X : array-like
        Feature matrix, as produced by the ``sklearn.datasets`` generators.
    y : array-like
        Class labels for the rows of ``X``.
    model_type : str
        ``'original'`` selects ``LogisticRegressionOriginal``; any other
        value selects ``LogisticRegressionOptimized`` (kept as a fallback
        for backward compatibility).

    Returns
    -------
    The value of ``get_weights()`` on the trained in-house model.
    """
    if model_type == 'original':
        X_model, y_model = X, y
        lr_model = LogisticRegressionOriginal(batch_size=100, max_epochs=1000, learning_rate=0.001)
    else:
        # Convert into *separate* names: rebinding X/y here would also feed
        # the JAX arrays (float32 unless x64 is enabled) to the scikit-learn
        # baseline below, so the reference run would differ by model_type.
        X_model, y_model = jnp.array(X), jnp.array(y)
        lr_model = LogisticRegressionOptimized()

    # perf_counter is the monotonic, high-resolution clock meant for timing.
    start1 = time.perf_counter()
    lr_model.train(X_model, y_model)
    pred = lr_model.predict(X_model)
    acc = lr_model.accuracy(y_model, pred)
    end1 = time.perf_counter()
    print(f'Time to run our logistic regression: {end1 - start1} s')
    print(f'Accuracy of our logistic regression: {acc}')

    # Baseline: always fitted on the caller's original arrays.
    lr = _unpenalized_sklearn_model()
    start2 = time.perf_counter()
    lr.fit(X, y)
    sk_pred = lr.predict(X)
    # Score with the same accuracy routine (and label container) as above so
    # both numbers are computed identically.
    sk_acc = lr_model.accuracy(y_model, sk_pred)
    end2 = time.perf_counter()
    print(f'Time to run Sklearn implementation {end2 - start2} s')
    print(f'Accuracy of Sklearn implementation: {sk_acc}')

    print(f'\nDifference in accuracies: {np.abs(acc-sk_acc)}')

    return lr_model.get_weights()

In [4]:
# Original implementation on the easy-to-separate blob data (X1, y1).
w1 = compare(X=X1, y=y1, model_type='original')

Time to run our logistic regression: 439.8911409378052 s
Accuracy of our logistic regression: 1.0
Time to run Sklearn implementation 0.3517172336578369 s
Accuracy of Sklearn implementation: 1.0

Difference in accuracies: 0.0


In [5]:
# Original implementation on the noisy half-moons data (X2, y2).
w2 = compare(X=X2, y=y2, model_type='original')

Time to run our logistic regression: 438.25348114967346 s
Accuracy of our logistic regression: 0.75368
Time to run Sklearn implementation 0.24567890167236328 s
Accuracy of Sklearn implementation: 0.75368

Difference in accuracies: 0.0


In [6]:
# Original implementation on the Gaussian-quantiles data (X3, y3).
w3 = compare(X=X3, y=y3, model_type='original')

Time to run our logistic regression: 456.31605792045593 s
Accuracy of our logistic regression: 0.50117
Time to run Sklearn implementation 0.2382059097290039 s
Accuracy of Sklearn implementation: 0.50358

Difference in accuracies: 0.0024100000000000232


In [7]:
# Optimized implementation on the easy-to-separate blob data (X1, y1).
w4 = compare(X=X1, y=y1, model_type='optimized')



Optimization terminated successfully.
         Current function value: 0.000084
         Iterations: 24
         Function evaluations: 24
         Gradient evaluations: 24
         Hessian evaluations: 24
Time to run our logistic regression: 1.1751070022583008 s
Accuracy of our logistic regression: 1.0
Time to run Sklearn implementation 0.14775967597961426 s
Accuracy of Sklearn implementation: 1.0

Difference in accuracies: 0.0


In [8]:
# Optimized implementation on the noisy half-moons data (X2, y2).
w5 = compare(X=X2, y=y2, model_type='optimized')

Optimization terminated successfully.
         Current function value: 50571.476562
         Iterations: 5
         Function evaluations: 13
         Gradient evaluations: 13
         Hessian evaluations: 5
Time to run our logistic regression: 0.6658849716186523 s
Accuracy of our logistic regression: 0.7536900043487549
Time to run Sklearn implementation 0.039034128189086914 s
Accuracy of Sklearn implementation: 0.7536799907684326

Difference in accuracies: 1.0013580322265625e-05


In [9]:
# Optimized implementation on the Gaussian-quantiles data (X3, y3).
w6 = compare(X=X3, y=y3, model_type='optimized')

Optimization terminated successfully.
         Current function value: 69312.359375
         Iterations: 3
         Function evaluations: 4
         Gradient evaluations: 4
         Hessian evaluations: 3
Time to run our logistic regression: 0.647352933883667 s
Accuracy of our logistic regression: 0.5035799741744995
Time to run Sklearn implementation 0.025961875915527344 s
Accuracy of Sklearn implementation: 0.5035799741744995

Difference in accuracies: 0.0


w1 = compare(X1, y1, 'original')
# w1 = compare(X1, y1, 'original')
Time to run our logistic regression: 504.76238799095154 s
Accuracy of our logistic regression: 1.0
Time to run Sklearn implementation 0.3529651165008545 s
Accuracy of Sklearn implementation: 1.0

Difference in accuracies: 0.0
w2 = compare(X2, y2, 'original')
# w2 = compare(X2, y2, 'original')
Time to run our logistic regression: 492.10879278182983 s
Accuracy of our logistic regression: 0.75368
Time to run Sklearn implementation 0.2412738800048828 s
Accuracy of Sklearn implementation: 0.75368

Difference in accuracies: 0.0
w3 = compare(X3, y3, 'original')
# w3 = compare(X3, y3, 'original')
Time to run our logistic regression: 482.60274934768677 s
Accuracy of our logistic regression: 0.50117
Time to run Sklearn implementation 0.23366093635559082 s
Accuracy of Sklearn implementation: 0.50358

Difference in accuracies: 0.0024100000000000232