In [1]:
#Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# scikit-learn tools
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score
import timeit
# Cancer dataset
from sklearn.datasets import load_breast_cancer


import logistic_regression as our_model

In [2]:
data = load_breast_cancer()

data['scaled'] = StandardScaler().fit_transform(data['data'])
X_train, X_test, y_train, y_test = train_test_split(data['scaled'], data['target'])

In [3]:
lr_ours = our_model.LogisticRegression()
lr_sk = LogisticRegression()

In [4]:
start_time = timeit.default_timer()
lr_ours.fit(X_train, y_train)
our_time = timeit.default_timer() - start_time

start_time = timeit.default_timer()
lr_sk.fit(X_train, y_train)
sk_time = timeit.default_timer() - start_time


print("Our time: %.3f" % our_time)
print("scikit-learn's time: %.3f" % sk_time)

Iter: 580, Cost: 0.04 Converged in 581 iterations!
Our time: 0.828
scikit-learn's time: 0.012


Our code appears to be much slower. This is likely scikit-learn is highly optimized, and the intensive components are likely running in compiled fortran or C. We also may be using a different converngence criteria by default. 

Let's check the accuracy:

In [5]:
our_acc = accuracy_score(lr_ours.predict(X_test), y_test)
sk_acc = accuracy_score(lr_sk.predict(X_test), y_test)

print("Our accuracy: %.5f" % our_acc)
print("scikit-learn's accuracy: %.5f" % sk_acc)

Our accuracy: 0.97902
scikit-learn's accuracy: 0.98601


The test accuracies are also very similar. The small discrepency may result from differences in convergence criteria or default regularization behavior.

In [6]:
print("Ours:")
print(confusion_matrix(y_test, lr_ours.predict(X_test)))

print("Scikit-Learn:")
print(confusion_matrix(y_test, lr_sk.predict(X_test)))


Ours:
[[59  1]
 [ 2 81]]
Scikit-Learn:
[[59  1]
 [ 1 82]]


Our confusion matrices are also very similar, so our model appears to be working! 