In [1]:
import time
from datetime import datetime, timedelta

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss

from kaggle.models.LogisticRegression import LogisticRegressionCustom

# Fixed seed so the synthetic dataset -- and therefore the iteration counts and
# log losses printed by the benchmarks -- is identical on every Restart & Run All.
RANDOM_STATE = 42

X, y = make_classification(
    n_samples=1000000,
    n_features=150,
    n_informative=80,
    n_redundant=20,
    random_state=RANDOM_STATE,
)
# Prepend a column of ones so the intercept is fitted as an ordinary coefficient
# (the scikit-learn model is created with fit_intercept=False for that reason).
X = np.column_stack((np.ones(len(X)), X))

def sklearn():
    """Benchmark scikit-learn's newton-cg logistic regression on the global dataset.

    Reads the module-level ``X`` (whose first column is all ones) and ``y``,
    fits the model, and prints the solver's iteration count, the log loss on
    the training data and the elapsed time. Returns nothing.
    """
    # perf_counter is monotonic, so the measurement cannot be skewed by system
    # clock adjustments the way a datetime.now() difference can.
    start = time.perf_counter()
    # X already carries an explicit column of ones, so the built-in intercept is off.
    model = LogisticRegression(solver='newton-cg', fit_intercept=False)
    model.fit(X, y)
    y_hat = model.predict_proba(X)[:, 1]  # second probability column

    print('nb iterations : {}'.format(model.n_iter_))
    print('logloss : {:.5f}'.format(log_loss(y, y_hat)))
    # The reported time spans fitting, prediction and the log-loss evaluation above.
    # timedelta keeps the H:MM:SS.ffffff rendering of the original report.
    elapsed = timedelta(seconds=time.perf_counter() - start)
    print('time : {}\n'.format(elapsed))

def custom():
    """Benchmark the project's ``LogisticRegressionCustom`` on the global dataset.

    Reads the module-level ``X`` (whose first column is all ones) and ``y``,
    fits the model with its default arguments, and prints the iteration count,
    the log loss on the training data and the elapsed time. Returns nothing.
    """
    # perf_counter is monotonic, so the measurement cannot be skewed by system
    # clock adjustments the way a datetime.now() difference can.
    start = time.perf_counter()
    # Custom Newton-cg implementation, constructed with default settings.
    model = LogisticRegressionCustom()
    model.fit(X, y)
    y_hat = model.predict_proba(X)[:, 1]  # second probability column

    print('nb iterations : {}'.format(model.n_iter_))
    print('logloss : {:.5f}'.format(log_loss(y, y_hat)))
    # The reported time spans fitting, prediction and the log-loss evaluation above.
    # timedelta keeps the H:MM:SS.ffffff rendering of the original report.
    elapsed = timedelta(seconds=time.perf_counter() - start)
    print('time : {}\n'.format(elapsed))


In [3]:
# Profile one run of the custom solver; `-l 4` restricts the report to 4 entries.
%prun -l 4 custom()

nb iterations : 6
logloss : 0.41251
time : 0:00:06.713664

 

In [None]:
         2844 function calls in 6.714 seconds

   Ordered by: internal time
   List reduced from 142 to 4 due to restriction <4>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    6.221    6.221    6.436    6.436 LogisticRegression.py:76(_newton_cg)
        8    0.232    0.029    0.232    0.029 LogisticRegression.py:30(_sigmoid)
        7    0.077    0.011    0.077    0.011 {method 'sort' of 'numpy.ndarray' objects}
        1    0.058    0.058    0.208    0.208 classification.py:1544(log_loss)

In [2]:
# Profile one run of the scikit-learn solver; `-l 4` restricts the report to 4 entries.
%prun -l 4 sklearn()



nb iterations : [22]
logloss : 0.41251
time : 0:00:47.256582

 

In [None]:
         9517 function calls (9483 primitive calls) in 47.257 seconds

   Ordered by: internal time
   List reduced from 307 to 4 due to restriction <4>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      686   30.493    0.044   30.493    0.044 {method 'dot' of 'numpy.ndarray' objects}
       23    7.233    0.314    9.309    0.405 logistic.py:167(_logistic_grad_hess)
      922    4.884    0.005    4.884    0.005 {built-in method numpy.core.multiarray.dot}
       48    3.084    0.064    3.084    0.064 {sklearn.utils._logistic_sigmoid._log_logistic_sigmoid}