In [19]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [2]:
# Load the breast-cancer dataset directly as a (features, labels) pair:
# X is the feature matrix, y the class label of each row.
X, y = load_breast_cancer(return_X_y=True)

In [20]:
# Hold out a quarter of the rows for evaluation; the fixed seed keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=41,
)

In [81]:
class LogisticRegressionML():
    """Binary classifier built from least squares on -1/+1 coded labels.

    ``fit`` recodes the labels as +1 (label equal to 1) or -1 (any other
    value) and solves an ordinary least-squares problem for one weight per
    column of ``X``.  No intercept column is added.  At prediction time the
    linear score ``X . coefs_`` is passed through the logistic function and
    thresholded at 0.5.

    Attributes:
        coefs_: 1-D array with one fitted weight per feature, or ``None``
            until ``fit`` has been called.
    """

    # Instantiation
    def __init__(self):
        self.coefs_ = None

    # Fitting
    def fit(self, X, y):
        """Estimate the weights from training data.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of labels; entries equal to 1 are the positive
                class, every other value is treated as the negative class.

        Returns:
            The fitted estimator itself, so calls can be chained.
        """
        X = np.asarray(X)
        # The previous transform -log(1/y - 1) is +inf exactly when y == 1
        # and was then mapped to +1 (everything else to -1).  Building the
        # +/-1 targets directly gives the same values without dividing by
        # zero or taking log(0), and also accepts plain Python lists.
        u = np.where(np.asarray(y) == 1, 1.0, -1.0)
        # The pseudo-inverse yields the least-squares solution without
        # explicitly inverting X^T X, which is badly conditioned and raises
        # LinAlgError when columns of X are linearly dependent.
        self.coefs_ = np.linalg.pinv(X).dot(u)
        return self

    # Prediction
    def predict_log_proba(self, Xnew):
        """Return the linear score ``Xnew . coefs_`` for every row.

        Despite the method name, these values are the log-odds that
        ``predict_proba`` feeds into the logistic function, not logarithms
        of probabilities.  The name is kept so existing callers keep working.

        Args:
            Xnew: 2-D array-like of shape (n_samples, n_features).

        Returns:
            1-D array of length n_samples.
        """
        return np.asarray(Xnew).dot(self.coefs_)

    def predict_proba(self, Xnew):
        """Return the positive-class probability for every row of ``Xnew``.

        Args:
            Xnew: 2-D array-like of shape (n_samples, n_features).

        Returns:
            1-D array of values in [0, 1].
        """
        scores = self.predict_log_proba(Xnew)
        # sigmoid(s) = 1 / (1 + exp(-s)) = exp(-log(1 + exp(-s))).
        # The direct form exp(s) / (1 + exp(s)) becomes inf / inf = nan once
        # exp(s) overflows; logaddexp evaluates log(1 + exp(-s)) stably.
        return np.exp(-np.logaddexp(0.0, -scores))

    def predict(self, Xnew):
        """Return hard 0/1 labels for every row of ``Xnew``.

        A row is labelled 1 when its predicted probability is strictly
        greater than 0.5, otherwise 0.

        Returns:
            A Python list of ints (0 or 1), one per row.
        """
        preds_proba = np.asarray(self.predict_proba(Xnew))
        return (preds_proba > 0.5).astype(int).tolist()

    # Scoring
    def score(self, X, y):
        """Return the accuracy of ``predict(X)`` against the labels ``y``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: array-like of true 0/1 labels, one per row of ``X``.

        Returns:
            Fraction of rows whose predicted label equals the true label.
        """
        # Convert both sides to arrays so the comparison is element-wise even
        # when y is a plain list (list == list would yield a single bool).
        y_true = np.asarray(y).ravel()
        y_pred = np.asarray(self.predict(X))
        return np.mean(y_true == y_pred)
        

In [90]:
# Train the from-scratch model on the training split, then label the
# held-out rows with it.
lr_scratch = LogisticRegressionML()
lr_scratch.fit(X=X_train, y=y_train)
preds = lr_scratch.predict(Xnew=X_test)

  
  


In [92]:
%%timeit
# Benchmark: construct, fit and score the from-scratch model in one timed run.
# NOTE(review): %%timeit runs this body inside its own timing function, so
# `scratch_score` assigned here is presumably NOT left in the notebook
# namespace -- confirm against the IPython docs and compute the score in a
# regular cell if it is needed later.
lr_scratch = LogisticRegressionML()
lr_scratch.fit(X_train, y_train)
scratch_score = lr_scratch.score(X_test, y_test)

  
  


563 µs ± 55.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [85]:
from sklearn.linear_model import LogisticRegression

In [93]:
%%timeit
# Benchmark: construct, fit and score scikit-learn's LogisticRegression with
# its default settings in one timed run.
# NOTE(review): %%timeit runs this body inside its own timing function, so
# `lr` and `sklearn_score` assigned here are presumably NOT left in the
# notebook namespace -- confirm against the IPython docs and fit/score in a
# regular cell if they are needed later.
lr = LogisticRegression()
lr.fit(X_train, y_train)

sklearn_score = lr.score(X_test, y_test)

3.52 ms ± 86.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [87]:
# Names assigned inside a `%%timeit` cell stay local to the timing run, so the
# scores (and the sklearn model) have to be produced in a regular cell for this
# comparison to work after Restart & Run All.
scratch_score = lr_scratch.score(X_test, y_test)

lr = LogisticRegression()
lr.fit(X_train, y_train)
sklearn_score = lr.score(X_test, y_test)

# Test-set accuracy: from-scratch model first, scikit-learn second.
print(scratch_score, sklearn_score)

0.965034965035 0.979020979021


In [88]:
# Test-set labels predicted by the scikit-learn model.
# NOTE(review): this needs a fitted `lr` in the kernel namespace; a `%%timeit`
# cell presumably does not export the names it assigns, so make sure `lr` is
# fitted in a regular cell before this one runs.
sk_preds = lr.predict(X_test)