## Import Library

In [1]:
import numpy as np
from ml_from_scratch.linear_model import LogisticRegression

from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.metrics import  classification_report

from itertools import product

## Load Data

In [2]:
dataset = datasets.load_breast_cancer()
X, y = dataset.data, dataset.target

X, y = dataset.data, dataset.target 
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

## Modelling

In [3]:
clf = LogisticRegression()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

  return 1 / (1 + np.exp(-z))


In [4]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.88      1.00      0.93        43
           1       1.00      0.92      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.96      0.95       114
weighted avg       0.95      0.95      0.95       114



In [5]:
clf.loss

3.050577341872551

## Hyperparameter

In [6]:
learning_rate = [1e-4, 1e-3, 1e-2, 1e-1]
iteration = [250, 500, 1000, 2000]

clf_best_loss = np.inf
clf_best = None

for lr, itr in product(learning_rate, iteration):
    temp_clf = LogisticRegression(lr, itr)
    temp_clf.fit(X_train, y_train)

    if clf_best_loss > temp_clf.loss:
        clf_best_loss = temp_clf.loss
        clf_best = temp_clf

    print(f'parameter : learning rate {lr}, num iterations {itr} | log loss {temp_clf.loss}')

parameter : learning rate 0.0001, num iterations 250 | log loss 0.586515749755615
parameter : learning rate 0.0001, num iterations 500 | log loss 0.4467435135411476
parameter : learning rate 0.0001, num iterations 1000 | log loss 4.473646457691497
parameter : learning rate 0.0001, num iterations 2000 | log loss 0.3666223689155307
parameter : learning rate 0.001, num iterations 250 | log loss 3.2003086500086804
parameter : learning rate 0.001, num iterations 500 | log loss 2.5708342499791716


  return 1 / (1 + np.exp(-z))


parameter : learning rate 0.001, num iterations 1000 | log loss 2.467164768247537
parameter : learning rate 0.001, num iterations 2000 | log loss 14.792963986284569
parameter : learning rate 0.01, num iterations 250 | log loss 3.5762055431276685
parameter : learning rate 0.01, num iterations 500 | log loss 2.9285346589935153
parameter : learning rate 0.01, num iterations 1000 | log loss 3.050577341872551
parameter : learning rate 0.01, num iterations 2000 | log loss 5.0343602468666
parameter : learning rate 0.1, num iterations 250 | log loss 3.640142928403272
parameter : learning rate 0.1, num iterations 500 | log loss 3.1882421927843168
parameter : learning rate 0.1, num iterations 1000 | log loss 3.036426910158029
parameter : learning rate 0.1, num iterations 2000 | log loss 20.19190004625548


In [7]:
clf_best.__dict__

{'learning_rate': 0.0001,
 'num_iterations': 2000,
 'weights': array([ 3.61520767e-02,  2.43831549e-02,  1.97884105e-01,  5.09152413e-02,
         2.47095463e-04, -5.22576313e-04, -1.15256682e-03, -4.65742016e-04,
         4.71895558e-04,  2.24883228e-04,  2.21954405e-05,  1.96612063e-03,
        -3.25003399e-03, -1.03660149e-01,  4.12003352e-06, -1.43439957e-04,
        -2.14400312e-04, -4.19227876e-05,  1.98132935e-05, -4.39156928e-06,
         3.85773147e-02,  2.44031176e-02,  1.91524304e-01, -8.70708180e-02,
         2.52511892e-04, -1.91480780e-03, -2.91158031e-03, -6.93361133e-04,
         4.03888899e-04,  1.15825798e-04]),
 'bias': 0.004793749410828046,
 'loss': 0.3666223689155307}

In [8]:
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.88      1.00      0.93        43
           1       1.00      0.92      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.96      0.95       114
weighted avg       0.95      0.95      0.95       114

