#### **Imports**

In [1]:
import numpy as np

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

from scratchml.supervised_learning.linear import LogisticRegressionSML

## **Data**

We will be using the https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html dataset to test the model.

In [2]:
# Load the breast cancer dataset
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

In [3]:
scaler = StandardScaler()

# Fit the scaler to the features
scaler.fit(X)

# Perform feature scaling on the features
X_scaled = scaler.transform(X)

In [4]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Print the shapes of the resulting sets
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

X_train shape: (455, 30)
y_train shape: (455,)
X_test shape: (114, 30)
y_test shape: (114,)


## **Linear Regression Implementation**

Here an implementation of Linear Regression(from scratch) using only numpy is provided. The LinearRegression class allows to fit the model using the OLS method or Gradient Descent.

In [5]:
# Insantiate models
lr_sklearn = LogisticRegression()
lr_gd = LogisticRegressionSML()
lr_sgd = LogisticRegressionSML()

# Fit models
lr_sklearn.fit(X_train,y_train)
lr_gd.fit(X_train,y_train,method="gradientdesc",lr=0.001, num_iter=100)
lr_sgd.fit(X_train,y_train,method="stochasticgradientdesc",lr=0.001, num_iter=100)

Iteration 10: Accuracy = 0.927; Loss = 0.19562
Iteration 20: Accuracy = 0.960; Loss = 0.13003
Iteration 30: Accuracy = 0.963; Loss = 0.10684
Iteration 40: Accuracy = 0.969; Loss = 0.09475
Iteration 50: Accuracy = 0.976; Loss = 0.08756
Iteration 60: Accuracy = 0.976; Loss = 0.08282
Iteration 70: Accuracy = 0.978; Loss = 0.07942
Iteration 80: Accuracy = 0.978; Loss = 0.07679
Iteration 90: Accuracy = 0.980; Loss = 0.07466
Iteration 100: Accuracy = 0.982; Loss = 0.07288
Epoch 10: Loss = 2.50505, Accuracy = 0.927
Epoch 20: Loss = 1.44231, Accuracy = 0.958
Epoch 30: Loss = 1.29049, Accuracy = 0.963
Epoch 40: Loss = 0.98684, Accuracy = 0.971
Epoch 50: Loss = 0.83502, Accuracy = 0.976
Epoch 60: Loss = 0.75911, Accuracy = 0.978
Epoch 70: Loss = 0.75911, Accuracy = 0.978
Epoch 80: Loss = 0.75911, Accuracy = 0.978
Epoch 90: Loss = 0.60729, Accuracy = 0.982
Epoch 100: Loss = 0.60729, Accuracy = 0.982


In [6]:
# Evaluate models
y_pred = lr_sklearn.predict(X_test)  # Predict target values for the test data
acc_sklearn = accuracy_score(y_test, y_pred)

y_pred_gd = lr_gd.predict(X_test)
acc_gd = accuracy_score(y_test, y_pred_gd)

y_pred_sgd = lr_gd.predict(X_test)
acc_sgd = accuracy_score(y_test, y_pred_sgd)

print(f"Acc for (Sklearn): {acc_sklearn}")
print(f"Acc for GD: {acc_gd}")
print(f"Acc for SGD: {acc_sgd}")

Acc for (Sklearn): 0.9736842105263158
Acc for GD: 0.9824561403508771
Acc for SGD: 0.9824561403508771
