-------------------------
### Demonstrate how feature scaling helps logistic regression
------------------------------

In [30]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [31]:
# Fetch the Breast Cancer dataset and pull out the feature matrix (X)
# and the target vector (y) from the returned container.
data = load_breast_cancer()
X = data.data
y = data.target

In [32]:
# Hold out 30% of the samples as a test set; the fixed random_state keeps
# the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [33]:
# Baseline: train logistic regression directly on the raw (unscaled)
# training features.
logreg_no_scaling = LogisticRegression(solver="liblinear")
logreg_no_scaling.fit(X=X_train, y=y_train)


In [34]:
# Classify the held-out test samples with the model trained on raw features.
y_pred_no_scaling = logreg_no_scaling.predict(X=X_test)


In [35]:
# Standardize the features. The scaler is fitted on the training split only
# and then applied to both splits, so the test data never influences the
# scaling parameters.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [36]:
# Same model configuration as the baseline, but trained on the
# standardized training features.
logreg_with_scaling = LogisticRegression(solver="liblinear")
logreg_with_scaling.fit(X=X_train_scaled, y=y_train)


In [37]:
# Classify the standardized test samples with the model trained on
# standardized features.
y_pred_with_scaling = logreg_with_scaling.predict(X=X_test_scaled)


In [38]:
# Score both models against the same test labels so the two accuracies
# are directly comparable.
accuracy_no_scaling = accuracy_score(y_true=y_test, y_pred=y_pred_no_scaling)
accuracy_with_scaling = accuracy_score(y_true=y_test, y_pred=y_pred_with_scaling)

# Report the two scores one after the other.
print("Accuracy without scaling:", accuracy_no_scaling)
print("Accuracy with scaling:", accuracy_with_scaling)


Accuracy without scaling: 0.9649122807017544
Accuracy with scaling: 0.9824561403508771


In [40]:
# Compare how many solver iterations each fit needed. n_iter_ is indexed
# with [0] to read its first entry for each fitted model.
n_iter_no_scaling = logreg_no_scaling.n_iter_[0]
n_iter_with_scaling = logreg_with_scaling.n_iter_[0]

print("Number of iterations without scaling:", n_iter_no_scaling)
print("Number of iterations with scaling:", n_iter_with_scaling)

Number of iterations without scaling: 21
Number of iterations with scaling: 8
