In [2]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.metrics import accuracy_score

# Load Iris dataset
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = iris.target

print("----- Question 1: Why Feature Scaling is Essential -----")
print("""
Feature scaling ensures that features contribute equally to model training.
Without scaling, features with larger magnitudes can dominate distance-based algorithms (e.g., KNN, SVM).
It also improves gradient descent convergence and overall model performance.
""")

# Demonstrating effect of unscaled features
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
model_unscaled = LogisticRegression(max_iter=200)
model_unscaled.fit(X_train_raw, y_train)
pred_unscaled = model_unscaled.predict(X_test_raw)
print(f"Accuracy without scaling: {accuracy_score(y_test, pred_unscaled):.4f}")

# Question 2: Min-Max Scaling
scaler_minmax = MinMaxScaler()
X_minmax = scaler_minmax.fit_transform(X)
X_train_mm, X_test_mm, _, _ = train_test_split(X_minmax, y, test_size=0.3, random_state=42)
model_mm = LogisticRegression(max_iter=200)
model_mm.fit(X_train_mm, y_train)
pred_mm = model_mm.predict(X_test_mm)
print(f"Accuracy with Min-Max Scaling: {accuracy_score(y_test, pred_mm):.4f}")

# Question 3: Standardization (Z-score)
scaler_std = StandardScaler()
X_std = scaler_std.fit_transform(X)
X_train_std, X_test_std, _, _ = train_test_split(X_std, y, test_size=0.3, random_state=42)
model_std = LogisticRegression(max_iter=200)
model_std.fit(X_train_std, y_train)
pred_std = model_std.predict(X_test_std)
print(f"Accuracy with Standardization: {accuracy_score(y_test, pred_std):.4f}")

# Question 4: Robust Scaling
scaler_robust = RobustScaler()
X_robust = scaler_robust.fit_transform(X)
X_train_rb, X_test_rb, _, _ = train_test_split(X_robust, y, test_size=0.3, random_state=42)
model_rb = LogisticRegression(max_iter=200)
model_rb.fit(X_train_rb, y_train)
pred_rb = model_rb.predict(X_test_rb)
print(f"Accuracy with Robust Scaling: {accuracy_score(y_test, pred_rb):.4f}")


----- Question 1: Why Feature Scaling is Essential -----

Feature scaling ensures that features contribute equally to model training.
Without scaling, features with larger magnitudes can dominate distance-based algorithms (e.g., KNN, SVM).
It also improves gradient descent convergence and overall model performance.

Accuracy without scaling: 1.0000
Accuracy with Min-Max Scaling: 0.9111
Accuracy with Standardization: 1.0000
Accuracy with Robust Scaling: 0.9778
