# 📊 Model Evaluation and Optimization
This notebook demonstrates evaluation metrics for regression and classification models, a strategy for handling imbalanced data (SMOTE), a visual comparison of underfitting and overfitting, and how to set up differentially private model training.

## 📈 Regression Metrics

In [None]:

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Toy regression example: ground-truth targets and the model's predictions.
y_true = [3.0, -0.5, 2.0, 7.0]
y_pred = [2.5, 0.0, 2.1, 7.8]

# Apply each regression metric to the same (truth, prediction) pair and
# unpack the scores in the order the metrics are listed.
mae, mse, r2 = (
    metric(y_true, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

# Report all three scores rounded to two decimals.
print(f"MAE: {mae:.2f}, MSE: {mse:.2f}, R²: {r2:.2f}")


## 🧠 Classification Metrics

In [None]:

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Toy binary-classification example: six true labels and six predicted labels.
y_true = [0, 1, 1, 0, 1, 0]
y_pred = [0, 1, 0, 0, 1, 1]

# Compute the scalar scores in one pass; order matches the names on the left.
acc, precision, recall, f1 = [
    score(y_true, y_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
]

# The confusion matrix is an array rather than a scalar, so compute it separately.
cm = confusion_matrix(y_true, y_pred)

print(
    f"Accuracy: {acc:.2f}, Precision: {precision:.2f}, "
    f"Recall: {recall:.2f}, F1 Score: {f1:.2f}"
)
print("Confusion Matrix:\n", cm)


## ⚖️ Handling Imbalanced Data using SMOTE

In [None]:

from sklearn.datasets import make_classification
from imblearn.over_sampling import SMOTE
from collections import Counter

# Build an imbalanced two-feature dataset (weights=[0.9] puts roughly 90% of
# the samples in the first class; flip_y=0 disables random label noise).
#
# make_classification defaults to n_informative=2 and n_redundant=2, which
# together exceed n_features=2 and make the call raise ValueError. With only
# two features, both must be informative and none redundant or repeated.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_repeated=0,
    weights=[0.9],
    flip_y=0,
    random_state=1,
)
print("Before SMOTE:", Counter(y))

# Oversample with SMOTE and show the class counts after resampling.
# A fixed random_state keeps the resampled data reproducible across runs.
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X, y)
print("After SMOTE:", Counter(y_res))


## 📉 Underfitting vs Overfitting (Visualization)

In [None]:

import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Synthetic 1-D regression problem: 80 sorted x-values in [0, 5) with a noisy
# sine target. The seed is fixed so the figure is reproducible.
np.random.seed(0)
X = np.sort(5 * np.random.rand(80, 1), axis=0)
y = np.sin(X).ravel() + np.random.normal(0, 0.2, X.shape[0])

# Use an explicit Figure/Axes pair instead of the implicit pyplot state so the
# cell always draws on its own fresh figure.
fig, ax = plt.subplots()
ax.scatter(X, y, color='black', label='data')

# Underfitting: a straight line cannot follow the sine shape.
model_linear = LinearRegression().fit(X, y)
ax.plot(X, model_linear.predict(X), color='red', label='Underfit (Linear)')

# Overfitting: a degree-15 polynomial has enough freedom to chase the noise.
poly = PolynomialFeatures(degree=15)
X_poly = poly.fit_transform(X)
model_poly = LinearRegression().fit(X_poly, y)
ax.plot(X, model_poly.predict(X_poly), color='blue', label='Overfit (Poly deg=15)')

# Label the axes so the figure stands alone when the notebook is skimmed.
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_title("Underfitting vs Overfitting")
ax.legend()
plt.show()


## 🔐 Differential Privacy in Model Optimization (Concept Only)

In [None]:

# DP-SGD example with PyTorch and Opacus
# NOTE: Install packages before running
# pip install torch torchvision opacus

import torch
from torch import nn, optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from opacus import PrivacyEngine

# Data: the MNIST training split converted to tensors, stored in (and
# downloaded to, if missing) the current working directory.
transform = transforms.Compose([transforms.ToTensor()])
mnist_train = datasets.MNIST('.', train=True, download=True, transform=transform)
train_loader = DataLoader(mnist_train, batch_size=64, shuffle=True)

# Model: flatten each 28x28 image, then one hidden layer of 128 units and a
# 10-way output layer.
layers = [
    nn.Flatten(),
    nn.Linear(28 * 28, 128),
    nn.ReLU(),
    nn.Linear(128, 10),
]
model = nn.Sequential(*layers)

optimizer = optim.SGD(model.parameters(), lr=0.01)

# Hand the model, optimizer and loader to Opacus; make_private returns the
# replacements for all three, which are rebound to the same names.
dp_settings = {
    "noise_multiplier": 1.0,
    "max_grad_norm": 1.0,
}
privacy_engine = PrivacyEngine()
model, optimizer, train_loader = privacy_engine.make_private(
    module=model,
    optimizer=optimizer,
    data_loader=train_loader,
    **dp_settings,
)

# No training loop is run here; the cell only prepares the objects.
print("Differential privacy enabled model ready for training.")
