<a href="https://colab.research.google.com/github/chhavi0987/Model-Evaluation-and-Hyperparameter-Tuning/blob/main/assignment_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from scipy.stats import randint
import warnings
warnings.filterwarnings('ignore')

# Step 1: Load the breast-cancer dataset; features go into a labelled DataFrame
data = load_breast_cancer()
y = data.target
X = pd.DataFrame(data=data.data, columns=data.feature_names)

# Step 2: Hold out 20% of the rows for testing (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 3: Standardise features for Logistic Regression and SVM.
# The scaler is fitted on the training rows only and then applied to both
# partitions, so no test-set statistics reach the models.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 4: Define Models
# The random forest is seeded so the baseline metrics are identical on every
# Restart & Run All; without a seed each run grows a different forest.
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Random Forest': RandomForestClassifier(random_state=42),
    'SVM': SVC()
}

# Step 5: Evaluation Function
def evaluate_model(model, X_test, y_test):
    """Score a classifier's predictions against reference labels.

    Parameters
    ----------
    model : object
        Estimator whose ``predict`` method is called on ``X_test``.
    X_test : array-like
        Feature rows handed to ``model.predict``.
    y_test : array-like
        Reference labels the predictions are compared with.

    Returns
    -------
    dict
        Maps 'Accuracy', 'Precision', 'Recall' and 'F1-Score' to the value
        of the corresponding metric function, each called with its default
        settings as ``metric(y_test, predictions)``.
    """
    predictions = model.predict(X_test)
    # Table order fixes the key order of the returned dict.
    metric_table = (
        ('Accuracy', accuracy_score),
        ('Precision', precision_score),
        ('Recall', recall_score),
        ('F1-Score', f1_score),
    )
    return {label: metric(y_test, predictions) for label, metric in metric_table}

# Step 6: Fit every baseline model and collect its test-set metrics
results = {}

for name, model in models.items():
    # Logistic Regression and SVM use the standardised features;
    # the random forest is trained on the raw feature values.
    uses_scaled = name in ('Logistic Regression', 'SVM')
    train_features = X_train_scaled if uses_scaled else X_train
    test_features = X_test_scaled if uses_scaled else X_test

    model.fit(train_features, y_train)
    results[name] = evaluate_model(model, test_features, y_test)

print("\nInitial Model Evaluation Results:")
print(pd.DataFrame(results).T)

# Step 7: Hyperparameter Tuning - RandomizedSearchCV for Random Forest
param_dist_rf = {
    'n_estimators': randint(50, 150),  # integers drawn from [50, 150)
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10]
}

# The search's own random_state only fixes which candidates are sampled.
# The forest needs its own seed as well, otherwise the CV scores (and hence
# the selected best_estimator_) change from run to run.
random_search_rf = RandomizedSearchCV(RandomForestClassifier(random_state=42),
                                      param_distributions=param_dist_rf,
                                      n_iter=10, cv=3, scoring='f1', random_state=42, verbose=2)
random_search_rf.fit(X_train, y_train)
# With the default refit=True, best_estimator_ is already trained on all of X_train.
best_rf = random_search_rf.best_estimator_

# Step 8: Hyperparameter Tuning - RandomizedSearchCV for SVM
# Discrete grid the search samples from (16 combinations, 5 are tried).
param_dist_svm = dict(
    C=[0.1, 1, 10, 100],
    gamma=['scale', 'auto'],
    kernel=['linear', 'rbf'],
)

random_search_svm = RandomizedSearchCV(
    SVC(),
    param_distributions=param_dist_svm,
    n_iter=5,
    cv=3,
    scoring='f1',
    random_state=42,
    verbose=2,
)
# The SVM is tuned on the standardised training features.
random_search_svm.fit(X_train_scaled, y_train)
best_svm = random_search_svm.best_estimator_

# Step 9: Evaluate Tuned Models
tuned_models = {
    'Tuned Random Forest': best_rf,
    'Tuned SVM': best_svm
}

# best_estimator_ has already been refit on the full training set by
# RandomizedSearchCV (refit=True is the default), so the models are scored
# directly. Re-fitting here would be redundant work and, for an unseeded
# forest, would replace the selected model with a different random one.
for name, model in tuned_models.items():
    # The SVM was tuned on standardised features; the forest on raw features.
    X_eval = X_test_scaled if name == 'Tuned SVM' else X_test
    results[name] = evaluate_model(model, X_eval, y_test)

print("\nAfter Hyperparameter Tuning Results:")
print(pd.DataFrame(results).T)

# Step 10: Detailed Classification Report for the Best Model
# Select the model with the highest recorded F1-Score rather than assuming
# it is the tuned random forest (ties resolve to the earliest entry).
fitted_models = {**models, **tuned_models}
best_name = max(results, key=lambda model_name: results[model_name]['F1-Score'])
best_model = fitted_models[best_name]

# Random forests were trained on raw features; every other model on scaled ones.
X_best = X_test if 'Random Forest' in best_name else X_test_scaled

print(f"\nClassification Report for Best Model ({best_name}):")
y_pred_best = best_model.predict(X_best)
print(classification_report(y_test, y_pred_best))



Initial Model Evaluation Results:
                     Accuracy  Precision    Recall  F1-Score
Logistic Regression  0.973684   0.972222  0.985915  0.979021
Random Forest        0.964912   0.958904  0.985915  0.972222
SVM                  0.982456   0.972603  1.000000  0.986111
Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] END .max_depth=20, min_samples_split=2, n_estimators=64; total time=   0.9s
[CV] END .max_depth=20, min_samples_split=2, n_estimators=64; total time=   0.6s
[CV] END .max_depth=20, min_samples_split=2, n_estimators=64; total time=   0.6s
[CV] END .max_depth=20, min_samples_split=2, n_estimators=70; total time=   0.4s
[CV] END .max_depth=20, min_samples_split=2, n_estimators=70; total time=   0.3s
[CV] END .max_depth=20, min_samples_split=2, n_estimators=70; total time=   0.5s
[CV] END max_depth=20, min_samples_split=5, n_estimators=132; total time=   0.7s
[CV] END max_depth=20, min_samples_split=5, n_estimators=132; total time=   0.5s
[CV] END max