# **CS351-Lab 11**
**Advanced Model Evaluation and Hyperparameter Tuning**



1. Computing advanced model evaluation metrics beyond accuracy (e.g.,precision, recall, F1-score, ROC-AUC).
2.   Implement k-fold and stratified k-fold cross-validation to ensure robust model evaluation.
3. Perform hyperparameter tuning using Grid Search and Random Search


Import Libraries and Load the Digits Dataset

In [1]:
# Import required libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_digits
# Load the Digits dataset
data = load_digits()
X = data.data  # Features
y = data.target  # Labels

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print(f"Training samples: {X_train.shape[0]}, Testing samples: {X_test.shape[0]}")


Training samples: 1437, Testing samples: 360


Evaluate Model with Advanced Metrics

In [4]:
# Train a simple Random Forest Classifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
# Predict on the test set
y_pred = model.predict(X_test)

# Calculate evaluation metrics for multi-class classification
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')  # weighted for multi-class
recall = recall_score(y_test, y_pred, average='weighted')  # weighted for multi-class
f1 = f1_score(y_test, y_pred, average='weighted')  # weighted for multi-class

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

# Display a detailed classification report
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.96
Precision: 0.96
Recall: 0.96
F1 Score: 0.96

Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.97      0.97        36
           1       0.90      0.97      0.93        36
           2       1.00      0.97      0.99        35
           3       0.97      0.97      0.97        37
           4       0.97      0.97      0.97        36
           5       0.97      1.00      0.99        37
           6       1.00      0.97      0.99        36
           7       0.92      1.00      0.96        36
           8       0.94      0.86      0.90        35
           9       0.97      0.92      0.94        36

    accuracy                           0.96       360
   macro avg       0.96      0.96      0.96       360
weighted avg       0.96      0.96      0.96       360



Cross-Validation Techniques

In [5]:
# Perform k-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
fold_accuracies = []

for train_index, val_index in kf.split(X_train):
    X_fold_train, X_fold_val = X_train[train_index], X_train[val_index]
    y_fold_train, y_fold_val = y_train[train_index], y_train[val_index]

    model = RandomForestClassifier(random_state=42)
    model.fit(X_fold_train, y_fold_train)
    y_fold_pred = model.predict(X_fold_val)
    fold_accuracies.append(accuracy_score(y_fold_val, y_fold_pred))

print(f"K-Fold Cross-Validation Accuracies: {fold_accuracies}")
print(f"Mean Accuracy: {np.mean(fold_accuracies):.2f}")

K-Fold Cross-Validation Accuracies: [0.96875, 0.9791666666666666, 0.9651567944250871, 0.9616724738675958, 0.975609756097561]
Mean Accuracy: 0.97


In [6]:
# Perform stratified k-fold cross-validation
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
stratified_accuracies = []

for train_index, val_index in skf.split(X_train, y_train):
    X_fold_train, X_fold_val = X_train[train_index], X_train[val_index]
    y_fold_train, y_fold_val = y_train[train_index], y_train[val_index]

    model = RandomForestClassifier(random_state=42)
    model.fit(X_fold_train, y_fold_train)
    y_fold_pred = model.predict(X_fold_val)
    stratified_accuracies.append(accuracy_score(y_fold_val, y_fold_pred))

print(f"Stratified K-Fold Accuracies: {stratified_accuracies}")
print(f"Mean Stratified Accuracy: {np.mean(stratified_accuracies):.2f}")

Stratified K-Fold Accuracies: [0.9722222222222222, 0.9756944444444444, 0.975609756097561, 0.9581881533101045, 0.9825783972125436]
Mean Stratified Accuracy: 0.97


Hyperparameter Tuning

In [7]:
# Perform Grid Search for hyperparameter tuning
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10]
}

grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

print("Best Parameters from Grid Search:", grid_search.best_params_)
print("Best Cross-Validation Accuracy:", grid_search.best_score_)



Best Parameters from Grid Search: {'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 200}
Best Cross-Validation Accuracy: 0.975633952768099


In [8]:
# Perform Random Search for hyperparameter tuning
param_dist = {
    'n_estimators': [10, 50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10, 15]
}

random_search = RandomizedSearchCV(RandomForestClassifier(random_state=42), param_dist, n_iter=10, cv=5, scoring='accuracy', random_state=42)
random_search.fit(X_train, y_train)

print("Best Parameters from Random Search:", random_search.best_params_)
print("Best Cross-Validation Accuracy:", random_search.best_score_)

Best Parameters from Random Search: {'n_estimators': 50, 'min_samples_split': 5, 'max_depth': None}
Best Cross-Validation Accuracy: 0.9686822493224932
