# Objective:
1. Train multiple Machine Learning models.

2. Evaluate their performance using accuracy, precision, recall, F1-score.

3. Implement GridSearchCV and RandomizedSearchCV for hyperparameter tuning.

4. Analyze and select the best-performing model.



In [1]:
# Required Python Libraries:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

from sklearn.datasets import load_iris  # Example dataset

import warnings
warnings.filterwarnings('ignore')


# Dataset:
For demonstration, we will use the Iris dataset (a multiclass classification problem). You can replace it with your own dataset.

In [2]:
# Load the dataset
data = load_iris()
X = data.data
y = data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Model Training and Evaluation
We will train the following models:

1. Logistic Regression

2. Random Forest Classifier

3. Support Vector Classifier (SVC)

# Define a Model Evaluation Function
This function will calculate accuracy, precision, recall, and F1-score for each model.


In [3]:
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))
    
    return accuracy, precision, recall, f1


In [4]:
# Model 1: Logistic Regression
# Create and train Logistic Regression model
lr_model = LogisticRegression()
lr_model.fit(X_train, y_train)

# Evaluate the model
lr_metrics = evaluate_model(lr_model, X_test, y_test)
print("Logistic Regression - Accuracy, Precision, Recall, F1:", lr_metrics)


Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Logistic Regression - Accuracy, Precision, Recall, F1: (1.0, 1.0, 1.0, 1.0)


In [5]:
# Model 2: Random Forest Classifier
# Create and train Random Forest model
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)

# Evaluate the model
rf_metrics = evaluate_model(rf_model, X_test, y_test)
print("Random Forest - Accuracy, Precision, Recall, F1:", rf_metrics)

Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Random Forest - Accuracy, Precision, Recall, F1: (1.0, 1.0, 1.0, 1.0)


In [6]:
# Model 3: Support Vector Classifier (SVC)
# Create and train SVC model
svc_model = SVC()
svc_model.fit(X_train, y_train)

# Evaluate the model
svc_metrics = evaluate_model(svc_model, X_test, y_test)
print("SVC - Accuracy, Precision, Recall, F1:", svc_metrics)

Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

SVC - Accuracy, Precision, Recall, F1: (1.0, 1.0, 1.0, 1.0)


# Hyperparameter Tuning
Now, we will tune the models using GridSearchCV and RandomizedSearchCV.

# GridSearchCV — for Random Forest
This method tries every possible combination of parameters you provide and finds the best one.

In [7]:
# Define parameter grid for Random Forest
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [2, 4, 6, None],
    'min_samples_split': [2, 5, 10]
}

# Apply GridSearchCV
grid_search = GridSearchCV(RandomForestClassifier(), param_grid, cv=3, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Show the best parameters
print("Best Parameters from GridSearchCV:", grid_search.best_params_)


Best Parameters from GridSearchCV: {'max_depth': 2, 'min_samples_split': 2, 'n_estimators': 150}


# RandomizedSearchCV — for SVC
 This method randomly selects combinations of parameters and tests them, which is faster for large ranges.

In [8]:
# Define parameter distribution for SVC
param_dist = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['linear', 'rbf']
}

# Apply RandomizedSearchCV
random_search = RandomizedSearchCV(SVC(), param_distributions=param_dist, n_iter=10, cv=3, scoring='accuracy', random_state=42)
random_search.fit(X_train, y_train)

# Show the best parameters
print("Best Parameters from RandomizedSearchCV:", random_search.best_params_)


Best Parameters from RandomizedSearchCV: {'kernel': 'linear', 'gamma': 1, 'C': 1}


In [9]:
# Tuned Random Forest
# Use the best estimator from GridSearchCV
best_rf = grid_search.best_estimator_

# Evaluate the tuned Random Forest
rf_tuned_metrics = evaluate_model(best_rf, X_test, y_test)
print("Tuned Random Forest - Accuracy, Precision, Recall, F1:", rf_tuned_metrics)


Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Tuned Random Forest - Accuracy, Precision, Recall, F1: (1.0, 1.0, 1.0, 1.0)


In [10]:
# Tuned SVC
# Use the best estimator from RandomizedSearchCV
best_svc = random_search.best_estimator_

# Evaluate the tuned SVC
svc_tuned_metrics = evaluate_model(best_svc, X_test, y_test)
print("Tuned SVC - Accuracy, Precision, Recall, F1:", svc_tuned_metrics)

Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Tuned SVC - Accuracy, Precision, Recall, F1: (1.0, 1.0, 1.0, 1.0)
