In [1]:
# Importing necessary libraries
import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler

# Classifiers
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

import warnings
# Installs a global "ignore" filter with no category/module restriction, so every
# warning raised by later cells is hidden (including any emitted by scikit-learn).
# NOTE(review): remove or narrow this while debugging, otherwise problems such as
# non-convergence would pass silently.
warnings.filterwarnings('ignore')

# Load Dataset

In [2]:
# Fetch the Iris bunch and pull out the feature matrix and the class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; stratify on the labels and fix the seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Learn the scaling statistics from the training portion only, then apply the
# same transformation to both portions
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


# Train and Evaluate Multiple Models

In [3]:
# Define models (default hyperparameters).
# Random Forest and Decision Tree are randomized estimators; they are seeded
# with the same value used for the train/test split so that the comparison
# table is identical on every Restart & Run All.
models = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Support Vector Machine": SVC(),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "K-Nearest Neighbors": KNeighborsClassifier()
}

# Evaluate models: fit each one on the training split and score it on the
# held-out split. Precision, recall and F1 are macro-averaged, i.e. the
# unweighted mean of the per-class scores.
results = []

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # One record per model; the DataFrame is built once after the loop
    results.append({
        "Model": name,
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, average='macro'),
        "Recall": recall_score(y_test, y_pred, average='macro'),
        "F1 Score": f1_score(y_test, y_pred, average='macro')
    })

# Show results, best macro-F1 first (the original row index is kept)
results_df = pd.DataFrame(results).sort_values(by="F1 Score", ascending=False)
results_df


Unnamed: 0,Model,Accuracy,Precision,Recall,F1 Score
1,Random Forest,0.966667,0.969697,0.966667,0.966583
2,Support Vector Machine,0.966667,0.969697,0.966667,0.966583
3,Decision Tree,0.966667,0.969697,0.966667,0.966583
0,Logistic Regression,0.933333,0.933333,0.933333,0.933333
4,K-Nearest Neighbors,0.933333,0.944444,0.933333,0.93266


# Hyperparameter Tuning
## GridSearchCV for SVM

In [4]:
# Candidate SVM settings; the grid search tries every combination of these values
param_grid_svm = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto'],
}

# 5-fold cross-validated exhaustive search on the training split, ranked by macro-F1
grid_svm = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid_svm,
    scoring='f1_macro',
    cv=5,
).fit(X_train, y_train)

# Report the winning combination and its mean cross-validated score
print("Best Parameters for SVM (GridSearch):", grid_svm.best_params_)
print("Best F1 Score:", grid_svm.best_score_)


Best Parameters for SVM (GridSearch): {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}
Best F1 Score: 0.9749019607843138


## RandomizedSearchCV for Random Forest

In [5]:
# Candidate Random Forest settings; the randomized search samples 10
# combinations from these lists instead of trying all of them.
param_dist_rf = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'criterion': ['gini', 'entropy']
}

# The search's own random_state only fixes WHICH candidates are sampled.
# The forest is seeded as well, otherwise each candidate's cross-validated
# score (and therefore best_params_ / best_score_) changes from run to run.
random_rf = RandomizedSearchCV(RandomForestClassifier(random_state=42), param_distributions=param_dist_rf,
                               n_iter=10, cv=5, scoring='f1_macro', random_state=42)
random_rf.fit(X_train, y_train)

# Report the winning combination and its mean cross-validated macro-F1
print("Best Parameters for RF (RandomizedSearch):", random_rf.best_params_)
print("Best F1 Score:", random_rf.best_score_)


Best Parameters for RF (RandomizedSearch): {'n_estimators': 200, 'min_samples_split': 5, 'max_depth': 10, 'criterion': 'entropy'}
Best F1 Score: 0.9581699346405228


# Final Evaluation of Best Models

In [6]:
# Take the best estimator found by each search and predict on the held-out split
best_svm, best_rf = grid_svm.best_estimator_, random_rf.best_estimator_
y_pred_svm = best_svm.predict(X_test)
y_pred_rf = best_rf.predict(X_test)

# Print one per-class report per tuned model, SVM first
for heading, predictions in (
    ("\nClassification Report for Tuned SVM:", y_pred_svm),
    ("\nClassification Report for Tuned Random Forest:", y_pred_rf),
):
    print(heading)
    print(classification_report(y_test, predictions))


Classification Report for Tuned SVM:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00        10
           2       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30


Classification Report for Tuned Random Forest:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.90      0.95        10
           2       0.91      1.00      0.95        10

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30



# Summary & Best Model Selection

In [9]:
# Macro-F1 on the held-out split for each tuned model, keyed by display label
final_results = {
    label: f1_score(y_test, preds, average='macro')
    for label, preds in (("Tuned SVM", y_pred_svm), ("Tuned RF", y_pred_rf))
}

# Pick the label with the highest score; on a tie the first entry wins
best_model_name = max(final_results, key=lambda label: final_results[label])
print(f"\nBest Performing Model After Tuning: {best_model_name} with F1 Score = {final_results[best_model_name]:.4f}")



Best Performing Model After Tuning: Tuned SVM with F1 Score = 1.0000


# Conclusion