<a href="https://colab.research.google.com/github/ashkan-motamedifar/iris-ml-model-comparison/blob/main/iris_model_comparison_grid_search.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [None]:
# 1) Data: load Iris, then hold out 30% as a stratified, seeded test split
iris = load_iris()
X, y = iris.data, iris.target

X_tr, X_te, y_tr, y_te = train_test_split(
    X,
    y,
    test_size=0.30,
    stratify=y,        # keep class proportions equal in both splits
    random_state=42,
)
X_tr.shape, X_te.shape, y_te.shape, y_tr.shape

((105, 4), (45, 4), (45,), (105,))

In [None]:
# 2) CV splitter: five stratified folds, shuffled with a fixed seed so runs repeat exactly
cv5 = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

In [None]:
# 3) Pipelines: every classifier sits behind its own StandardScaler step.
#    Scaling is not needed by the tree-based models; it is kept so all
#    pipelines share the same ("scaler", "clf") layout.
pipes = {
    label: Pipeline(steps=[("scaler", StandardScaler()), ("clf", classifier)])
    for label, classifier in (
        ("LogReg", LogisticRegression(max_iter=500, random_state=42)),
        ("SVM", SVC(random_state=42)),
        ("KNN", KNeighborsClassifier()),
        ("DT", DecisionTreeClassifier(random_state=42)),
        ("RF", RandomForestClassifier(random_state=42)),
    )
}

In [None]:
# 4) Param grids, keyed by the same names as `pipes`.
#    The "clf__" prefix routes each parameter to the pipeline's "clf" step.
grids = {
    "LogReg": {
        "clf__C": [0.1, 1, 10],
        "clf__penalty": ["l2"],
        "clf__solver": ["lbfgs"],
    },
    "SVM": {
        "clf__kernel": ["linear", "rbf"],
        "clf__C": [0.1, 1, 10],
        "clf__gamma": ["scale", "auto"],
    },
    "KNN": {
        "clf__n_neighbors": [3, 5, 7, 9],
        "clf__weights": ["uniform", "distance"],
        "clf__p": [1, 2],
    },
    "DT": {
        "clf__criterion": ["gini", "entropy"],
        "clf__max_depth": [None, 3, 5, 7],
        "clf__min_samples_split": [2, 5, 10],
    },
    "RF": {
        "clf__n_estimators": [100, 200, 400],
        "clf__max_depth": [None, 5, 10],
        "clf__min_samples_split": [2, 5, 10],
    },
}

In [None]:
def tune_and_test(name, est, grid, cv=None):
    """Grid-search one estimator on the training split and score it once on the test split.

    Reads the notebook-level arrays ``X_tr``, ``y_tr``, ``X_te`` and ``y_te``.

    Parameters
    ----------
    name : str
        Label stored under the ``"Model"`` key of the returned row.
    est : estimator
        Estimator (here: a pipeline) handed to ``GridSearchCV``.
    grid : dict
        Parameter grid handed to ``GridSearchCV`` as ``param_grid``.
    cv : optional
        Value forwarded to ``GridSearchCV(cv=...)``. When omitted, the
        notebook-level ``cv5`` splitter is used, so every model is tuned on
        the same shuffled, seeded stratified folds.

    Returns
    -------
    dict
        One result row with keys ``Model``, ``CV_F1``, ``Test_ACC``,
        ``Test_Precision``, ``Test_Recall``, ``Test_F1`` (scores rounded to
        4 decimals) and ``Best_Params``.
    """
    # Use the shared splitter defined for this notebook instead of a bare fold
    # count, which would silently ignore the shuffle/seed configured on cv5.
    folds = cv5 if cv is None else cv

    gs = GridSearchCV(estimator=est, param_grid=grid, cv=folds, scoring="f1_macro", n_jobs=-1, refit=True)
    gs.fit(X_tr, y_tr)                              # CV on TRAIN; refit best on full TRAIN
    y_hat = gs.best_estimator_.predict(X_te)        # one final TEST evaluation

    return {
        "Model": name,
        "CV_F1": round(gs.best_score_, 4),                                  # mean CV on TRAIN folds
        "Test_ACC": round(accuracy_score(y_te, y_hat), 4),
        "Test_Precision": round(precision_score(y_te, y_hat, average="macro", zero_division=0), 4),
        "Test_Recall": round(recall_score(y_te, y_hat, average="macro", zero_division=0), 4),
        # zero_division=0 matches the precision/recall calls above
        "Test_F1": round(f1_score(y_te, y_hat, average="macro", zero_division=0), 4),
        "Best_Params": gs.best_params_
    }

In [None]:
# Tune each pipeline with its matching grid, then list models from best to worst test macro-F1
rows = [tune_and_test(label, pipe, grids[label]) for label, pipe in pipes.items()]

res = pd.DataFrame(rows)
res = res.sort_values(by="Test_F1", ascending=False)
res = res.reset_index(drop=True)

print(res.to_string(index=False))

 Model  CV_F1  Test_ACC  Test_Precision  Test_Recall  Test_F1                                                                     Best_Params
    DT 0.9522    0.9556          0.9608       0.9556   0.9554 {'clf__criterion': 'gini', 'clf__max_depth': None, 'clf__min_samples_split': 5}
LogReg 0.9809    0.9111          0.9155       0.9111   0.9107                     {'clf__C': 1, 'clf__penalty': 'l2', 'clf__solver': 'lbfgs'}
   SVM 0.9714    0.9111          0.9155       0.9111   0.9107                 {'clf__C': 0.1, 'clf__gamma': 'scale', 'clf__kernel': 'linear'}
   KNN 0.9713    0.9111          0.9298       0.9111   0.9095                 {'clf__n_neighbors': 5, 'clf__p': 1, 'clf__weights': 'uniform'}
    RF 0.9522    0.8889          0.8981       0.8889   0.8878 {'clf__max_depth': None, 'clf__min_samples_split': 2, 'clf__n_estimators': 100}
