In [5]:
# Minimal Iris -> Decision Tree (with simple preprocessing)
import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report

# 1) Load data as a DataFrame
iris = load_iris(as_frame=True)
X = iris.data                     # numeric features
y = iris.target                   # numeric labels already (0,1,2)
target_names = iris.target_names  # ['setosa','versicolor','virginica']

# No label-encoding step is required: `y` is already integer-coded, and
# `target_names` supplies the human-readable class names for the report.
# (This cell previously referenced `y_enc` and `le`, which are not defined
# anywhere in the cell and therefore fail on a fresh kernel.)

# 2) Train/test split (stratified so each class keeps its share in both sets)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 3) Build a simple pipeline: impute missing -> Decision Tree
# (Iris has no missing values, but this makes code robust)
pipe = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("model", DecisionTreeClassifier(random_state=42))
])

# 4) Train
pipe.fit(X_train, y_train)

# 5) Predict
y_pred = pipe.predict(X_test)

# 6) Metrics (macro = treats all classes equally)
# zero_division=0 scores a class with no predicted samples as 0 instead of warning.
acc  = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average="macro", zero_division=0)
rec  = recall_score(y_test, y_pred, average="macro", zero_division=0)

print(f"Accuracy : {acc:.4f}")
print(f"Precision (macro): {prec:.4f}")
print(f"Recall    (macro): {rec:.4f}")

print("\nClassification report:")
# Label the report rows with the dataset's own class names.
print(classification_report(y_test, y_pred, target_names=target_names))


Accuracy : 0.9333
Precision (macro): 0.9333
Recall    (macro): 0.9333

Classification report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       0.90      0.90      0.90        10
   virginica       0.90      0.90      0.90        10

    accuracy                           0.93        30
   macro avg       0.93      0.93      0.93        30
weighted avg       0.93      0.93      0.93        30



In [7]:
# Iris ≥95% with a tuned Decision Tree (simple & clear)

import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report

# 1) Load data
iris = load_iris(as_frame=True)
X = iris.data
y = iris.target  # already numeric labels (0=setosa, 1=versicolor, 2=virginica)

# 2) Train/test split (stratified so class balance is preserved)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 3) Pipeline: impute (robustness) -> Decision Tree
# The step name "clf" is what the "clf__" prefixes in the grid below refer to.
pipe = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("clf", DecisionTreeClassifier(random_state=0))
])

# 4) Small hyperparameter grid (kept minimal)
# "log_loss" is intentionally not listed: scikit-learn treats "entropy" and
# "log_loss" as the same Shannon information-gain criterion, so searching both
# only repeats identical candidates (and "log_loss" is rejected by older
# scikit-learn releases that predate it).
param_grid = {
    "clf__criterion": ["gini", "entropy"],
    "clf__max_depth": [5, 6, 7, 8, None],
    "clf__min_samples_split": [2, 4, 6],
    "clf__min_samples_leaf": [1, 2, 3],
}

# 5) Grid search with 5-fold CV
# Only the training split is used here; the test split stays untouched
# until the final evaluation in step 6.
grid = GridSearchCV(
    pipe,
    param_grid=param_grid,
    scoring="accuracy",  # same as the classifier default, stated explicitly
    cv=5,
    n_jobs=-1
)
grid.fit(X_train, y_train)

# best_estimator_ is the pipeline refit on all of X_train with the best params.
best_model = grid.best_estimator_
print("Best params:", grid.best_params_)

# 6) Evaluate on the held-out test set
y_pred = best_model.predict(X_test)
acc  = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average="macro", zero_division=0)
rec  = recall_score(y_test, y_pred, average="macro", zero_division=0)

print(f"\nAccuracy : {acc:.4f}")
print(f"Precision (macro): {prec:.4f}")
print(f"Recall    (macro): {rec:.4f}")

print("\nClassification report:")
print(classification_report(y_test, y_pred, target_names=iris.target_names))


Best params: {'clf__criterion': 'gini', 'clf__max_depth': 5, 'clf__min_samples_leaf': 1, 'clf__min_samples_split': 2}

Accuracy : 0.9667
Precision (macro): 0.9697
Recall    (macro): 0.9667

Classification report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      0.90      0.95        10
   virginica       0.91      1.00      0.95        10

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30

