In [1]:
# One-liner: Import libraries, set options, and lock a random seed for reproducibility.
import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from sklearn.model_selection import StratifiedKFold
# Notebook-wide defaults: figure size for every plot, and a fixed seed for
# NumPy's global random state.
plt.rcParams.update({"figure.figsize": (8, 5)})
np.random.seed(seed=42)


In [2]:
# One-liner: Load the Wine dataset (178 samples, 13 features, 3 classes) into X / y and preview them.
wine = load_wine()

# Features become a DataFrame with named columns; labels become a Series called "target".
feature_columns = wine.feature_names
X = pd.DataFrame(data=wine.data, columns=feature_columns)
y = pd.Series(data=wine.target, name="target")

# Show the feature-matrix shape together with the per-class sample counts (ordered by class id).
class_counts = y.value_counts().sort_index()
X.shape, class_counts


((178, 13),
 target
 0    59
 1    71
 2    48
 Name: count, dtype: int64)

In [3]:
# One-liner: Split data into 70% train, 20% validation, 10% test with stratification.
HOLDOUT_FRACTION = 0.30        # share of all rows kept out of training (validation + test)
TEST_SHARE_OF_HOLDOUT = 1 / 3  # one third of the 30% hold-out -> ~10% of all rows for test
SPLIT_SEED = 42                # one seed for both stages so the partition is reproducible

# Stage 1: carve the hold-out set off the full data, preserving class proportions.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=HOLDOUT_FRACTION, stratify=y, random_state=SPLIT_SEED
)
# Stage 2: divide the hold-out into validation and test, again stratified.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=TEST_SHARE_OF_HOLDOUT, stratify=y_temp, random_state=SPLIT_SEED
)

# Sanity checks: every sample lands in exactly one split (sizes add up and,
# since the union of row labels is just as large, no row is shared).
assert len(X_train) + len(X_val) + len(X_test) == len(X), "splits do not cover the data"
assert len(X_train.index.union(X_val.index).union(X_test.index)) == len(X), "splits overlap"

len(X_train), len(X_val), len(X_test)


(124, 36, 18)

In [4]:
# One-liner: Define helpers to compute metrics and to format results as a comparison table row.
def eval_metrics(y_true, y_pred, average="macro"):
    """Score a set of predictions against the true labels.

    Parameters
    ----------
    y_true : true class labels.
    y_pred : predicted class labels, in the same order as ``y_true``.
    average : averaging mode forwarded to precision, recall and F1
        (``"macro"`` by default).

    Returns
    -------
    dict
        Keys ``"accuracy"``, ``"precision"``, ``"recall"`` and ``"f1"``, in
        that order, mapped to the corresponding scores.
    """
    # Precision, recall and F1 share the same keyword arguments;
    # zero_division=0 is forwarded unchanged to all three.
    averaged_kwargs = {"average": average, "zero_division": 0}

    scores = {"accuracy": accuracy_score(y_true, y_pred)}
    scores["precision"] = precision_score(y_true, y_pred, **averaged_kwargs)
    scores["recall"] = recall_score(y_true, y_pred, **averaged_kwargs)
    scores["f1"] = f1_score(y_true, y_pred, **averaged_kwargs)
    return scores

def add_result(results, label, y_true, y_pred, average="macro"):
    """Append one comparison-table row of metrics to ``results`` (in place).

    Parameters
    ----------
    results : list that collects one dict per evaluated model/split; mutated.
    label : text stored under the ``"model"`` key of the new row.
    y_true : true class labels.
    y_pred : predicted class labels.
    average : averaging mode forwarded to ``eval_metrics``; defaults to
        ``"macro"``, which is what ``eval_metrics`` uses when called without it.

    Returns
    -------
    None
        The row is appended to ``results``; nothing is returned.
    """
    row = {"model": label}
    # Metric keys come after "model" so the label is the first table column.
    row.update(eval_metrics(y_true, y_pred, average=average))
    results.append(row)


In [None]:
# One-liner: Fit the two baseline trees (gini and entropy), then score each on the validation and test splits.
results = []

# Baseline CART uses the gini criterion; the "ID3-like" tree uses entropy
# (sklearn trees still make binary splits). fit() returns the estimator itself.
cart = DecisionTreeClassifier(criterion="gini", random_state=42).fit(X_train, y_train)
id3_like = DecisionTreeClassifier(criterion="entropy", random_state=42).fit(X_train, y_train)

# One row per (model, split) pair, in the order they appear in the table.
evaluation_plan = [
    ("CART (gini) – VAL", cart, X_val, y_val),
    ("CART (gini) – TEST", cart, X_test, y_test),
    ("ID3-like (entropy) – VAL", id3_like, X_val, y_val),
    ("ID3-like (entropy) – TEST", id3_like, X_test, y_test),
]
for split_label, fitted_tree, X_eval, y_eval in evaluation_plan:
    add_result(results, split_label, y_eval, fitted_tree.predict(X_eval))

pd.DataFrame(results)
