<a href="https://colab.research.google.com/github/Yoni-Git/Better-Organized/blob/master/Homework2_Question2_7401577e_0177_4fe1_a94e_0ada500332eb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import load_breast_cancer

def run_adaboost_depth_sweep(depths=(1, 2, 3, 4, 5), n_estimators=50, cv_folds=10, seed=123):
    """Cross-validate AdaBoost on the breast-cancer data for several base-tree depths.

    For every value in ``depths`` an ``AdaBoostClassifier`` whose base learner
    is ``DecisionTreeClassifier(max_depth=depth)`` is scored for accuracy with
    shuffled K-fold cross-validation. The same ``KFold`` splitter is reused
    for every depth.

    Args:
        depths: Iterable of ``max_depth`` values for the base decision tree.
        n_estimators: Number of boosting rounds passed to AdaBoost.
        cv_folds: Number of K-fold splits.
        seed: Random state shared by the fold shuffling, the tree and AdaBoost.

    Returns:
        A dict with three parallel lists: ``"depths"`` (a fresh list of the
        evaluated depths), ``"acc_means"`` and ``"acc_stds"`` (mean and
        ``np.std`` of the per-fold accuracies, converted to Python floats).
    """
    # Copy the input: the returned dict must not alias the caller's sequence
    # or a shared default, otherwise mutating the result would leak into
    # later calls.
    depths = list(depths)

    data = load_breast_cancer()
    X, y = data.data, data.target
    kf = KFold(n_splits=cv_folds, shuffle=True, random_state=seed)

    acc_means, acc_stds = [], []
    for d in depths:
        dt = DecisionTreeClassifier(max_depth=d, random_state=seed)
        # Newer scikit-learn names the base-learner argument ``estimator``;
        # older releases only accept ``base_estimator`` and raise TypeError
        # on the unknown keyword, so fall back to the legacy spelling.
        try:
            clf = AdaBoostClassifier(estimator=dt, n_estimators=n_estimators, random_state=seed)
        except TypeError:
            clf = AdaBoostClassifier(base_estimator=dt, n_estimators=n_estimators, random_state=seed)
        scores = cross_val_score(clf, X, y, cv=kf, scoring="accuracy")
        # float() turns NumPy scalars into plain Python floats.
        acc_means.append(float(np.mean(scores)))
        acc_stds.append(float(np.std(scores)))

    return {"depths": depths, "acc_means": acc_means, "acc_stds": acc_stds}

def plot_acc_vs_depth(res: dict, out_path: str = "q2_adaboost_accuracy_vs_depth.png") -> None:
    """Save a line plot of mean CV accuracy against base-tree depth.

    Args:
        res: Mapping with ``"depths"`` (x values) and ``"acc_means"``
            (y values), e.g. the dict returned by ``run_adaboost_depth_sweep``.
        out_path: Destination image file handed to ``savefig``.

    Raises:
        KeyError: If ``res`` lacks ``"depths"`` or ``"acc_means"``.
    """
    # Read the inputs first so a malformed ``res`` fails before any figure exists.
    depths, acc_means = res["depths"], res["acc_means"]

    fig, ax = plt.subplots()
    try:
        ax.plot(depths, acc_means, marker="o")
        ax.set_xlabel("Decision tree depth (base learner)")
        # NOTE(review): the label hard-codes "10-fold"; ``res`` carries no fold
        # count, so it is only accurate for sweeps run with cv_folds=10.
        ax.set_ylabel("10-fold CV accuracy")
        ax.set_title("Problem 2: AdaBoost accuracy vs base tree depth")
        fig.savefig(out_path, bbox_inches="tight")
    finally:
        # Always release the figure, even when plotting or saving raises, so
        # failed calls do not leave open figures behind in pyplot.
        plt.close(fig)
    print(f"Saved plot to: {out_path}")

if __name__ == "__main__":
    # Script entry point: run the sweep with its defaults, print one summary
    # line per depth, then write the accuracy-vs-depth figure to disk.
    res = run_adaboost_depth_sweep()
    print("Per-depth (mean, std):")
    summary_lines = [
        f" depth={depth}: mean={mean_acc:.4f}, std={std_acc:.4f}"
        for depth, mean_acc, std_acc in zip(
            res["depths"], res["acc_means"], res["acc_stds"]
        )
    ]
    for line in summary_lines:
        print(line)
    plot_acc_vs_depth(res)


Per-depth (mean, std):
 depth=1: mean=0.9665, std=0.0218
 depth=2: mean=0.9595, std=0.0297
 depth=3: mean=0.9701, std=0.0224
 depth=4: mean=0.9683, std=0.0249
 depth=5: mean=0.9683, std=0.0192
Saved plot to: q2_adaboost_accuracy_vs_depth.png
