In [None]:
import os, datetime as dt, numpy as np, pandas as pd
from scipy.special import expit
from sklearn.metrics import roc_auc_score

# assumes fit_gradient_tree(X, gradients, max_depth, random_state) is already defined

def fit_gradient_boosting(X, y, n_iters, max_depth, learning_rate,
                          save_csv=True, random_state=0):
    """
    Train a gradient-boosted tree ensemble for binary classification.

    Starting from the log-odds of the label mean, each round fits a tree
    (via ``fit_gradient_tree``) to the pseudo-residuals ``y - sigmoid(f)``
    and adds ``learning_rate * tree.predict(X)`` to the running logits.

    Parameters
    ----------
    X               : training features, passed unchanged to
                      ``fit_gradient_tree`` and ``tree.predict``
    y               : array-like     binary labels; flattened to 1-D, so
                                     lists and column vectors are accepted
    n_iters         : int            number of boosting rounds
    max_depth       : int            depth of each regression tree
    learning_rate   : float          shrinkage coefficient η
    save_csv        : bool           write AUC history to "q5 data/" under the
                                     current working directory (default True)
    random_state    : int            seed for the generator that draws one
                                     integer seed per tree

    Returns
    -------
    model           : dict           keys "trees", "learning_rate",
                                     "initial_score"
    auc_history     : list[float]    training AUC after each round
    csv_path        : str | None     file written (None if save_csv=False)
    """
    rng = np.random.default_rng(random_state)

    # Coerce once so the arithmetic below does not depend on the container
    # type or on whether the labels arrive as a column vector.
    labels = np.asarray(y).ravel()

    # Clip the base rate so the log-odds stays finite when the labels are
    # all 0 or all 1.
    prior = np.clip(labels.mean(), 1e-8, 1 - 1e-8)
    initial_score = np.log(prior / (1 - prior))

    f = np.full(labels.shape, initial_score, dtype=float)   # running logits
    trees, auc_hist = [], []

    for _ in range(n_iters):
        residuals = labels - expit(f)           # pseudo-residuals
        tree = fit_gradient_tree(
            X, residuals,
            max_depth=max_depth,
            # integer bound and plain int seed; draws from [0, 10**9)
            random_state=int(rng.integers(10**9)))
        trees.append(tree)
        f += learning_rate * tree.predict(X)    # update logits
        auc_hist.append(roc_auc_score(labels, expit(f)))

    model = {"trees": trees,
             "learning_rate": learning_rate,
             "initial_score": initial_score}

    csv_path = None
    if save_csv:
        os.makedirs("q5 data", exist_ok=True)
        # NOTE: second-resolution timestamp; two runs finishing within the
        # same second write to the same path.
        ts = dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        csv_path = f"q5 data/auc_{ts}.csv"
        pd.DataFrame({
            # numbered from the recorded history so both columns always
            # have the same length
            "iteration": np.arange(1, len(auc_hist) + 1),
            "train_auc": auc_hist,
        }).to_csv(csv_path, index=False)

    return model, auc_hist, csv_path

Question 3

### Q3: Gradient of cross-entropy objective

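Let $f_{t,i} = f_t(x_i)$ denote the score (logit) of sample $i$ at iteration $t$, and define the predicted probability:
$p_{t,i} = \sigma(f_{t,i}) = \frac{1}{1+e^{-f_{t,i}}}$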

The loss at iteration $t$ is:
$$L_t = -\sum_{i=1}^{n} \left[ y_i \log p_{t,i} + (1-y_i)\log(1-p_{t,i}) \right]$$

#### Step 1: Find derivative w.r.t. probability
$$\frac{\partial L_t}{\partial p_{t,i}} = -\left(\frac{y_i}{p_{t,i}} - \frac{1-y_i}{1-p_{t,i}}\right)$$

#### Step 2: Find derivative of probability w.r.t. score
$$\frac{\partial p_{t,i}}{\partial f_{t,i}} = p_{t,i}(1-p_{t,i})$$

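#### Step 3: Apply chain rule
$$\frac{\partial L_t}{\partial f_{t,i}} = \frac{\partial L_t}{\partial p_{t,i}} \cdot \frac{\partial p_{t,i}}{\partial f_{t,i}} = -\left(\frac{y_i}{p_{t,i}} - \frac{1-y_i}{1-p_{t,i}}\right) p_{t,i}(1-p_{t,i}) = -\left[y_i(1-p_{t,i}) - (1-y_i)\,p_{t,i}\right] = p_{t,i}-y_i$$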

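#### Step 4: Pseudo-residual fitted in the next iteration
The pseudo-residual is the negative gradient of the loss with respect to the score:
$$r_{t,i} = -\frac{\partial L_t}{\partial f_{t,i}} = y_i - p_{t,i}$$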