In [1]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor, export_text
from sklearn.datasets import load_iris
iris = load_iris()
X_train = iris.data[:, [0, 1]]  
y_train = iris.target.astype(float)
h0 = np.mean(y_train)  
print(f"Initial weak classifier h0: {h0:.4f}")
H = []
H.append(h0)
T = 3
for t in range(T):
    print(f"\nIteration {t+1}:")
    y_pred = np.zeros_like(y_train) + H[0] 
    for h in H[1:]:
        y_pred += h.predict(X_train)
    print(f"\nFirst 5 predicted values ŷ(t-1):")
    print(y_pred[:5])
    residuals = y_train - y_pred
    print(f"\nFirst 5 residuals ri:")
    print(residuals[:5])
    Y_train_new = residuals.copy()
    print(f"\nFirst 5 values of revised Y_train:")
    print(Y_train_new[:5])
    ht = DecisionTreeRegressor(max_depth=2)
    ht.fit(X_train, Y_train_new) 
    print(f"\nTree structure for h{t+1}:")
    print(export_text(ht, feature_names=['X1', 'X2']))
    y_pred_new = ht.predict(X_train)
    print(f"\nFirst 5 predictions for h{t+1}:")
    print(y_pred_new[:5])
    sum_residuals = np.sum([h.predict(X_train) for h in H[1:]], axis=0)
    if sum_residuals.sum() < 0:
        H.append(ht) 
        continue
    else:
        print(f"H^({t-1}) is the final ensemble set.")
        break   
print("\nFinal ensemble size:", len(H)) 

# Complete Gradient Boosting Implementation Explanation:
#
# 1. Data Preparation (18. 1-1):
# - Loads iris dataset for regression
# - Uses first two features as X_train
# - Converts target to float for y_train
#
# 2. Initial Weak Classifier (18. 1-2):
# - Calculates mean of y_train values as h0
# - h0 serves as the first weak classifier
# - Initializes ensemble H with h0
#
# 3. Boosting Loop Setup:
# - Sets T=3 iterations (as specified)
# - Creates empty list to store weak classifiers
# - Prepares for iterative improvement
#
# 4. For Each Iteration t (18. 1-3):
# - Calculates ensemble predictions ŷ(t-1):
#   * Starts with h0 (mean value)
#   * Adds predictions from all trees in ensemble
#   * Gets current estimate for each data point
#
# - Computes residuals:
#   * ri = yi - ŷ(t-1) for each point i
#   * Shows how far current predictions are from true values
#   * These residuals become new training targets
#
# - Updates training data:
#   * Replaces Y_train with residual values
#   * Allows next classifier to focus on errors
#
# 5. New Weak Classifier (18. 1-4):
# - Creates decision tree regressor:
#   * Uses max_depth=2 for weak learning
#   * Fits tree to current residuals
#
# - Visualizes tree structure:
#   * Shows split points and values
#   * Uses export_text for tree visualization
#
# - Displays predictions:
#   * Shows what new tree predicts
#   * Helps track improvement
#
# 6. Convergence Check (18. 1-5):
# - Checks if sum of residuals < 0
# - If true: adds new tree to ensemble
# - If false: stops and uses current ensemble
#
# 7. Key Concepts:
# - Each iteration tries to correct previous mistakes
# - Trees learn from residuals of ensemble
# - Process continues until convergence or T iterations
# - Final ensemble combines all weak learners
#
# 8. Implementation Notes:
# - Uses sklearn's DecisionTreeRegressor
# - Maintains list of all classifiers
# - Prints intermediate results for tracking
# - Shows residuals and predictions at each step

Initial weak classifier h0: 1.0000

Iteration 1:

First 5 predicted values ŷ(t-1):
[1. 1. 1. 1. 1.]

First 5 residuals ri:
[-1. -1. -1. -1. -1.]

First 5 values of revised Y_train:
[-1. -1. -1. -1. -1.]

Tree structure for h1:
|--- X1 <= 5.55
|   |--- X2 <= 2.80
|   |   |--- value: [0.00]
|   |--- X2 >  2.80
|   |   |--- value: [-0.98]
|--- X1 >  5.55
|   |--- X1 <= 6.15
|   |   |--- value: [0.19]
|   |--- X1 >  6.15
|   |   |--- value: [0.71]


First 5 predictions for h1:
[-0.9787234 -0.9787234 -0.9787234 -0.9787234 -0.9787234]
H^(-1) is the final ensemble set.

Final ensemble size: 1
