In [1]:
import numpy as np

In [11]:
# Input data: X holds the single feature used for split thresholds and Y holds
# the regression targets; the two arrays are paired by position.
X = np.array([23, 24, 26, 27])
Y = np.array([50, 70, 80, 85])

In [3]:
# Parameters
# lr: learning rate; scales the leaf output that is added to f0 in the final prediction.
lr = 0.3
# NOTE(review): depth is not referenced by any visible cell; presumably it records
# that the tree built here has a single split -- confirm or remove.
depth = 1
# lambda_: regularization parameter; added to the denominator of the similarity score.
lambda_ = 0 #regularization parameter

In [12]:
# Step 1: start from a constant model -- f0 is the average of the targets Y
f0 = Y.mean()
print(f"f0 (initial prediction): {f0}")

f0 (initial prediction): 71.25


In [13]:
#Step 2: Calculate Similarity Score for the root
def similarity_score(residuals, lambda_):
    """Return the similarity score of a node.

    The score is ``(sum of residuals) ** 2 / (number of residuals + lambda_)``.

    Parameters
    ----------
    residuals : sequence of numbers
        Residuals (target minus current prediction) of the samples in the node.
    lambda_ : number
        Regularization term added to the denominator.

    Returns
    -------
    float
        The similarity score. A node without samples scores ``0.0``.
    """
    num_of_residuals = len(residuals)
    # An empty node with lambda_ == 0 would evaluate 0 / 0 (NaN plus a
    # RuntimeWarning). An empty node carries no signal, so score it as 0.
    if num_of_residuals == 0:
        return 0.0
    return np.sum(residuals) ** 2 / (num_of_residuals + lambda_)

# The root node contains every sample, so its residuals are all targets minus f0.
# Since f0 is the mean of Y these residuals sum to (about) zero, which is why
# the root score prints as 0.00.
residuals_root = Y - f0
similarity_root = similarity_score(residuals=residuals_root, lambda_=lambda_)
print(f"Root Similarity Score: {similarity_root:.2f}")

Root Similarity Score: 0.00


In [14]:
#Step 3: Calculate Similarity Scores for left and right nodes
def node_similarity(X, Y, f0, split_value, lambda_):
    """Score the two child nodes produced by splitting at ``split_value``.

    Samples with ``X < split_value`` form the left child and samples with
    ``X >= split_value`` form the right child. Each child's residuals are its
    targets minus ``f0``.

    Parameters
    ----------
    X, Y : array-like supporting element-wise comparison and boolean-mask
        indexing (NumPy arrays in this notebook), paired by position.
    f0 : number
        Current prediction subtracted from the targets.
    split_value : number
        Threshold applied to ``X``.
    lambda_ : number
        Regularization term forwarded to ``similarity_score``.

    Returns
    -------
    tuple
        ``(left_similarity, right_similarity)``; a child with no samples
        scores ``0``.
    """
    def _branch_score(mask):
        # Residuals of the samples selected by this child's mask.
        branch_residuals = Y[mask] - f0
        if len(branch_residuals) == 0:
            return 0
        return similarity_score(branch_residuals, lambda_)

    return _branch_score(X < split_value), _branch_score(X >= split_value)

In [15]:
# Step 4: score every candidate threshold by its gain.
# The candidates are the midpoints between consecutive values of X.
splits = [23.5, 25, 26.5]
gains = []

for split in splits:
    left_similarity, right_similarity = node_similarity(X, Y, f0, split, lambda_)
    # Gain = how much the two children together improve on the unsplit root.
    children_similarity = left_similarity + right_similarity
    gain = children_similarity - similarity_root
    # Keep the child scores next to the gain so the winner can be unpacked later.
    gains.append((gain, split, left_similarity, right_similarity))
    print(f"Gain for split X < {split}: {gain:.2f}")

Gain for split X < 23.5: 602.08
Gain for split X < 25: 506.25
Gain for split X < 26.5: 252.08


In [16]:
# Step 5: keep the candidate whose gain (first field of each tuple) is largest
best_candidate = max(gains, key=lambda candidate: candidate[0])
best_gain, best_split, best_left_sim, best_right_sim = best_candidate
print(f"Best split: X < {best_split} with gain: {best_gain:.2f}")

Best split: X < 23.5 with gain: 602.08


In [17]:
# Step 6: Calculate Output for left and right nodes
def calculate_output(residuals, lambda_=0):
    """Return the output value of a leaf.

    The output is ``sum(residuals) / (number of residuals + lambda_)``. With
    the default ``lambda_=0`` this is the plain mean of the residuals. Pass
    the same ``lambda_`` that is used for the similarity scores so the leaf
    value is regularized consistently with the split search.

    Parameters
    ----------
    residuals : sequence of numbers
        Residuals of the samples that fall into the leaf.
    lambda_ : number, optional
        Regularization term added to the denominator (default ``0``).

    Returns
    -------
    float
        The leaf output. A leaf without samples returns ``0.0``.
    """
    num_of_residuals = len(residuals)
    # An empty leaf with lambda_ == 0 would evaluate 0 / 0 (NaN); it has
    # nothing to correct, so its output is 0.
    if num_of_residuals == 0:
        return 0.0
    return np.sum(residuals) / (num_of_residuals + lambda_)

# Route every sample to a leaf using the winning threshold.
left_mask, right_mask = X < best_split, X >= best_split

# Each leaf's output is computed from the residuals (target minus f0) of its samples.
output_left = calculate_output(Y[left_mask] - f0)
output_right = calculate_output(Y[right_mask] - f0)

print(f"Output for left node: {output_left:.2f}")
print(f"Output for right node: {output_right:.2f}")

Output for left node: -21.25
Output for right node: 7.08


In [18]:
# Step 7: predict for x = 25
x_test = 25
# Pick the leaf the test point falls into, using the same rule as the split.
output = output_left if x_test < best_split else output_right

# Final prediction = base prediction plus the learning-rate-scaled leaf output.
prediction = f0 + lr * output
print(f"Prediction for x = {x_test}: {prediction:.2f}")

Prediction for x = 25: 73.38
