In [1]:
import numpy as np

In [2]:
#Input data
X = np.array([23, 24, 26, 27])
Y = np.array([0, 0, 1, 1])

In [3]:
#Parameters
lr = 0.3
depth = 1
lambda_ = 0 #regularization parameter

In [4]:
#Step 1: Initialize f0 = 0.5
f0 = 0.5
pre_prob = f0

In [5]:
#Step 2: Calculate Similarity Score for the root
residuals = Y - f0
sum_of_residuals = np.sum(residuals)
similarity_root = sum_of_residuals**2 / (pre_prob * (1 - pre_prob) + lambda_)
print(f"Root Similarity Score: {similarity_root}")

Root Similarity Score: 0.0


In [6]:
#Step 3: Calculate Similarity Score for left and right nodes
def similarity_score_nodes(Y_left, Y_right, f0, lambda_):
    #Calculate residuals for each node
    residuals_left = Y_left - f0
    residuals_right = Y_right - f0

    #Calculate sum of residuals for each node
    sum_of_residuals_left = np.sum(residuals_left)
    sum_of_residuals_right = np.sum(residuals_right)

    #Calculate similarity score for each node
    similarity_left = sum_of_residuals_left**2 / (f0 * (1 - f0) + lambda_)
    similarity_right = sum_of_residuals_right**2 / (f0 * (1 - f0) + lambda_)

    return similarity_left, similarity_right

In [7]:
# Calculate Similarity Score for each split
splits = [23.5, 25, 26.5]
gains = []

for split in splits:
    Y_left = Y[X < split]
    Y_right = Y[X >= split]

    similarity_left, similarity_right = similarity_score_nodes(Y_left, Y_right, f0, lambda_)
    gain = similarity_left + similarity_right - similarity_root
    gains.append((gain, split, similarity_left, similarity_right))
    print(f"Split X < {split}: Similarity Left = {similarity_left:.4f}, Right = {similarity_right:.4f}, Gain = {gain:.4f}")

Split X < 23.5: Similarity Left = 1.0000, Right = 1.0000, Gain = 2.0000
Split X < 25: Similarity Left = 4.0000, Right = 4.0000, Gain = 8.0000
Split X < 26.5: Similarity Left = 1.0000, Right = 1.0000, Gain = 2.0000


In [9]:
#Step 4: Choose model with the highest gain
best_gain, best_split, _, _ = max(gains, key=lambda x: x[0])
print(f"Choose Split X < {best_split} with the highest Gain: {best_gain:.4f}")

Choose Split X < 25 with the highest Gain: 8.0000


In [13]:
#Step 5: Calculate Output for left and right nodes
Y_left = Y[X < best_split]
Y_right = Y[X >= best_split]
residuals_left = Y_left - f0
residuals_right = Y_right - f0

output_left = np.sum(residuals_left) / (f0 * (1 - f0))
output_right = np.sum(residuals_right) / (f0 * (1 - f0))

print(f"Output for the Left = {output_left:.4f} \nOutput for the Right = {output_right:.4f}")

Output for the Left = -4.0000 
Output for the Right = 4.0000


In [15]:
# Step 6: Make prediction for x = 25
x_pred = 25
if x_pred < best_split:
    output = output_left
else:
    output = output_right

log_prediction = np.log(f0 / (1 - f0)) + lr * output
predicted_prob = np.exp(log_prediction) / (1 + np.exp(log_prediction))

print(f"Prediction for x = {x_pred}:\nLog Prediction = {log_prediction:.4f} \nProbability = {predicted_prob:.4f}")

Prediction for x = 25:
Log Prediction = 1.2000 
Probability = 0.7685
