<a href="https://colab.research.google.com/github/bhoop70233/Least-Squares-Regression-Tree/blob/main/Least_Squares_Regression_Tree.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import numpy as np


In [9]:
import numpy as np

class Node:
    """A single node of the regression tree.

    A freshly constructed node only knows the samples that reach it
    (``region``) and how far it sits below the root (``depth``). The split
    description, both children and the prediction all start out as ``None``
    and are filled in later by whoever grows the tree.
    """

    def __init__(self, region, depth=0):
        # Samples routed to this node and its distance from the root.
        self.region, self.depth = region, depth
        # Feature index and threshold of the split; None until one is chosen.
        self.split_feature = self.split_value = None
        # Child nodes; None until the node is split.
        self.left = self.right = None
        # Value reported by this node; None until assigned.
        self.prediction = None

def mse(region):
    """Return the sum of squared deviations of the targets from their mean.

    ``region`` is a sequence of ``(features, target)`` pairs; only the
    targets are used. Despite the name, the result is *not* divided by the
    number of samples. An empty region yields ``0``.
    """
    sample_count = len(region)
    if sample_count == 0:
        return 0

    targets = [target for _, target in region]
    centre = np.mean(targets)

    # Accumulate left to right starting from the integer 0.
    total = 0
    for target in targets:
        total += (target - centre) ** 2
    return total

def find_best_split(data):
    """Find the axis-aligned split with the lowest total squared error.

    ``data`` is a sequence of ``(features, target)`` pairs. For every feature
    index the samples are sorted by that feature and each boundary between
    two *distinct* neighbouring values is tried as a cut. A cut is scored by
    ``mse(left) + mse(right)``; the first cut with the strictly lowest score
    wins.

    Returns:
        ``(feature, split_value, (left_region, right_region))`` for the best
        cut, or ``(None, None, None)`` when no cut exists: fewer than two
        samples, or every feature is constant across the samples.
    """
    # With fewer than two samples there is nothing to separate. Returning
    # early also avoids indexing data[0] on an empty sequence.
    if len(data) < 2:
        return None, None, None

    best_feature = None
    best_split = None
    best_mse = float('inf')
    best_regions = None

    for feature in range(len(data[0][0])):
        sorted_data = sorted(data, key=lambda x: x[0][feature])
        for i in range(1, len(sorted_data)):
            lower = sorted_data[i - 1][0][feature]
            upper = sorted_data[i][0][feature]
            # Equal neighbours cannot be separated by a threshold.
            if lower == upper:
                continue

            split_value = (lower + upper) / 2
            # For adjacent floating-point values the midpoint can round up to
            # `upper` (or overflow). Prediction sends `x <= split_value` to
            # the left, so the threshold must stay strictly below `upper`
            # for right-hand samples to keep going right.
            if split_value >= upper:
                split_value = lower

            left_region = sorted_data[:i]
            right_region = sorted_data[i:]
            current_mse = mse(left_region) + mse(right_region)

            if current_mse < best_mse:
                best_mse = current_mse
                best_feature = feature
                best_split = split_value
                best_regions = (left_region, right_region)

    return best_feature, best_split, best_regions

def build_tree(data, max_depth=float('inf'), min_samples_split=2):
    """Grow a regression tree over ``data`` and return its root ``Node``.

    ``data`` is a sequence of ``(features, target)`` pairs. Nodes are
    processed from an explicit stack, not by recursion. A node becomes
    a leaf, predicting the mean target of its samples, when its depth has
    reached ``max_depth``, when it holds fewer than ``min_samples_split``
    samples, or when ``find_best_split`` reports no usable split.
    """
    def mean_target(samples):
        # Leaf value: average of the targets that reached the node.
        return np.mean([target for _, target in samples])

    root = Node(data)
    pending = [root]

    while pending:
        current = pending.pop()

        stop_here = (
            current.depth >= max_depth
            or len(current.region) < min_samples_split
        )
        # Only search for a split when the stopping rules allow one.
        found = None if stop_here else find_best_split(current.region)

        if found is None or found[2] is None:
            current.prediction = mean_target(current.region)
            continue

        feature, split_value, regions = found
        child_depth = current.depth + 1

        current.split_feature = feature
        current.split_value = split_value
        current.left = Node(regions[0], child_depth)
        current.right = Node(regions[1], child_depth)

        # Left is pushed first, so the right child is expanded first.
        pending.extend((current.left, current.right))

    return root

def predict(tree, x):
    """Walk ``tree`` from the root and return the prediction for ``x``.

    At every node that has both children, ``x[split_feature]`` is compared
    with ``split_value``: values less than or equal to the threshold go to
    the left child, larger values to the right. The walk ends at the first
    node lacking either child, whose ``prediction`` is returned.
    """
    current = tree
    while True:
        if not (current.left and current.right):
            return current.prediction
        goes_left = x[current.split_feature] <= current.split_value
        current = current.left if goes_left else current.right

# Example usage: fit a tree on five one-feature samples, then query one point.
training_data = [
    (np.array([feature_value]), target)
    for feature_value, target in (
        (2.5, 5.0),
        (5.1, 7.5),
        (3.3, 6.2),
        (4.7, 8.1),
        (1.8, 3.9),
    )
]

tree = build_tree(training_data)
test_point = np.array([4.0])
prediction = predict(tree, test_point)
print(f"Prediction for {test_point}: {prediction}")


Prediction for [4.]: 6.2
