In [1]:
import pandas as pd
import numpy as np

In [None]:
class RegressionTree:
    """Binary regression tree grown by greedy minimisation of weighted MSE.

    The fitted tree is stored in ``self.tree`` as nested dictionaries.
    Internal nodes have the keys ``"type"`` (``"node"``), ``"feature"``,
    ``"threshold"``, ``"left"`` and ``"right"``; leaves have ``"type"``
    (``"leaf"``) and ``"value"`` (the mean target of the samples that
    reached the leaf).  A sample goes left when
    ``sample[feature] <= threshold`` and right otherwise.

    Parameters
    ----------
    max_depth
        Nodes at this depth (root is depth 0) are always turned into leaves.
    min_n_samples
        Nodes holding fewer samples than this are turned into leaves.
    """

    def __init__(self, max_depth, min_n_samples):
        self.max_depth = max_depth
        self.min_n_samples = min_n_samples
        # Nested-dict representation of the fitted tree; None until fit().
        self.tree = None

    def _mse(self, y):
        """Return the mean squared deviation of ``y`` from its own mean.

        An empty ``y`` yields 0 so that callers never divide by zero.
        """
        if len(y) == 0:
            return 0

        return np.mean((y - np.mean(y)) ** 2)

    @staticmethod
    def _leaf(y):
        """Build a leaf node predicting the mean of ``y``."""
        return {
            "type": "leaf",
            "value": float(np.mean(y)),
        }

    def _best_split(self, X, y):
        """Find the (feature, threshold) pair with the lowest weighted MSE.

        Every unique value of every feature column is tried as a threshold.
        Candidates that would leave one side empty are skipped.

        Returns
        -------
        dict or None
            ``None`` when no valid split exists (e.g. every feature is
            constant).  Otherwise a dict with ``"type"``, ``"feature"``,
            ``"threshold"`` and the partitioned data under ``"left"`` and
            ``"right"`` as ``(X_subset, y_subset)`` tuples.
        """
        best_mse = float('inf')
        best_feature = None
        best_threshold = None
        n_samples, n_features = X.shape

        for feature_idx in range(n_features):
            column = X[:, feature_idx]
            for threshold in np.unique(column):
                # The right mask must be derived *after* the left mask is
                # bound; a single tuple assignment cannot reference it.
                left_mask = column <= threshold
                right_mask = ~left_mask

                y_left = y[left_mask]
                y_right = y[right_mask]
                if (len(y_left) < 1) or (len(y_right) < 1):
                    continue

                weighted_mse = (
                    len(y_left) * self._mse(y_left)
                    + len(y_right) * self._mse(y_right)
                ) / n_samples

                # Strict comparison: on ties the first candidate found wins.
                if weighted_mse < best_mse:
                    best_mse = weighted_mse
                    best_feature = feature_idx
                    best_threshold = threshold

        if best_feature is None:
            return None

        # Materialise the partition once, for the winning candidate only.
        left_mask = X[:, best_feature] <= best_threshold
        right_mask = ~left_mask
        return {
            "type": "node",
            "feature": best_feature,
            "threshold": best_threshold,
            "left": (X[left_mask], y[left_mask]),
            "right": (X[right_mask], y[right_mask]),
        }

    def fit(self, X, y):
        """Grow the tree on the training data and store it in ``self.tree``.

        Parameters
        ----------
        X
            Array-like of shape ``(n_samples, n_features)``.
        y
            Array-like of targets with ``n_samples`` entries.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional, if ``X`` and ``y`` differ in
            length, or if the training set is empty.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional (n_samples, n_features)")
        if len(X) != len(y):
            raise ValueError("X and y must contain the same number of samples")
        if len(y) == 0:
            raise ValueError("cannot fit a tree on an empty training set")

        self.tree = self._build_tree(X, y, depth=0)

    def _build_tree(self, X, y, depth):
        """Recursively build the subtree for the samples ``(X, y)``.

        Recursion stops with a leaf when ``depth`` reaches ``max_depth``,
        when fewer than ``min_n_samples`` samples remain, or when
        ``_best_split`` finds no valid split.
        """
        if (depth >= self.max_depth) or (len(X) < self.min_n_samples):
            return self._leaf(y)

        best_split = self._best_split(X, y)
        if not best_split:
            return self._leaf(y)

        left_branch = self._build_tree(*best_split["left"], depth + 1)
        right_branch = self._build_tree(*best_split["right"], depth + 1)

        return {
            "type": "node",
            "feature": best_split["feature"],
            "threshold": best_split["threshold"],
            "left": left_branch,
            "right": right_branch,
        }

    def _predict_one(self, X):
        """Return the prediction for a single sample.

        Parameters
        ----------
        X
            One sample, indexable by feature position.

        Raises
        ------
        RuntimeError
            If the tree has not been fitted yet.
        """
        if self.tree is None:
            raise RuntimeError("RegressionTree must be fitted before predicting")

        node = self.tree
        # Walk down using the same "<= threshold goes left" rule as training.
        while node["type"] == "node":
            if X[node["feature"]] <= node["threshold"]:
                node = node["left"]
            else:
                node = node["right"]
        return node["value"]

    def predict(self, X):
        """Return a 1-D array of predictions, one per row of ``X``.

        Parameters
        ----------
        X
            Array-like of shape ``(n_samples, n_features)``.
        """
        X = np.asarray(X)
        return np.array([self._predict_one(sample) for sample in X])