Define the Tree Node Structure

In [1]:
class TreeNode:
    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        self.feature = feature
        self.threshold = threshold
        self.left = left
        self.right = right
        self.value = value


Implement the Split Function

In [2]:
import numpy as np

def split_dataset(X, y, feature, threshold):
    left_mask = X[:, feature] <= threshold
    right_mask = X[:, feature] > threshold
    return X[left_mask], X[right_mask], y[left_mask], y[right_mask]

def calculate_mse(y):
    if len(y) == 0:
        return 0
    return np.mean((y - np.mean(y)) ** 2)

def best_split(X, y):
    best_feature = None
    best_threshold = None
    best_mse = float('inf')
    n_samples, n_features = X.shape
    
    for feature in range(n_features):
        thresholds = np.unique(X[:, feature])
        for threshold in thresholds:
            X_left, X_right, y_left, y_right = split_dataset(X, y, feature, threshold)
            mse = (len(y_left) * calculate_mse(y_left) + len(y_right) * calculate_mse(y_right)) / len(y)
            if mse < best_mse:
                best_mse = mse
                best_feature = feature
                best_threshold = threshold
                
    return best_feature, best_threshold


Build the Tree

In [3]:
class DecisionTreeRegressor:
    def __init__(self, max_depth=None, min_samples_split=2):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.root = None

    def fit(self, X, y):
        self.root = self._build_tree(X, y, depth=0)
        
    def _build_tree(self, X, y, depth):
        n_samples, n_features = X.shape
        if n_samples < self.min_samples_split or (self.max_depth is not None and depth >= self.max_depth):
            return TreeNode(value=np.mean(y))
        
        feature, threshold = best_split(X, y)
        if feature is None:
            return TreeNode(value=np.mean(y))
        
        X_left, X_right, y_left, y_right = split_dataset(X, y, feature, threshold)
        left_child = self._build_tree(X_left, y_left, depth + 1)
        right_child = self._build_tree(X_right, y_right, depth + 1)
        return TreeNode(feature=feature, threshold=threshold, left=left_child, right=right_child)


Prediction Function

In [4]:
def predict(self, X):
    return np.array([self._predict(inputs) for inputs in X])
    
def _predict(self, inputs):
    node = self.root
    while node.value is None:
        if inputs[node.feature] <= node.threshold:
            node = node.left
        else:
            node = node.right
    return node.value


Putting It All Together

In [8]:
# Generate some sample data
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=100, n_features=1, noise=0.1)

# Fit the decision tree regressor
tree = DecisionTreeRegressor(max_depth=5)
tree.fit(X, y)

# Make predictions
predictions = tree.predict(X)

# Evaluate the model
from sklearn.metrics import mean_squared_error

mse = mean_squared_error(y, predictions)
print(f'Mean Squared Error: {mse}')


AttributeError: 'DecisionTreeRegressor' object has no attribute 'predict'