In [1]:
# Load the Boston housing dataset through scikit-learn's bundled loader.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2 —
# confirm the environment pins a version that still ships it.
from sklearn.datasets import load_boston
boston = load_boston()

In [2]:
# Pull the feature matrix and the regression target out of the loaded dataset.
data = boston['data']
target = boston['target']

In [52]:
from numpy import vectorize, zeros, array, invert, apply_along_axis, array_split
from sklearn.metrics import mean_squared_error

def apply(arr, f):
    """Apply ``f`` to each 1-D slice of ``arr`` taken along axis 1.

    For a 2-D array this means ``f`` is called once per row.

    Parameters
    ----------
    arr : array-like
        Input array (2-D in every use within this notebook).
    f : callable
        Function receiving one 1-D slice at a time.

    Returns
    -------
    numpy.ndarray
        The per-slice results assembled by ``numpy.apply_along_axis``.
    """
    return apply_along_axis(f, 1, arr)

class Threshold:
    """Binary split rule ``x[feature] < value``.

    Parameters
    ----------
    feature : int
        Index of the feature (column) the rule inspects.
    value : scalar
        Cut-off; samples strictly below it satisfy the rule.
    """

    def __init__(self, feature, value):
        self.feature = feature
        self.value = value

    def __call__(self, x):
        """Return whether the single sample ``x`` satisfies the rule."""
        return x[self.feature] < self.value

    def sift(self, X):
        """Return a boolean mask with one entry per row of ``X``.

        Entry ``i`` is True when ``X[i, feature] < value``. The comparison is
        done on the whole column at once, so a matrix with zero rows yields an
        empty mask instead of raising.

        Parameters
        ----------
        X : array-like, 2-D
            Sample matrix, one row per sample.
        """
        # Local import: ``asarray`` is not among the notebook's module-level numpy imports.
        from numpy import asarray
        return asarray(X)[:, self.feature] < self.value

class Node:
    """Node of a binary tree.

    Attributes are plain fields filled in by whoever builds the tree:
    ``left`` / ``right`` are child nodes, ``is_leaf`` marks terminal nodes,
    ``threshold`` holds the split rule of an internal node and ``value`` the
    prediction stored in a leaf.
    """

    def __init__(self, left=None, right=None):
        self.left = left
        self.right = right
        self.is_leaf = False
        # Declared up front so that reading them on a fresh node yields None
        # rather than raising AttributeError.
        self.threshold = None
        self.value = None

def neighbour_means(X, feature, n_splits=1):
    """Return candidate split values for one feature column.

    The column is sorted and cut into ``n_splits`` consecutive chunks of
    near-equal size; the mean of every non-empty chunk is a candidate. With
    the default ``n_splits=1`` the single candidate is the column mean.

    Parameters
    ----------
    X : numpy.ndarray, 2-D
        Sample matrix, one row per sample.
    feature : int
        Index of the column to summarise.
    n_splits : int, default=1
        Number of chunks to cut the sorted column into. Must be positive
        (``numpy.array_split`` rejects non-positive section counts).

    Returns
    -------
    list
        Chunk means in ascending chunk order.
    """
    column = X.take([feature], 1).ravel()
    if n_splits == 1:
        # A single chunk spans the whole column; no need to sort it first.
        return [column.mean()]
    # ``take`` produced a fresh array, so sorting in place leaves X untouched.
    column.sort()
    # More chunks than samples produces empty chunks, whose mean would be NaN.
    return [chunk.mean() for chunk in array_split(column, n_splits) if chunk.size]
    
        
def evaluate(Y, partition):
    """Score a binary split of the targets ``Y``.

    ``partition`` is a boolean mask selecting one side of the split; its
    inverse selects the other. The score is the plain (unweighted) sum of the
    mean squared deviation from the mean on each side.
    """
    def spread(values):
        # Mean squared deviation of ``values`` from their own mean.
        centre = values.mean()
        deviations = values - centre
        return (deviations ** 2).mean()

    selected = Y[partition]
    remaining = Y[invert(partition)]
    return spread(selected) + spread(remaining)

In [53]:
from numpy import ones_like, zeros_like
from sklearn.base import BaseEstimator
from tqdm import trange

class DecisionTree(BaseEstimator):
    """Regression tree grown greedily by recursive binary splitting.

    At every node each feature contributes the candidate thresholds returned
    by ``neighbour_means``; the candidate with the lowest ``evaluate`` score is
    kept and the samples are divided between two child nodes. Leaves predict
    the mean target of the training samples that reached them.

    Parameters
    ----------
    max_depth : int, default=5
        Nodes at this depth (root is depth 0) are always turned into leaves.
    min_samples_split : int, default=30
        Nodes holding fewer samples than this are turned into leaves.
    """

    def __init__(self, max_depth=5, min_samples_split=30):
        self.root = None
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split

    def fit(self, X, Y):
        """Build the tree from sample matrix ``X`` and targets ``Y``.

        Returns
        -------
        self
            The fitted estimator, so calls can be chained.
        """
        self.root = Node()
        self._fit(X, Y, self.root)
        return self

    @staticmethod
    def _make_leaf(node, Y):
        """Turn ``node`` into a leaf predicting the mean of ``Y``."""
        node.is_leaf = True
        node.value = Y.mean()

    def _fit(self, X, Y, node, depth=0):
        """Recursively grow the subtree rooted at ``node`` from ``X`` / ``Y``."""
        size, features_qty = X.shape

        if depth >= self.max_depth or size < self.min_samples_split:
            self._make_leaf(node, Y)
            return

        # Start from +inf so that any finite split error is accepted,
        # however large the targets are.
        min_error = float('inf')
        best_partition, best_threshold = None, None

        for feature in range(features_qty):
            for value in neighbour_means(X, feature):
                threshold = Threshold(feature, value)
                partition = threshold.sift(X)
                # A split that puts every sample on one side separates nothing.
                if (not partition.any()) or partition.all():
                    continue
                error = evaluate(Y, partition)
                if error < min_error:
                    min_error = error
                    best_partition, best_threshold = partition, threshold

        # No candidate produced a usable split.
        if best_threshold is None:
            self._make_leaf(node, Y)
            return

        node.threshold = best_threshold
        node.left = Node()
        node.right = Node()

        # Samples satisfying the threshold go left, the rest go right.
        rest = invert(best_partition)
        self._fit(X[best_partition], Y[best_partition], node.left, depth + 1)
        self._fit(X[rest], Y[rest], node.right, depth + 1)

    def predict(self, X):
        """Return one prediction per row of ``X``.

        Raises
        ------
        AttributeError
            If called before ``fit``.
        """
        if self.root is None:
            raise AttributeError("DecisionTree is not fitted yet; call fit() first")

        def predict_value(x):
            # Walk from the root to a leaf, going left whenever the node's
            # threshold rule holds for the sample.
            node = self.root
            while not node.is_leaf:
                node = node.left if node.threshold(x) else node.right
            return node.value

        return apply(X, predict_value)

In [61]:
# Cross-validated error of the custom DecisionTree on the full dataset.
# The 'neg_mean_squared_error' scoring is negated back, so the displayed value is a mean MSE.
from sklearn.model_selection import cross_val_score
model = DecisionTree()
-cross_val_score(model, data, target, scoring='neg_mean_squared_error').mean()

43.017995357583906

In [63]:
# Same cross-validated mean MSE for scikit-learn's DecisionTreeRegressor with default settings.
# NOTE(review): no random_state is passed to the regressor — presumably the score can
# vary slightly between runs; confirm and consider fixing a seed.
from sklearn.tree import DecisionTreeRegressor
model = DecisionTreeRegressor()
-cross_val_score(model, data, target, scoring='neg_mean_squared_error').mean()

46.720606391471769

In [96]:
# Hold-out check: fit the custom tree on a train split and report the MSE on the test split.
# NOTE(review): train_test_split is called without random_state, so the split (and the
# number displayed below) is presumably not reproducible — consider fixing a seed.
# NOTE(review): mean_squared_error receives (predictions, targets); scikit-learn documents
# the order as (y_true, y_pred). Squared error is symmetric, so the value is unaffected.
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(data, target)
model = DecisionTree()
model.fit(X_train, Y_train)
mean_squared_error(model.predict(X_test), Y_test)

14.831095784893783

In [83]:
# Hold-out baseline: scikit-learn's DecisionTreeRegressor on the X_train / X_test
# currently in the kernel namespace.
# NOTE(review): this cell's execution count (83) is lower than that of the cell creating
# the split (96), so the recorded result presumably comes from an earlier, different
# split — re-run top to bottom before comparing the two numbers.
model = DecisionTreeRegressor()
model.fit(X_train, Y_train)
mean_squared_error(model.predict(X_test), Y_test)

22.090472440944879