In [62]:
import pandas as pd
import numpy as np

In [78]:
class DecisionTree:
    def __init__(self, max_depth=5):
        self.max_depth = max_depth
        self.X = None
        self.y = None
        self.n = 2
        self.m = 1
        self.steps = []
    
    def fit(self, X, y):
        if self.max_depth != 0:
            self.m = X.shape[0]
            min_entropy = ['i', 'value', float('inf'), None, None]
            entropy_now = self.get_entropy(y)
            if entropy_now <= 0.1:
                self.steps = y.mode()[0]
                return self
            for i in range(self.n):
                column = X.columns[i]
                for m in range(self.m):
                    value = X.iloc[m].iloc[i]
                    indices_0 = X[X[column] < value].index
                    indices_1 = X[X[column] >= value].index
                    entropy_mean = (self.get_entropy(y.iloc[indices_0]) + self.get_entropy(y.iloc[indices_1])) * 0.5
                    if entropy_mean < entropy_now and entropy_mean < min_entropy[2]:
                        min_entropy[0], min_entropy[1], min_entropy[2] = i, value, entropy_mean
                        min_entropy[3], min_entropy[4] = indices_0, indices_1
            self.steps = [min_entropy, 
                    DecisionTree(max_depth=(self.max_depth - 1)).fit(X.iloc[min_entropy[3]], y.iloc[min_entropy[3]]), 
                    DecisionTree(max_depth=(self.max_depth - 1)).fit(X.iloc[min_entropy[4]], y.iloc[min_entropy[4]])]
        else:
            self.steps = y.mode()[0]
        return self

    def predict(self, X):
        predicted = []
        for m in range(X.shape[0]):
            step = self.steps
            while isinstance(step, list):
                feature_index, threshold, _, _, _ = step[0]
                if X.iloc[m].iloc[feature_index] < threshold:
                    step = step[1]
                else:
                    step = step[2]
            predicted.append(step)
        return pd.Series(predicted)

    def get_entropy(self, y):
        if isinstance(y, (int, float)):
            return 0
        n = y.shape[0]
        if n == 0:
            return 0
        p_1 = sum(y) / n
        p_0 = 1 - p_1
        if p_1 == 0 or p_0 == 0:
            return 0
        return -1 * p_1 * np.log2(p_1) - p_0 * np.log2(p_0)

In [79]:
# Создаем данные
data = {'feature_1': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        'feature_2': [10, 9, 8, 7, 6, 5, 4, 3, 2, 1],
        'target': [0, 0, 0, 0, 1, 1, 1, 1, 1, 1]}
df = pd.DataFrame(data)

# Разделяем на признаки и целевую переменную
X = df[['feature_1', 'feature_2']]
y = df['target']

# Создаем и обучаем дерево
tree = DecisionTree(max_depth=3)
tree.fit(X, y)

# Делаем предсказания
predictions = tree.predict(X)
print(predictions)

0    <__main__.DecisionTree object at 0x000001E137E...
1    <__main__.DecisionTree object at 0x000001E137E...
2    <__main__.DecisionTree object at 0x000001E137E...
3    <__main__.DecisionTree object at 0x000001E137E...
4    <__main__.DecisionTree object at 0x000001E137C...
5    <__main__.DecisionTree object at 0x000001E137C...
6    <__main__.DecisionTree object at 0x000001E137C...
7    <__main__.DecisionTree object at 0x000001E137C...
8    <__main__.DecisionTree object at 0x000001E137C...
9    <__main__.DecisionTree object at 0x000001E137C...
dtype: object
