In [1]:
from sklearn.metrics import accuracy_score
import numpy as np

class MyDTClassifier:
    """Decision tree classifier that greedily splits on Gini impurity.

    Class labels are used directly as list indices while counting samples,
    so they must be the integers ``0 .. n_classes - 1``.

    Parameters
    ----------
    max_depth : int
        Nodes at this depth (root is depth 0) or deeper are never split.
    min_size : int
        A node holding fewer samples than this is never split.
    """

    def __init__(self, max_depth, min_size):
        self.max_depth, self.min_size = max_depth, min_size

    def get_gini_and_splitting_value(self, X, y):
        """Find the single split that most reduces the weighted Gini impurity.

        Every feature is scanned in sorted order; candidate thresholds are
        the midpoints between consecutive distinct feature values.

        Parameters
        ----------
        X : numpy.ndarray of shape (m, self.features)
            Samples that reached the node.
        y : numpy.ndarray of shape (m,)
            Integer class labels for ``X``.

        Returns
        -------
        (index, value)
            Feature index and threshold of the best split, or
            ``(None, None)`` when ``m <= 1`` or no split yields an impurity
            strictly lower than the node's own.
        """
        m = y.shape[0]
        if m <= 1:
            return None, None
        parent = [np.sum(y == c) for c in range(self.n_classes)]
        # Start from the node's own impurity: a split must strictly beat it.
        gini = 1.0 - sum((n / m) ** 2 for n in parent)
        index, value = None, None
        for i in range(self.features):
            thresholds, classes = zip(*sorted(zip(X[:, i], y)))
            n_left = [0] * self.n_classes
            n_right = parent.copy()
            for k in range(1, m):
                # Move sample k-1 from the right partition to the left one.
                c = classes[k - 1]
                n_left[c] += 1
                n_right[c] -= 1
                # Equal neighbouring values cannot be separated by a
                # threshold; skip before paying for the impurity computation.
                if thresholds[k] == thresholds[k - 1]:
                    continue
                gini_left = 1.0 - sum((n_left[x] / k) ** 2 for x in range(self.n_classes))
                gini_right = 1.0 - sum((n_right[x] / (m - k)) ** 2 for x in range(self.n_classes))
                weighted_gini = (k * gini_left + (m - k) * gini_right) / m
                if weighted_gini < gini:
                    gini = weighted_gini
                    index = i
                    value = (thresholds[k] + thresholds[k - 1]) / 2
        return index, value

    class Node:
        """One node of the fitted tree.

        ``data`` is the majority class of the samples that reached the node.
        ``left`` / ``right`` stay ``None`` for a leaf; for an internal node
        ``feature_index`` and ``threshold`` describe the split.
        """

        def __init__(self, data, max_depth, min_size=3):
            # min_size is accepted for interface compatibility; it is not stored.
            self.data = data
            self.max_depth = max_depth
            self.left = None
            self.right = None
            self.feature_index = 0
            self.threshold = 0

    def fit(self, X_train, y_train):
        """Build the tree from the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_train : array-like of shape (n_samples,)
            Integer labels in ``0 .. n_classes - 1``.

        Returns
        -------
        MyDTClassifier
            ``self``, so calls can be chained.
        """
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)
        self.n_classes = len(np.unique(y_train))
        # Take the feature count from the training matrix itself, not from
        # whatever global ``X`` happens to exist in the notebook.
        self.features = X_train.shape[1]
        self.dec_tree = self.tree(X_train, y_train)
        return self

    def tree(self, X, y, depth=0):
        """Recursively grow the subtree for the samples ``(X, y)``.

        Parameters
        ----------
        X : numpy.ndarray of shape (m, self.features)
        y : numpy.ndarray of shape (m,)
        depth : int
            Depth of the node being created (root is 0).

        Returns
        -------
        MyDTClassifier.Node
            Root of the grown subtree.
        """
        num_samples_per_class = [np.sum(y == i) for i in range(self.n_classes)]
        predicted_class = np.argmax(num_samples_per_class)
        node = self.Node(data=predicted_class, max_depth=self.max_depth)
        # Check both stopping rules before searching for a split so that
        # undersized nodes do not pay for a search whose result is discarded.
        if depth < self.max_depth and len(y) >= self.min_size:
            index, value = self.get_gini_and_splitting_value(X, y)
            if index is not None:
                indices_left = X[:, index] < value
                indices_right = ~indices_left
                node.feature_index = index
                node.threshold = value
                node.left = self.tree(X[indices_left], y[indices_left], depth + 1)
                node.right = self.tree(X[indices_right], y[indices_right], depth + 1)
        return node

    def func(self, data):
        """Predict the class of one sample by walking the tree to a leaf.

        Parameters
        ----------
        data : sequence indexable by feature index
            Feature values of a single sample.

        Returns
        -------
        The majority class stored in the leaf that ``data`` reaches.
        """
        node = self.dec_tree
        # Internal nodes always have both children, so testing one suffices.
        while node.left is not None:
            if data[node.feature_index] < node.threshold:
                node = node.left
            else:
                node = node.right
        return node.data

    def predict(self, X_test):
        """Predict a class for every row of ``X_test``.

        Returns
        -------
        list
            One predicted class per row, in input order.
        """
        return [self.func(data) for data in X_test]

    def score(self, prediction, y):
        """Return the accuracy of ``prediction`` against the true labels ``y``.

        Accuracy is the fraction of positions where the two agree. Every
        sample counts in the denominator, including samples predicted as a
        class that never occurs in ``y``.

        Parameters
        ----------
        prediction : array-like of shape (n,)
        y : array-like of shape (n,)

        Returns
        -------
        numpy.float64
            Value in ``[0, 1]``.

        Raises
        ------
        ValueError
            If the inputs are empty or differ in shape.
        """
        prediction = np.asarray(prediction)
        y = np.asarray(y)
        if prediction.shape != y.shape:
            raise ValueError("prediction and y must have the same shape")
        if y.size == 0:
            raise ValueError("cannot score an empty set of labels")
        return np.mean(prediction == y)

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

# Load the iris data set and hold out 20% of it for testing.
iris = load_iris()
# Feature-pair selector; not used in this cell (kept in case other cells reference it).
pair = [0, 1]
# Use all four features. The earlier pair-restricted assignment of X, y was
# overwritten immediately, so it has been dropped.
X, y = iris.data, iris.target
# Fixed random_state keeps the split, and therefore the reported score, reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)

In [3]:
# Fit a depth-3 tree on the training split; fit() returns the estimator itself.
tree = MyDTClassifier(max_depth=3, min_size=3).fit(X_train, y_train)

# Predict the held-out samples and show the accuracy as the cell's output.
prediction = tree.predict(X_test)
tree.score(prediction, y_test)

0.9666666666666667