In [21]:
import numpy as np
from sklearn.model_selection import train_test_split

# Read the comma-separated numeric table from disk into a 2-D array.
dt = np.loadtxt('data/smt.txt', delimiter=',')

# Columns 0-1 are used as the feature matrix, column 2 as the target vector.
X, y = dt[:, :2], dt[:, 2]

# Hold out 33% of the rows for testing; the fixed seed keeps the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [30]:
from sklearn.tree import DecisionTreeClassifier


class DfTree:
    """Thin wrapper around scikit-learn's ``DecisionTreeClassifier``.

    The constructor only records hyper-parameters. The underlying estimator
    is created and trained in :meth:`fit` and stored on ``self.model``.

    Parameters
    ----------
    splitter : default="best"
        Forwarded to ``DecisionTreeClassifier(splitter=...)``.
    max_depth : default=None
        Forwarded to ``DecisionTreeClassifier(max_depth=...)``.
    min_samples_split : default=2
        Forwarded to ``DecisionTreeClassifier(min_samples_split=...)``.
    min_samples_leaf : default=1
        Forwarded to ``DecisionTreeClassifier(min_samples_leaf=...)``.
    random_state : default=None
        Forwarded to ``DecisionTreeClassifier(random_state=...)``. Pass an
        integer to make repeated fits reproducible.
    """

    def __init__(self,
                 splitter="best",
                 max_depth=None,
                 min_samples_split=2,
                 min_samples_leaf=1,
                 random_state=None):
        self.splitter = splitter
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.random_state = random_state

    def _fitted_model(self):
        """Return the trained estimator, or raise if ``fit`` was never called."""
        model = getattr(self, "model", None)
        if model is None:
            # AttributeError matches what accessing the missing ``self.model``
            # attribute raised before, but with an actionable message.
            raise AttributeError(
                "DfTree is not fitted yet; call fit(X, y) before predicting."
            )
        return model

    def predict_proba(self, X, check_input=True):
        """Return class probabilities for ``X`` from the wrapped estimator.

        Parameters
        ----------
        X
            Samples to score; passed through to the estimator unchanged.
        check_input : bool, default=True
            Passed through to the estimator's ``predict_proba``.

        Returns
        -------
        Whatever the wrapped estimator's ``predict_proba`` returns.

        Raises
        ------
        AttributeError
            If the wrapper has not been fitted.
        """
        # The wrapper itself has no ``tree_`` / ``n_classes_`` attributes, so
        # the probabilities must come from the fitted estimator.
        return self._fitted_model().predict_proba(X, check_input=check_input)

    def fit(self, X, y):
        """Build a fresh ``DecisionTreeClassifier`` and train it on ``(X, y)``.

        Any previously fitted estimator on ``self.model`` is replaced.

        Returns
        -------
        DfTree
            ``self``, so calls can be chained.
        """
        self.model = DecisionTreeClassifier(splitter=self.splitter, max_depth=self.max_depth,
                                            min_samples_split=self.min_samples_split,
                                            min_samples_leaf=self.min_samples_leaf,
                                            random_state=self.random_state)
        self.model.fit(X, y)
        return self

    def pred(self, X):
        """Return the wrapped estimator's predictions for ``X``.

        Raises
        ------
        AttributeError
            If the wrapper has not been fitted.
        """
        return self._fitted_model().predict(X)


In [31]:
from sklearn.metrics import accuracy_score

# Train a shallow (depth-2) tree on the training split.
model = DfTree(max_depth=2, splitter="best")
model.fit(X_train, y_train)

# Score the held-out split: accuracy is the fraction of exactly matching labels.
pre = model.pred(X_test)
acc = accuracy_score(y_true=y_test, y_pred=pre)

# Bare last expression so the notebook displays the score.
acc

0.16666666666666666