In [56]:
import numpy as np
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score

In [57]:
class Node:
    def __init__(self, feature_index=None, threshold=None, left_branch=None, right_branch=None, value=None):
        self.feature_index = feature_index
        self.threshold = threshold
        self.left_branch = left_branch
        self.right_branch = right_branch
        self.value = value


class CART:

    def __init__(self, max_depth = 3):
        self.max_depth = max_depth
        

    def gini_(self, y):
        _, counts = np.unique(y, return_counts=True)
        probabilities = counts / len(y)
        return 1 - np.sum(probabilities ** 2)
    

    def split_node(self,X, y):
        best_gini = 1
        best_feature_index = None
        best_threshold = None

        n_samples, n_features = X.shape

        for feature_index in range(n_features):
            thresholds = np.unique(X[:, feature_index])

            for threshold in thresholds:
                left_indices = X[:, feature_index] < threshold
                left_y = y[left_indices]
                right_y = y[~left_indices]

                if len(left_y) == 0 or len(right_y) == 0:
                    continue

                gini = (len(left_y) / n_samples) * self.gini_(left_y) + (len(right_y) / n_samples) * self.gini_(right_y)

                if gini < best_gini:
                    best_gini = gini
                    best_feature_index = feature_index
                    best_threshold = threshold

        left_indices = X[:, best_feature_index] < best_threshold
        right_indices = X[:, best_feature_index] >= best_threshold

        left_branch = Node(value=np.argmax(np.bincount(y[left_indices])))
        right_branch = Node(value=np.argmax(np.bincount(y[right_indices])))

        return Node(best_feature_index, best_threshold, left_branch, right_branch)
    

    def build_tree(self,X, y, max_depth, depth=0):
        if depth >= max_depth or len(y) == 1 or len(np.unique(y)) == 1:
            return Node(value=np.argmax(np.bincount(y)))
        node = self.split_node(X, y)
        node.left_branch = self.build_tree(X[X[:, node.feature_index] < node.threshold], y[X[:, node.feature_index] < node.threshold], max_depth, depth+1)
        node.right_branch = self.build_tree(X[X[:, node.feature_index] >= node.threshold], y[X[:, node.feature_index] >= node.threshold], max_depth, depth+1)
        return node
    
    def fit(self,X_train, y_train):
        self.root = self.build_tree(X_train, y_train, max_depth=2)

    
    
    def passTree(self,node, x):
        if node.value is not None:
            return node.value

        if x[node.feature_index] < node.threshold:
            return self.passTree(node.left_branch, x)
        else:
            return self.passTree(node.right_branch, x)
        
    def predict(self,X_test):
        y_pred = []
        
        for x in X_test:
            y_pred.append(self.passTree(self.root, x))
        return y_pred

In [58]:
data = pd.read_csv("../data/csgo_task_m.csv")

In [59]:
X = data.values
y = data["bomb_planted"].values

In [60]:
underSampler = RandomOverSampler()
X_under_sample, y_under_sample = underSampler.fit_resample(X,y)

In [61]:
X_train, X_test, y_train, y_test = train_test_split(X_under_sample, y_under_sample, test_size=0.2)

In [62]:
tree = CART()
tree.fit(X_train, y_train)
predictions = tree.predict(X_test)
print(accuracy_score(y_test, predictions))
print(confusion_matrix(y_test, predictions))
print(precision_score(y_test, predictions))
print(recall_score(y_test, predictions))
print(f1_score(y_test, predictions))

1.0
[[19806     0]
 [    0 19900]]
1.0
1.0
1.0
