In [10]:
import numpy as np

In [11]:
def gini(y):
    """Compute the Gini impurity of a collection of class labels.

    The impurity is ``1 - sum(p_k ** 2)``, where ``p_k`` is the fraction of
    samples that carry label ``k``.

    Parameters
    ----------
    y : array-like
        One label per sample. Plain lists/tuples are accepted as well as
        NumPy arrays.

    Returns
    -------
    float
        ``0.0`` when every sample shares one label (or when ``y`` is empty);
        larger values mean the labels are more mixed.
    """
    # Coerce first: comparing a plain Python list to a scalar yields a single
    # bool rather than an element-wise mask, which would corrupt the counts.
    labels = np.asarray(y)
    if labels.size == 0:
        # An empty set has nothing to be impure about; this also avoids a
        # division by zero below.
        return 0.0

    # One pass gives the per-class counts instead of re-scanning per class.
    _, counts = np.unique(labels, return_counts=True)
    proportions = counts / labels.size
    return float(1.0 - np.sum(proportions ** 2))

In [12]:
def split_dataset(X, y, feature_index, threshold):
    """Partition samples and labels on a single feature threshold.

    Rows whose value in column ``feature_index`` is ``<= threshold`` go to
    the left partition; rows whose value is ``> threshold`` go to the right.

    Returns
    -------
    tuple
        ``(X_left, X_right, y_left, y_right)``.
    """
    column = X[:, feature_index]
    # The two sides are computed independently, so a row that satisfies
    # neither comparison (e.g. NaN) ends up in neither partition.
    left_rows = np.flatnonzero(column <= threshold)
    right_rows = np.flatnonzero(column > threshold)
    return X[left_rows], X[right_rows], y[left_rows], y[right_rows]

In [13]:
def best_split(X, y):
    """Exhaustively search for the split with the lowest weighted Gini impurity.

    Every distinct value of every feature column is tried as a threshold.
    Candidates that leave either side empty are ignored.

    Returns
    -------
    tuple
        ``(feature_index, threshold)`` of the winning candidate, or
        ``(None, None)`` when no candidate separates the samples. On ties the
        candidate encountered first is kept.
    """
    n_total = len(y)
    winner = (None, None)
    lowest_impurity = float("inf")

    for col in range(X.shape[1]):
        for candidate in np.unique(X[:, col]):
            _, _, labels_lo, labels_hi = split_dataset(X, y, col, candidate)
            n_lo, n_hi = len(labels_lo), len(labels_hi)
            if n_lo == 0 or n_hi == 0:
                # A one-sided split carries no information.
                continue

            # Impurity of each side, weighted by the share of samples it holds.
            weighted = (n_lo / n_total * gini(labels_lo)) + (n_hi / n_total * gini(labels_hi))
            if weighted < lowest_impurity:
                lowest_impurity = weighted
                winner = (col, candidate)

    return winner

In [14]:
class Node:
    """One vertex of the decision tree.

    All attributes default to ``None``; the constructor simply stores
    whatever it is given.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        # Split rule: column index and the cut point compared against it.
        self.feature, self.threshold = feature, threshold
        # Child subtrees.
        self.left, self.right = left, right
        # Stored label; ``predict`` treats a non-None value as a leaf marker.
        self.value = value

In [15]:
def _majority_label(y):
    """Return the most frequent label in ``y`` (the smallest label wins ties)."""
    # np.unique works for any sortable label dtype, unlike np.bincount which
    # only accepts non-negative integers.
    labels, counts = np.unique(y, return_counts=True)
    return labels[counts.argmax()]


def build_tree(X, y, depth=0, max_depth=10):
    """Recursively grow a decision tree from samples ``X`` and labels ``y``.

    Parameters
    ----------
    X : array-like, two-dimensional
        Feature matrix, one row per sample.
    y : array-like
        One label per row of ``X``. Any sortable label type is accepted
        (integers, floats, strings, negative values).
    depth : int, optional
        Depth of the node being built; callers normally leave this at 0.
    max_depth : int, optional
        Depth at which growth stops and a leaf is emitted.

    Returns
    -------
    Node
        Root of the (sub)tree. Leaves store the majority label in ``value``.

    Raises
    ------
    ValueError
        If ``y`` is empty.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if y.size == 0:
        # Fail up front with a clear message instead of deep inside argmax.
        raise ValueError("build_tree requires at least one sample")

    # Stop when the node is pure or the depth budget is spent.
    if len(np.unique(y)) == 1 or depth >= max_depth:
        return Node(value=_majority_label(y))

    feature_index, threshold = best_split(X, y)
    if feature_index is None:
        # No threshold separates the samples (e.g. identical rows with
        # different labels), so fall back to a majority-vote leaf.
        return Node(value=_majority_label(y))

    X_left, X_right, y_left, y_right = split_dataset(X, y, feature_index, threshold)
    left = build_tree(X_left, y_left, depth + 1, max_depth)
    right = build_tree(X_right, y_right, depth + 1, max_depth)
    return Node(feature_index, threshold, left, right)

In [16]:
def predict(node, x):
    """Route one sample down the tree and return the value of the node reached.

    Starting at ``node``, descend left when ``x[feature] <= threshold`` and
    right otherwise, stopping at the first node whose ``value`` is not None.
    """
    current = node
    while current.value is None:
        if x[current.feature] <= current.threshold:
            current = current.left
        else:
            current = current.right
    return current.value

In [17]:
def predict_tree(tree, X):
    """Predict a label for each sample in ``X`` using ``tree``.

    Returns a plain Python list with one prediction per sample, in the order
    the samples are iterated.
    """
    labels = []
    for sample in X:
        labels.append(predict(tree, sample))
    return labels

In [18]:
# Toy dataset: four samples with two features each; the first two samples are
# labelled 0 and the last two are labelled 1.
X = np.array([[2, 3], [1, 1], [4, 6], [5, 7]])
y = np.array([0, 0, 1, 1])
# Fit on the toy set, then predict on the very same samples. This is only a
# smoke test of the pipeline, not an estimate of generalisation.
tree = build_tree(X, y)
predictions = predict_tree(tree, X)
print("Predictions:", predictions)

Predictions: [0, 0, 1, 1]
