In [7]:
import numpy as np

In [8]:
# Seed NumPy's legacy global random state so that any later random draws repeat across runs.
np.random.seed(42)

In [9]:
# Toy feature matrix: ten samples (rows), two integer features (columns).
# Rows line up one-to-one with the labels in `y`.
X = np.array(
    [
        [150, 50],
        [160, 60],
        [170, 65],
        [180, 80],
        [190, 90],
        [155, 70],
        [165, 75],
        [175, 85],
        [185, 95],
        [195, 100],
    ]
)

In [10]:
# Binary class labels: the first five samples are class 0, the last five are class 1.
y = np.array([0] * 5 + [1] * 5)

In [11]:
def gini_impurity(y):
    """Compute the Gini impurity of a collection of class labels.

    The impurity is ``1 - sum(p_k ** 2)``, where ``p_k`` is the fraction of
    entries in ``y`` carrying the k-th distinct label.

    Parameters
    ----------
    y : array-like
        Class labels.

    Returns
    -------
    numpy.float64
        0.0 when all labels are identical; the value grows as the labels
        become more evenly mixed.
    """
    # Only the per-label frequencies matter; the label values are discarded.
    _, label_counts = np.unique(y, return_counts=True)
    fractions = label_counts / label_counts.sum()
    return 1 - np.square(fractions).sum()

In [12]:
def split_dataset(X, y, feature_idx, threshold):
    """Partition samples by comparing one feature column with a threshold.

    Parameters
    ----------
    X : numpy.ndarray
        2-D feature matrix (samples in rows).
    y : numpy.ndarray
        Labels aligned with the rows of ``X``.
    feature_idx : int
        Column of ``X`` used for the comparison.
    threshold : scalar
        Rows with ``X[:, feature_idx] <= threshold`` go left; all others go right.

    Returns
    -------
    tuple
        ``(X_left, y_left, X_right, y_right)``.
    """
    goes_left = X[:, feature_idx] <= threshold
    goes_right = np.logical_not(goes_left)
    X_left, y_left = X[goes_left], y[goes_left]
    X_right, y_right = X[goes_right], y[goes_right]
    return X_left, y_left, X_right, y_right

In [13]:
class DecisionTreeNode:
    """One node of a binary decision tree.

    An internal node carries a split (``feature_idx``, ``threshold``) plus its
    two children; a leaf carries only ``value``. ``DecisionTree.predict_one``
    treats any node whose ``value`` is not ``None`` as a leaf.

    Parameters
    ----------
    feature_idx : int, optional
        Index of the feature tested at this node.
    threshold : scalar, optional
        Threshold the feature is compared against.
    left, right : DecisionTreeNode, optional
        Child nodes.
    value : optional
        Label stored at a leaf.
    """

    def __init__(self, feature_idx=None, threshold=None, left=None, right=None, value=None):
        # Split description (internal nodes).
        self.feature_idx, self.threshold = feature_idx, threshold
        # Sub-trees (internal nodes).
        self.left, self.right = left, right
        # Stored label (leaf nodes).
        self.value = value

In [14]:
class DecisionTree:
    """Binary classification tree grown greedily on weighted Gini impurity.

    Every internal node tests ``x[feature_idx] <= threshold``: samples that pass
    go to the left child, the rest go to the right child. Leaves store the
    majority label of the training samples that reached them.

    Parameters
    ----------
    max_depth : int, default 3
        Depth at which growth stops and a leaf is created.
    """

    def __init__(self, max_depth=3):
        self.max_depth = max_depth
        self.root = None  # populated by fit()

    @staticmethod
    def _majority_label(y):
        """Return the most frequent label in ``y``; the smallest label wins ties.

        Uses ``np.unique`` rather than ``np.bincount`` so that negative,
        floating-point, or string labels are supported as well.

        Raises
        ------
        ValueError
            If ``y`` is empty.
        """
        if len(y) == 0:
            raise ValueError("cannot build a decision tree from an empty label array")
        classes, counts = np.unique(y, return_counts=True)
        # np.unique returns sorted labels and argmax returns the first maximum,
        # so ties resolve to the smallest label (same as bincount + argmax).
        return classes[counts.argmax()]

    def build_tree(self, X, y, depth=0):
        """Recursively grow a subtree for the samples ``(X, y)``.

        Parameters
        ----------
        X : numpy.ndarray
            2-D feature matrix (samples in rows).
        y : numpy.ndarray
            Labels aligned with the rows of ``X``.
        depth : int, default 0
            Depth of the node being built; the root is at depth 0.

        Returns
        -------
        DecisionTreeNode
            Root of the constructed subtree.

        Raises
        ------
        ValueError
            If ``y`` is empty.
        """
        majority = self._majority_label(y)

        # Stop when the node is pure or the depth budget is used up.
        if len(np.unique(y)) == 1 or depth >= self.max_depth:
            return DecisionTreeNode(value=majority)

        # A candidate split is accepted only if its weighted impurity is
        # strictly below the best one seen so far.
        best_gini = 1.0
        best_feature, best_threshold = None, None
        n_samples = len(y)
        for feature_idx in range(X.shape[1]):
            # Every distinct observed value is tried as a "<=" threshold.
            for t in np.unique(X[:, feature_idx]):
                _, y_left, _, y_right = split_dataset(X, y, feature_idx, t)
                # A split that leaves one side empty separates nothing.
                if len(y_left) == 0 or len(y_right) == 0:
                    continue
                gini = (
                    (len(y_left) / n_samples) * gini_impurity(y_left)
                    + (len(y_right) / n_samples) * gini_impurity(y_right)
                )
                if gini < best_gini:
                    best_gini = gini
                    best_feature = feature_idx
                    best_threshold = t

        # No usable split (e.g. all rows identical): fall back to a leaf.
        if best_feature is None:
            return DecisionTreeNode(value=majority)

        X_left, y_left, X_right, y_right = split_dataset(X, y, best_feature, best_threshold)
        left_child = self.build_tree(X_left, y_left, depth + 1)
        right_child = self.build_tree(X_right, y_right, depth + 1)
        return DecisionTreeNode(best_feature, best_threshold, left_child, right_child)

    def fit(self, X, y):
        """Build the tree from training data and store it in ``self.root``.

        ``X`` and ``y`` are converted with ``np.asarray`` so plain Python
        sequences are accepted in addition to NumPy arrays.
        """
        self.root = self.build_tree(np.asarray(X), np.asarray(y))

    def predict_one(self, x, node):
        """Return the leaf label reached by sample ``x`` starting from ``node``."""
        # A node with a stored value is a leaf.
        if node.value is not None:
            return node.value
        if x[node.feature_idx] <= node.threshold:
            return self.predict_one(x, node.left)
        else:
            return self.predict_one(x, node.right)

    def predict(self, X):
        """Predict a label for every row of ``X``; ``fit`` must have been called first.

        Returns
        -------
        numpy.ndarray
            One predicted label per sample, in input order.
        """
        return np.array([self.predict_one(sample, self.root) for sample in X])

In [15]:
# Fit a depth-limited tree on the toy data, then score it on the same
# samples it was trained on (training accuracy, not a held-out estimate).
tree = DecisionTree(max_depth=3)
tree.fit(X, y)

preds = tree.predict(X)
accuracy = (preds == y).mean()  # fraction of samples whose prediction matches its label
print("Predictions:", preds)
print("Actual:     ", y)
print("Accuracy:   ", accuracy)

Predictions: [0 0 0 0 0 1 1 1 1 1]
Actual:      [0 0 0 0 0 1 1 1 1 1]
Accuracy:    1.0
