In [None]:
import pandas as pd
import numpy as np

In [60]:
# Seed NumPy's global generator so the synthetic data set is reproducible.
np.random.seed(42)

# Two independent features, each drawn uniformly from [0, 10).
feature1 = 10 * np.random.rand(100)
feature2 = 10 * np.random.rand(100)

# Feature matrix with one column per feature, plus random binary labels.
X = np.stack([feature1, feature2], axis=1)
y = np.random.choice([0, 1], size=100)

In [61]:
class DecisionTree:
  """Minimal CART-style classification tree that splits on Gini impurity.

  A fitted tree is a nested dict. Internal nodes carry the keys
  'best_feature', 'best_threshold', 'left_subtree' and 'right_subtree';
  samples with ``X[:, best_feature] <= best_threshold`` go to the left
  subtree, all others to the right. Leaves carry the single key 'class',
  the majority label of the training samples that reached them.

  Attributes:
    max_depth: Depth at which growth stops, or None for no limit.
    tree_: Root node built by the most recent ``fit`` call (None before).
  """

  def __init__(self, max_depth=None):
    self.max_depth = max_depth
    self.tree_ = None

  def fit(self, X, y, depth=0):
    """Build a tree from the training data, store it and return it.

    Args:
      X: 2-D array-like of shape (n_samples, n_features).
      y: 1-D array-like holding one class label per sample.
      depth: Depth assigned to the root node; it counts towards ``max_depth``.

    Returns:
      The root node as a nested dict (also kept in ``self.tree_``).

    Raises:
      ValueError: If ``y`` is empty.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if y.size == 0:
      raise ValueError("cannot fit a decision tree on an empty data set")

    self.tree_ = self._grow(X, y, depth)
    return self.tree_

  def _grow(self, X, y, depth):
    """Recursively build the node for the (non-empty) samples ``X``, ``y``."""
    labels, counts = np.unique(y, return_counts=True)
    # Majority label; np.unique sorts, so ties resolve to the smallest label.
    leaf = {'class': labels[np.argmax(counts)]}

    # '>=' rather than '==' so a caller-supplied starting depth that is
    # already past the limit still terminates.
    depth_exhausted = self.max_depth is not None and depth >= self.max_depth
    if depth_exhausted or labels.size == 1:
      return leaf

    feature, threshold = self.find_best_split(X, y)
    if feature is None:
      # No threshold separates the samples (e.g. all rows are identical).
      return leaf

    goes_left = X[:, feature] <= threshold
    goes_right = ~goes_left

    return {
        'best_feature': feature,
        'best_threshold': threshold,
        'left_subtree': self._grow(X[goes_left], y[goes_left], depth + 1),
        'right_subtree': self._grow(X[goes_right], y[goes_right], depth + 1)
    }

  def predict(self, X):
    """Predict a class label for every row of ``X`` using the fitted tree.

    Args:
      X: 2-D array-like of shape (n_samples, n_features).

    Returns:
      1-D numpy array with one predicted label per row.

    Raises:
      RuntimeError: If ``fit`` has not been called yet.
    """
    if self.tree_ is None:
      raise RuntimeError("this DecisionTree is not fitted yet; call fit() first")
    return np.array([self._predict_row(row) for row in np.asarray(X)])

  def _predict_row(self, row):
    """Walk from the root to a leaf for one sample and return its label."""
    node = self.tree_
    while 'class' not in node:
      if row[node['best_feature']] <= node['best_threshold']:
        node = node['left_subtree']
      else:
        node = node['right_subtree']
    return node['class']

  def find_best_split(self, X, y):
    """Find the (feature, threshold) pair with the lowest weighted Gini.

    Every distinct value of every feature is tried as a threshold, with
    ``<= threshold`` going left. Splits that would leave one side empty are
    skipped: they separate nothing and would make ``fit`` recurse on the
    same data or on an empty child.

    Args:
      X: 2-D array-like of shape (n_samples, n_features).
      y: 1-D array-like holding one class label per sample.

    Returns:
      Tuple ``(feature_index, threshold)``, or ``(None, None)`` when no
      split produces two non-empty children.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    best_feature = None
    best_threshold = None
    best_gini = float('inf')

    for feature in range(X.shape[1]):
      column = X[:, feature]

      # Sorted candidates make tie-breaking reproducible: the first feature
      # and, within it, the lowest threshold wins among equal scores.
      for value in np.unique(column):
        goes_left = column <= value
        n_left = np.count_nonzero(goes_left)
        if n_left == 0 or n_left == len(column):
          continue

        gini = self.find_gini(y[goes_left], y[~goes_left])

        if gini < best_gini:
          best_gini = gini
          best_feature = feature
          best_threshold = value
    return best_feature, best_threshold

  def find_gini(self, left, right):
    """Return the size-weighted Gini impurity of a two-way split.

    Args:
      left: 1-D array of the labels sent to the left child.
      right: 1-D array of the labels sent to the right child.

    Returns:
      Weighted impurity as a float; 0.0 when both sides are empty. An empty
      side contributes nothing to the total.
    """
    total_samples = len(left) + len(right)
    if total_samples == 0:
      return 0.0

    gini = 0.0
    for side in (left, right):
      if len(side) == 0:
        continue
      _, counts = np.unique(side, return_counts=True)
      proportions = counts / len(side)
      impurity = 1.0 - float(np.sum(proportions ** 2))
      gini += (len(side) / total_samples) * impurity
    return gini

In [63]:
# Grow a tree limited to depth 2 on the synthetic data; the nested dict
# returned by fit is the cell's displayed value.
dtree = DecisionTree(max_depth=2)
dtree.fit(X, y)

{'best_feature': 0,
 'best_threshold': 8.021969807540398,
 'left_subtree': {'best_feature': 0,
  'best_threshold': 6.3755747135521315,
  'left_subtree': {'class': 1},
  'right_subtree': {'class': 0}},
 'right_subtree': {'best_feature': 0,
  'best_threshold': 9.218742350231167,
  'left_subtree': {'class': 1},
  'right_subtree': {'class': 1}}}