In [18]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [19]:
def entropy(y):
    """Return the Shannon entropy, in bits, of the labels in ``y``.

    Parameters
    ----------
    y : array-like
        Class labels; anything accepted by ``np.unique``.

    Returns
    -------
    numpy.float64
        ``-sum(p_i * log2(p_i))`` over the distinct labels, where ``p_i`` is
        the relative frequency of label ``i``. A single-class or empty input
        yields ``0.0``.
    """
    _, counts = np.unique(y, return_counts=True)
    # Normalise by the number of counted elements so the probabilities sum to 1.
    probabilities = counts / counts.sum()
    # Each term is the product p * log2(p). np.unique only reports labels that
    # occur, so every p is > 0 and log2 is finite. Adding 0.0 turns the -0.0
    # produced for a pure (or empty) input into a plain 0.0.
    return -np.sum(probabilities * np.log2(probabilities)) + 0.0

def information(y, splits):
    """Return the information gain obtained by partitioning ``y`` into ``splits``.

    The gain is the entropy of ``y`` minus the size-weighted sum of the
    entropies of the partitions, where each partition is weighted by
    ``len(partition) / len(y)``.

    Parameters
    ----------
    y : sequence
        Labels of the parent node.
    splits : iterable of sequences
        Label subsets produced by the candidate split.
    """
    parent_size = len(y)
    children_entropy = 0
    for subset in splits:
        weight = len(subset) / parent_size
        children_entropy += weight * entropy(subset)
    return entropy(y) - children_entropy

In [20]:
def find_best_split(X,y):
    """Search every feature/threshold pair for the highest information gain.

    Parameters
    ----------
    X : numpy.ndarray
        Two-dimensional feature matrix; rows are samples, columns are features.
    y : numpy.ndarray
        Labels, indexable with a boolean mask over the rows of ``X``.

    Returns
    -------
    tuple or None
        ``(feature_index, threshold)`` of the best split, where rows with
        ``X[:, feature_index] <= threshold`` go left and the rest go right.
        ``None`` when no candidate has a gain strictly greater than 0.
    """
    best_split = None
    best_gain = 0
    for feature in range(X.shape[1]):
        column = X[:, feature]
        # np.unique yields the distinct values in sorted order, so ties between
        # equal-gain thresholds are always resolved towards the smallest value
        # instead of depending on set iteration order.
        for value in np.unique(column):
            left_mask = column <= value
            right_mask = column > value
            # A threshold that leaves one side empty (e.g. the column maximum)
            # does not partition the data; skip it without scoring.
            if not left_mask.any() or not right_mask.any():
                continue
            gain = information(y, [y[left_mask], y[right_mask]])
            # Strict comparison keeps the first best candidate encountered.
            if gain > best_gain:
                best_split = (feature, value)
                best_gain = gain
    return best_split
def build_decision_tree(X,y,max_depth=None, depth = 0):
    """Recursively grow a decision tree from ``X`` and ``y``.

    Parameters
    ----------
    X : numpy.ndarray
        Two-dimensional feature matrix.
    y : numpy.ndarray
        Labels aligned with the rows of ``X``.
    max_depth : int or None
        Growth stops when ``depth`` equals this value. With ``None`` the
        equality never holds, so depth does not limit growth.
    depth : int
        Depth of the node being built; callers normally leave the default.

    Returns
    -------
    object
        A leaf is the most frequent label in ``y``. An internal node is the
        tuple ``(feature, value, left_subtree, right_subtree)``, where the left
        subtree was built from rows with ``X[:, feature] <= value``.
    """
    def majority_label():
        # Most frequent label among the samples that reached this node.
        occurrences = list(y)
        return max(set(y), key=occurrences.count)

    # Stop at the depth limit or when the node is already pure.
    if depth == max_depth or len(set(y)) == 1:
        return majority_label()

    chosen = find_best_split(X, y)
    if chosen is None:
        # No split improves on the current node, so it becomes a leaf.
        return majority_label()

    feature, value = chosen
    goes_left = X[:, feature] <= value
    goes_right = X[:, feature] > value
    child_depth = depth + 1
    left_subtree = build_decision_tree(X[goes_left], y[goes_left], max_depth, child_depth)
    right_subtree = build_decision_tree(X[goes_right], y[goes_right], max_depth, child_depth)
    return (feature, value, left_subtree, right_subtree)
def predict_decision_tree(tree, x):
    """Return the leaf label that ``tree`` assigns to the sample ``x``.

    Parameters
    ----------
    tree : tuple or label
        Either a leaf label or an internal node of the form
        ``(feature, value, left_subtree, right_subtree)``.
    x : indexable
        Feature vector; ``x[feature]`` is compared against each node's value.

    Returns
    -------
    object
        The first non-tuple node reached by descending left when
        ``x[feature] <= value`` and right otherwise.
    """
    node = tree
    # Any tuple is treated as an internal node; everything else is a leaf.
    while isinstance(node, tuple):
        feature, threshold, left_branch, right_branch = node
        node = left_branch if x[feature] <= threshold else right_branch
    return node
  

In [21]:
# Random forest: an ensemble of decision trees, each trained on a bootstrap sample of the data.
def buil_random_forest(X,y,num_tree, max_depth= None, random_state=None):
    """Train ``num_tree`` decision trees, each on a bootstrap sample of the data.

    Parameters
    ----------
    X : array-like
        Two-dimensional feature matrix; converted with ``np.asarray``.
    y : array-like
        Labels aligned with the rows of ``X``; converted with ``np.asarray``.
    num_tree : int
        Number of trees to build.
    max_depth : int or None
        Forwarded unchanged to ``build_decision_tree``.
    random_state : None, int, numpy.random.RandomState or numpy.random.Generator
        Source of randomness for the bootstrap draws. ``None`` (the default)
        uses NumPy's global generator, so ``np.random.seed`` still controls
        the result. An existing ``RandomState``/``Generator`` is used as is;
        any other value is passed to ``np.random.RandomState`` as a seed.

    Returns
    -------
    list
        The trained trees, in the order they were built.
    """
    # Index-array selection below needs ndarrays; plain lists would fail.
    X = np.asarray(X)
    y = np.asarray(y)

    if random_state is None:
        rng = np.random  # module-level functions draw from the global generator
    elif isinstance(random_state, (np.random.RandomState, np.random.Generator)):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    n_samples = len(y)
    forest = []
    for _ in range(num_tree):
        # Bootstrap: draw n_samples row indices with replacement.
        sample_indices = rng.choice(n_samples, n_samples, replace=True)
        tree = build_decision_tree(X[sample_indices], y[sample_indices], max_depth)
        forest.append(tree)
    return forest

In [22]:
def predict_random_forest(forest, x):
    """Return the label predicted most often for ``x`` by the trees in ``forest``.

    Parameters
    ----------
    forest : iterable
        Trees in the format accepted by ``predict_decision_tree``.
    x : indexable
        Feature vector of the sample to classify.

    Returns
    -------
    object
        The label with the highest vote count across the trees.
    """
    votes = []
    for member in forest:
        votes.append(predict_decision_tree(member, x))
    candidates = set(votes)
    return max(candidates, key=votes.count)


In [23]:
# Toy training set: four samples with two integer features each, and binary labels.
X = np.array([
    [1, 2],
    [2, 1],
    [3, 2],
    [4, 1],
])
y = np.array([0, 1, 0, 1])

# Unlabelled samples to classify with the trained ensemble.
X_new = np.array([
    [2, 1],
    [3, 2],
    [5, 2],
    [3, 1],
])

num_tree = 3
forest = buil_random_forest(X, y, num_tree)

# One majority-vote prediction per row of X_new. No seed is set in this cell,
# so the bootstrap samples (and possibly the predictions) can change per run.
predictions = list(map(lambda row: predict_random_forest(forest, row), X_new))
print(f" the prediction of random forest: {predictions}" )
                 

 the prediction of random forest: [1, 0, 0, 1]
