In [1]:
import numpy as np
import pandas as pd
from collections import Counter

# Function to calculate entropy
def entropy(y):
    """Return the Shannon entropy, in bits, of the class labels in ``y``.

    Args:
        y: 1-D sequence or array of class labels. Any label type that
            ``np.unique`` can handle is accepted (non-negative or negative
            integers, floats, strings).

    Returns:
        ``-sum(p * log2(p))`` over the distinct labels, where ``p`` is the
        relative frequency of each label. Empty input yields ``0.0``.
    """
    if len(y) == 0:
        return 0.0
    # np.unique is used instead of np.bincount because bincount only accepts
    # non-negative integers; every count returned here is >= 1, so log2 is
    # never evaluated at zero.
    _, counts = np.unique(y, return_counts=True)
    probabilities = counts / len(y)
    return -np.sum(probabilities * np.log2(probabilities))

# Function to calculate information gain
def information_gain(X, y, feature_index):
    """Return the entropy reduction obtained by splitting on one feature.

    Args:
        X: 2-D NumPy array of feature values (indexed as ``X[:, feature_index]``).
        y: NumPy array of labels aligned with the rows of ``X``.
        feature_index: Column of ``X`` to split on.

    Returns:
        ``entropy(y)`` minus the size-weighted average entropy of the label
        subsets produced by grouping rows on each distinct feature value.
    """
    column = X[:, feature_index]
    values, counts = np.unique(column, return_counts=True)
    total = np.sum(counts)

    # One term per distinct value: (share of rows) * (entropy of their labels).
    weighted_terms = []
    for value, count in zip(values, counts):
        branch_labels = y[column == value]
        weighted_terms.append((count / total) * entropy(branch_labels))

    return entropy(y) - np.sum(weighted_terms)

# Function to find the best attribute
def best_attribute(X, y):
    """Return the column index of ``X`` with the highest information gain.

    Every column of ``X`` is scored with ``information_gain``; ties are
    resolved in favour of the lowest index (``np.argmax`` semantics).
    """
    n_features = X.shape[1]
    gain_per_feature = []
    for column_index in range(n_features):
        gain_per_feature.append(information_gain(X, y, column_index))
    return np.argmax(gain_per_feature)

# Recursive function to build the decision tree
def build_tree(X, y, attributes):
    """Recursively build an information-gain decision tree as nested dicts.

    Args:
        X: 2-D NumPy array of feature values, one row per sample.
        y: 1-D NumPy array of class labels aligned with the rows of ``X``.
        attributes: Set of column indices of ``X`` that may still be used
            for splitting.

    Returns:
        A single class label for a leaf, otherwise a one-key dict of the
        form ``{attribute_index: {feature_value: subtree, ...}}``.
    """
    # Base case: all samples share one label.
    labels = np.unique(y)
    if len(labels) == 1:
        return labels[0]
    # Base case: nothing left to split on -> majority label.
    if len(attributes) == 0:
        return Counter(y).most_common(1)[0][0]

    # Choose the split only among the attributes that are still available.
    # Scoring every column (including already-used ones) can re-select a
    # used column when all gains are zero, which leaves both the data subset
    # and `attributes` unchanged and recurses without end.
    # Candidates are kept as NumPy integers so tree keys have the same type
    # that np.argmax over all columns would produce.
    candidates = np.array(sorted(attributes), dtype=np.intp)
    gains = [information_gain(X, y, index) for index in candidates]
    best_attr = candidates[np.argmax(gains)]

    remaining = attributes - {best_attr}
    column = X[:, best_attr]
    branches = {}
    for value in np.unique(column):
        mask = column == value  # computed once, reused for X and y
        branches[value] = build_tree(X[mask], y[mask], remaining)

    return {best_attr: branches}

# Function to predict using the decision tree
def predict(tree, sample):
    """Classify ``sample`` by walking ``tree`` from the root to a leaf.

    Args:
        tree: Either a leaf value or a one-key dict
            ``{attribute: {feature_value: subtree}}``.
        sample: Indexable object; ``sample[attribute]`` selects the branch.

    Returns:
        The leaf reached, or ``None`` when the sample's value for an
        attribute has no matching branch.
    """
    node = tree
    while isinstance(node, dict):
        attr = list(node)[0]
        node = node[attr].get(sample[attr])
        if node is None:
            # No branch for this feature value.
            return None
    return node

# Example usage
# Toy weather data set: every column is integer-encoded and 'PlayTennis'
# is the label column.
data = pd.DataFrame({
    'Outlook': [0, 0, 1, 1, 2, 2, 2, 0, 0, 1, 2, 1, 1, 2],
    'Temperature': [0, 0, 0, 1, 2, 2, 1, 1, 0, 1, 2, 1, 0, 1],
    'Humidity': [0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0],
    'Windy': [0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1],
    'PlayTennis': [0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]
})

# Separate the feature matrix from the label vector.
X = data.drop(columns='PlayTennis').to_numpy()
y = data['PlayTennis'].to_numpy()
# Every feature column index starts out available for splitting.
attributes = set(range(X.shape[1]))

# Fit the tree on the full data set and display it.
tree = build_tree(X, y, attributes)
# NOTE: `sample` is a ready-made input for predict(); it is not used below.
sample = X[0]
print("Decision Tree:", tree)


Decision Tree: {np.int64(0): {np.int64(0): {np.int64(3): {np.int64(0): np.int64(0), np.int64(1): {np.int64(2): {np.int64(0): np.int64(1), np.int64(1): np.int64(0)}}}}, np.int64(1): np.int64(1), np.int64(2): {np.int64(3): {np.int64(0): np.int64(1), np.int64(1): {np.int64(1): {np.int64(1): {np.int64(2): {np.int64(0): np.int64(0), np.int64(1): np.int64(1)}}, np.int64(2): np.int64(0)}}}}}}
