In [1]:
import math
from collections import Counter


def entropy(labels):
    """Return the Shannon entropy, in bits, of a collection of labels.

    Args:
        labels: Sized collection of hashable class labels (its ``len`` is
            used as the denominator for the relative frequencies).

    Returns:
        ``sum(-p * log2(p))`` over the relative frequency ``p`` of each
        distinct label. An empty collection yields ``0``; a collection with
        only one distinct label yields ``0.0``.
    """
    total = len(labels)  # invariant for every term, so compute it once
    # Negate each term instead of the finished sum: negating a sum of zeros
    # would report -0.0 for a pure label set.
    return sum(
        -(count / total) * math.log2(count / total)
        for count in Counter(labels).values()
    )


def build_tree(data, features, target="label"):
    """Recursively build a decision tree by greedy information-gain splits.

    Args:
        data: List of dict rows; each row maps feature names and ``target``
            to hashable values. Expected to be non-empty.
        features: Names of the features still available for splitting.
        target: Key in each row that holds the class label.

    Returns:
        A label when the rows all share one label, or when no features are
        left (then the most common label is returned). Otherwise a nested
        dict ``{feature: {value: subtree}}`` where each subtree is built from
        the rows having that value for the chosen feature.

    Raises:
        IndexError: If ``data`` and ``features`` are both empty, since there
            is no label to fall back on.
    """
    labels = [row[target] for row in data]

    # Base cases
    if len(set(labels)) == 1:
        return labels[0]
    if not features:
        return Counter(labels).most_common(1)[0][0]

    # Find best feature to split on
    best_feature = max(features, key=lambda f: information_gain(data, f, target))

    # Partition the rows in a single pass. A dict keeps the values in
    # first-seen order, so the branch order is reproducible from run to run
    # (iterating a set of strings is not, because of hash randomization).
    partitions = {}
    for row in data:
        partitions.setdefault(row[best_feature], []).append(row)

    # The chosen feature is used up for every branch below this node; the
    # list is only read by the recursive calls, so it can be shared.
    remaining = [f for f in features if f != best_feature]

    return {
        best_feature: {
            value: build_tree(subset, remaining, target)
            for value, subset in partitions.items()
        }
    }


def information_gain(data, feature, target="label"):
    """Return the information gain obtained by splitting ``data`` on ``feature``.

    Args:
        data: List of dict rows containing both ``feature`` and ``target``.
        feature: Key of the feature to evaluate; its values must be hashable.
        target: Key in each row that holds the class label.

    Returns:
        The entropy of all target labels minus the size-weighted entropy of
        the target labels within each distinct value of ``feature``.
    """
    total_entropy = entropy([row[target] for row in data])

    # Group the target labels by feature value in one pass instead of
    # rescanning the data twice for every distinct value. The dict keeps
    # first-seen order, so the float summation order below is reproducible.
    labels_by_value = {}
    for row in data:
        labels_by_value.setdefault(row[feature], []).append(row[target])

    # Calculate weighted entropy of subsets
    total = len(data)
    weighted_entropy = sum(
        len(subset) / total * entropy(subset)
        for subset in labels_by_value.values()
    )

    return total_entropy - weighted_entropy


# Example dataset: one record per observation, with values listed in the
# column order given to zip() below.
data = [
    dict(zip(("outlook", "temperature", "humidity", "windy", "label"), record))
    for record in (
        ("sunny", "hot", "high", False, "no"),
        ("sunny", "hot", "high", True, "no"),
        ("overcast", "hot", "high", False, "yes"),
        ("rainy", "mild", "high", False, "yes"),
        ("rainy", "cool", "normal", False, "yes"),
        ("rainy", "cool", "normal", True, "no"),
        ("overcast", "cool", "normal", True, "yes"),
        ("sunny", "mild", "high", False, "no"),
        ("sunny", "cool", "normal", False, "yes"),
        ("rainy", "mild", "normal", False, "yes"),
        ("sunny", "mild", "normal", True, "yes"),
        ("overcast", "mild", "high", True, "yes"),
        ("overcast", "hot", "normal", False, "yes"),
        ("rainy", "mild", "high", True, "no"),
    )
]

# Build a tree from the example rows using the four listed feature columns,
# then print it under a heading.
features = ["outlook", "temperature", "humidity", "windy"]
decision_tree = build_tree(data, features)
print("Decision Tree:", decision_tree, sep="\n")

Decision Tree:
{'outlook': {'overcast': 'yes', 'rainy': {'windy': {False: 'yes', True: 'no'}}, 'sunny': {'humidity': {'normal': 'yes', 'high': 'no'}}}}
