In [5]:
import math
from collections import Counter


def entropy(data):
    """Return the Shannon entropy, in bits, of the ``"label"`` values in ``data``.

    Args:
        data: Iterable of mappings; each one is indexed with ``"label"``.

    Returns:
        The sum of ``-p * log2(p)`` over the relative frequency ``p`` of every
        distinct label. Empty ``data`` yields ``0``.
    """
    label_column = [row["label"] for row in data]
    total = len(label_column)

    bits = 0
    for occurrences in Counter(label_column).values():
        share = occurrences / total
        bits -= share * math.log2(share)
    return bits


def information_gain(data, attribute):
    """Return the information gain of splitting ``data`` on ``attribute``.

    The gain is ``entropy(data)`` minus the size-weighted sum of the entropies
    of the partitions whose records share a value of ``attribute``.

    Args:
        data: Sequence of mappings; each one is indexed with ``attribute``
            here and with ``"label"`` inside ``entropy``.
        attribute: Key whose values define the partitions.

    Returns:
        The gain in bits. Empty ``data`` yields ``0``.
    """
    # Partition in one pass instead of rescanning ``data`` for every distinct
    # value. A dict keeps the partitions in first-seen order, so the
    # floating-point sum below is accumulated in the same order on every run
    # (iteration order of a set of strings changes with hash randomization).
    partitions = {}
    for item in data:
        partitions.setdefault(item[attribute], []).append(item)

    total = len(data)
    remainder = 0
    for subset in partitions.values():
        remainder += len(subset) / total * entropy(subset)
    return entropy(data) - remainder


def id3(data, features):
    """Build a decision tree for ``data`` with the ID3 algorithm.

    Args:
        data: Non-empty sequence of mappings; each one is indexed with
            ``"label"`` and with the attribute chosen for the split.
        features: Attribute names still available for splitting.

    Returns:
        A leaf or an inner node:

        * a label, when every record carries the same label, or when no
          features remain (then the most common label is returned);
        * otherwise ``{attribute: {value: subtree, ...}}`` where ``attribute``
          has the highest information gain and every ``subtree`` is built
          recursively from the records with that ``value``. Branches appear
          in the order their values are first seen in ``data``.

    Raises:
        ValueError: If ``data`` is empty; there is nothing to learn from.
    """
    labels = [item["label"] for item in data]
    if not labels:
        # Without this guard the function would either fail with an opaque
        # IndexError or return a branch-less node that cannot classify.
        raise ValueError("id3() requires at least one training example")

    if len(set(labels)) == 1:
        return labels[0]
    if len(features) == 0:
        return Counter(labels).most_common(1)[0][0]

    # max() keeps the first maximal element, so ties are resolved in favour
    # of the attribute listed earliest in ``features``.
    best_attribute = max(
        features, key=lambda attribute: information_gain(data, attribute)
    )
    remaining_features = [f for f in features if f != best_attribute]

    # Single-pass partition; dict insertion order makes the branch order (and
    # therefore the printed tree) reproducible across interpreter runs.
    partitions = {}
    for item in data:
        partitions.setdefault(item[best_attribute], []).append(item)

    return {
        best_attribute: {
            value: id3(subset, remaining_features)
            for value, subset in partitions.items()
        }
    }


# Example usage
if __name__ == "__main__":
    # "Play golf" weather table. Each tuple below is one observation in the
    # column order (outlook, temperature, humidity, windy, label); it is
    # expanded into one dict per observation.
    data = [
        dict(zip(("outlook", "temperature", "humidity", "windy", "label"), row))
        for row in (
            ("sunny", "hot", "high", False, "no"),
            ("sunny", "hot", "high", True, "no"),
            ("overcast", "hot", "high", False, "yes"),
            ("rainy", "mild", "high", False, "yes"),
            ("rainy", "cool", "normal", False, "yes"),
            ("rainy", "cool", "normal", True, "no"),
            ("overcast", "cool", "normal", True, "yes"),
            ("sunny", "mild", "high", False, "no"),
            ("sunny", "cool", "normal", False, "yes"),
            ("rainy", "mild", "normal", False, "yes"),
            ("sunny", "mild", "normal", True, "yes"),
            ("overcast", "mild", "high", True, "yes"),
            ("overcast", "hot", "normal", False, "yes"),
            ("rainy", "mild", "high", True, "no"),
        )
    ]

    # Attributes the tree may split on; "label" is the value being predicted.
    features = ["outlook", "temperature", "humidity", "windy"]
    target_attribute = "label"

    decision_tree = id3(data, features)
    print("Decision Tree:", decision_tree, sep="\n")

Decision Tree:
{'outlook': {'sunny': {'humidity': {'high': 'no', 'normal': 'yes'}}, 'rainy': {'windy': {False: 'yes', True: 'no'}}, 'overcast': 'yes'}}


In [4]:
# Literal with the same content as the tree printed by the ID3 example above.
# Shape: {attribute: {attribute_value: subtree_or_label}}; a plain string is a
# leaf holding the predicted label. Being the cell's only expression, the dict
# is echoed back as the cell output.
{
    "outlook": {
        "sunny": {"humidity": {"high": "no", "normal": "yes"}},
        "rainy": {"windy": {False: "yes", True: "no"}},
        "overcast": "yes",
    }
}

{'outlook': {'sunny': {'humidity': {'high': 'no', 'normal': 'yes'}},
  'rainy': {'windy': {False: 'yes', True: 'no'}},
  'overcast': 'yes'}}

In [6]:
# Same literal as the preceding cell: the ID3 tree for the weather example,
# nested as {attribute: {attribute_value: subtree_or_label}}. Evaluating the
# cell only echoes the dict back as output.
{
    "outlook": {
        "sunny": {"humidity": {"high": "no", "normal": "yes"}},
        "rainy": {"windy": {False: "yes", True: "no"}},
        "overcast": "yes",
    }
}

{'outlook': {'sunny': {'humidity': {'high': 'no', 'normal': 'yes'}},
  'rainy': {'windy': {False: 'yes', True: 'no'}},
  'overcast': 'yes'}}