In [1]:
import math
from collections import Counter

# Play Tennis sample dataset.
# Every row lists the attribute values in the order given by `attributes`,
# followed by the target label Play ("Yes" / "No").

attributes = ["Outlook", "Temperature", "Humidity", "Wind"]
target_index = len(attributes)  # the label column sits right after the attributes

data = [
    # Outlook, Temperature, Humidity, Wind, Play
    ["Sunny", "Hot", "High", "Weak", "No"],
    ["Sunny", "Hot", "High", "Strong", "No"],
    ["Overcast", "Hot", "High", "Weak", "Yes"],
    ["Rain", "Mild", "High", "Weak", "Yes"],
    ["Rain", "Cool", "Normal", "Weak", "Yes"],
    ["Rain", "Cool", "Normal", "Strong", "No"],
    ["Overcast", "Cool", "Normal", "Strong", "Yes"],
    ["Sunny", "Mild", "High", "Weak", "No"],
    ["Sunny", "Cool", "Normal", "Weak", "Yes"],
    ["Rain", "Mild", "Normal", "Weak", "Yes"],
    ["Sunny", "Mild", "Normal", "Strong", "Yes"],
    ["Overcast", "Mild", "High", "Strong", "Yes"],
    ["Overcast", "Hot", "Normal", "Weak", "Yes"],
    ["Rain", "Mild", "High", "Strong", "No"],
]

def entropy(data, label_index=None):
    """Return the Shannon entropy, in bits, of the class labels in *data*.

    Args:
        data: Sequence of rows; each row is indexable and stores its class
            label at position ``label_index``.
        label_index: Column that holds the class label. When omitted, the
            module-level ``target_index`` is used, so plain ``entropy(data)``
            calls behave as before.

    Returns:
        The entropy as a float; ``0.0`` for an empty or single-class dataset.
    """
    if label_index is None:
        label_index = target_index  # module-level default
    total = len(data)
    ent = 0.0
    for count in Counter(row[label_index] for row in data).values():
        p = count / total
        ent -= p * math.log2(p)
    return ent


def _partition_by(data, attr_index):
    """Group rows by their value in column *attr_index*, in first-seen order."""
    groups = {}
    for row in data:
        groups.setdefault(row[attr_index], []).append(row)
    return groups


def information_gain(data, attr_index, label_index=None):
    """Return the information gain of splitting *data* on one attribute.

    The gain is the entropy of the whole dataset minus the size-weighted
    entropy of the subsets produced by grouping rows on ``attr_index``.

    Args:
        data: Sequence of rows (see :func:`entropy`).
        attr_index: Column of the attribute to split on.
        label_index: Column that holds the class label; forwarded to
            :func:`entropy` (``None`` selects the module-level default).

    Returns:
        The gain as a float; ``0.0`` for an empty dataset.
    """
    total = len(data)
    if not total:
        return 0.0
    # Partition once instead of rescanning the dataset for every value; the
    # dict keeps first-seen order, so the float summation below is performed
    # in the same order on every run (a set of strings would not be).
    weighted_entropy = 0.0
    for subset in _partition_by(data, attr_index).values():
        weighted_entropy += (len(subset) / total) * entropy(subset, label_index)
    return entropy(data, label_index) - weighted_entropy


def id3(data, available_attrs, target_index, attribute_names=None):
    """Build a decision tree with the ID3 algorithm.

    Args:
        data: Non-empty sequence of rows; each row is indexable.
        available_attrs: Names of the attributes that may still be used for
            splitting.
        target_index: Column that holds the class label. It is passed on to
            the entropy / gain computations, so it is honoured even when it
            differs from the module-level ``target_index``.
        attribute_names: Full list of attribute names, ordered so that a
            name's position equals its column in each row. Defaults to the
            module-level ``attributes`` list.

    Returns:
        A class label when the node is a leaf, otherwise a nested dict of the
        form ``{attribute: {value: subtree, ...}}`` whose values appear in
        the order they are first seen in *data*.

    Raises:
        ValueError: If *data* is empty.
    """
    if not data:
        raise ValueError("cannot build a decision tree from an empty dataset")

    labels = [row[target_index] for row in data]
    # Pure node: every row carries the same label.
    if len(set(labels)) == 1:
        return labels[0]

    # No attribute left to split on: fall back to the majority label.
    if not available_attrs:
        return Counter(labels).most_common(1)[0][0]

    if attribute_names is None:
        attribute_names = attributes  # module-level default

    def gain_of(attr):
        return information_gain(data, attribute_names.index(attr), target_index)

    # max() returns the first maximal item, so ties resolve to the earliest
    # attribute in available_attrs.
    best_attr = max(available_attrs, key=gain_of)
    best_attr_index = attribute_names.index(best_attr)
    remaining_attrs = [a for a in available_attrs if a != best_attr]

    branches = {}
    for value, subset in _partition_by(data, best_attr_index).items():
        branches[value] = id3(subset, remaining_attrs, target_index,
                              attribute_names)
    return {best_attr: branches}

# Learn the tree from the full dataset, then show it under a heading.
tree = id3(data, attributes, target_index)
print("Decision Tree:", tree, sep="\n")

Decision Tree:
{'Outlook': {'Rain': {'Wind': {'Strong': 'No', 'Weak': 'Yes'}}, 'Sunny': {'Humidity': {'Normal': 'Yes', 'High': 'No'}}, 'Overcast': 'Yes'}}
