In [12]:
import math
from collections import Counter

# Helper function to calculate entropy
def entropy(data):
    """Return the Shannon entropy, in bits, of the class labels in *data*.

    Args:
        data: Sized sequence of rows. Only the last element of each row
            (the class label) is read.

    Returns:
        The entropy as a non-negative float; ``0.0`` for empty input or
        when every row carries the same label.
    """
    # Counter tallies all labels in one pass instead of rescanning the
    # label list once per distinct label.
    label_counts = Counter(row[-1] for row in data)
    total = len(data)
    # Each term is written as p * log2(1 / p), which is never negative, so
    # a pure set yields 0.0 instead of the -0.0 obtained by negating a sum
    # of p * log2(p). fsum also guarantees a float result for empty input.
    return math.fsum(
        (count / total) * math.log2(total / count)
        for count in label_counts.values()
    )

# Helper function to calculate information gain
def information_gain(data, feature_index):
    """Return how much splitting on one column reduces label entropy.

    Args:
        data: Sized sequence of rows whose last element is the class label.
        feature_index: Position of the column to split on.

    Returns:
        Entropy of *data* minus the size-weighted entropy of the partitions
        obtained by grouping rows on the value at *feature_index*.
    """
    row_count = len(data)
    distinct_values = {row[feature_index] for row in data}

    # One partition per distinct value of the chosen column.
    partitions = (
        [row for row in data if row[feature_index] == value]
        for value in distinct_values
    )
    # Weight each partition's entropy by its share of the rows.
    split_entropy = sum(
        (len(part) / row_count) * entropy(part) for part in partitions
    )

    return entropy(data) - split_entropy

# Helper function to get the best feature to split on
def best_split(data):
    """Return the index of the feature column with the highest information gain.

    Args:
        data: Non-empty sequence of rows; every column except the last is
            treated as a feature and the last column as the label.

    Returns:
        The index of the winning column. On ties the lowest index wins.
        Returns ``-1`` when the rows contain no feature columns.
    """
    feature_count = len(data[0]) - 1  # Last column is the label
    # max() keeps the first of several equal maxima, matching a strict
    # "greater than" scan from left to right.
    return max(
        range(feature_count),
        key=lambda index: information_gain(data, index),
        default=-1,
    )

# Decision tree algorithm
def build_tree(data, feature_indices=None):
    """Recursively build a decision tree from rows of categorical values.

    At each node the column chosen by ``best_split`` is used to partition
    the rows, that column is dropped, and a subtree is built per value.

    Args:
        data: Non-empty list of rows; each row holds the feature values
            followed by the class label in the last position.
        feature_indices: Original column index of every feature column still
            present in *data*. Callers normally omit it; the recursion
            passes it down so nodes can be keyed by original positions even
            after earlier split columns have been removed from the rows.

    Returns:
        A class label for a leaf, otherwise a single-key dict
        ``{column_index: {feature_value: subtree}}`` in which
        ``column_index`` is the column's position in the rows passed to the
        outermost call.
    """
    labels = [row[-1] for row in data]

    # Pure node: every example carries the same label.
    if len(set(labels)) == 1:
        return labels[0]

    # Only the label column is left, so fall back to the most common label.
    if len(data[0]) == 1:
        return max(set(labels), key=labels.count)

    if feature_indices is None:
        feature_indices = list(range(len(data[0]) - 1))

    # best_split reports a position within the current, narrowed rows.
    position = best_split(data)
    # Key the node by the ORIGINAL column index. The narrowed position shifts
    # once an earlier column has been removed, and a full example row indexed
    # with it (as classify does) would read the wrong column.
    column = feature_indices[position]
    remaining = feature_indices[:position] + feature_indices[position + 1:]

    branches = {}
    for value in {row[position] for row in data}:
        # Keep the matching rows and drop the column that was just used.
        subset = [
            row[:position] + row[position + 1:]
            for row in data
            if row[position] == value
        ]
        branches[value] = build_tree(subset, remaining)

    return {column: branches}

# Function to classify a new example
def classify(tree, example):
    """Walk *tree* using the values in *example* and return the leaf reached.

    Args:
        tree: Either a leaf value or a single-key dict of the form
            ``{feature: {feature_value: subtree}}``.
        example: Indexable collection of feature values, looked up with the
            feature key stored in each node.

    Returns:
        The leaf reached, or ``None`` when the example holds a value for
        which the current node has no branch.
    """
    node = tree
    # Anything that is not a dict is a leaf and ends the descent.
    while isinstance(node, dict):
        split_key = list(node)[0]
        branches = node[split_key]
        # A missing branch yields None, which then ends the loop.
        node = branches.get(example[split_key])
    return node

# Sample data (using categorical values for features + label).
# Each row lists four feature values followed by the class label.
data = [
    ['Sunny', 'Hot', 'High', 'Weak', 'Yes'],
    ['Sunny', 'Hot', 'High', 'Strong', 'No'],
    ['Overcast', 'Hot', 'High', 'Weak', 'Yes'],
    ['Rainy', 'Mild', 'High', 'Weak', 'Yes'],
    ['Rainy', 'Cool', 'Normal', 'Weak', 'Yes'],
    ['Rainy', 'Cool', 'Normal', 'Strong', 'No'],
    ['Overcast', 'Cool', 'Normal', 'Strong', 'Yes'],
    ['Sunny', 'Mild', 'High', 'Weak', 'Yes'],
    ['Sunny', 'Cool', 'Normal', 'Weak', 'Yes'],
    ['Rainy', 'Mild', 'Normal', 'Weak', 'Yes'],
    ['Sunny', 'Mild', 'Normal', 'Strong', 'No'],
    ['Overcast', 'Mild', 'High', 'Strong', 'Yes'],
    ['Overcast', 'Hot', 'Normal', 'Weak', 'Yes'],
    ['Rainy', 'Mild', 'High', 'Strong', 'No']
]

# Feature names (optional, for better understanding); not read by the code below.
features = ['Outlook', 'Temperature', 'Humidity', 'Wind']

# Build the tree
tree = build_tree(data)

# Print the tree
print("Decision Tree:", tree)

# Classify new examples
new_example = ['Sunny', 'Cool', 'Normal', 'Weak']
# Interpolate the example itself so the printed label cannot drift from the
# input when the example is edited.
print(f"Prediction for {new_example}:", classify(tree, new_example))


Decision Tree: {3: {'Strong': {0: {'Overcast': 'Yes', 'Rainy': 'No', 'Sunny': 'No'}}, 'Weak': 'Yes'}}
Prediction for ['Sunny', 'Cool', 'Normal', 'Weak']: Yes
