In [6]:
import math
import pandas as pd

def calculate_entropy(data, target_attribute):
    """Return the Shannon entropy, in bits, of one column of ``data``.

    Args:
        data: A mapping of column name to ``pandas.Series`` (typically a
            ``pandas.DataFrame``).
        target_attribute: Name of the column whose class distribution is
            measured.

    Returns:
        The entropy ``-sum(p * log2(p))`` over the distinct values of the
        column. The result is ``0.0`` when the column has no non-missing
        values or holds a single class.

    Missing labels (NaN/None) are ignored: probabilities are computed over
    the non-missing records only. Counting them as a class is not possible
    with an equality test (NaN never compares equal to itself), which would
    produce a zero probability and a ``math.log2(0)`` domain error.
    """
    # value_counts() drops missing values by default and counts every class
    # in a single pass over the column.
    class_counts = data[target_attribute].value_counts()
    total_records = int(class_counts.sum())
    if total_records == 0:
        return 0.0

    entropy = 0.0
    for count in class_counts:
        # Categorical columns report unobserved categories with a count of
        # zero; they contribute nothing and log2(0) is undefined.
        if count == 0:
            continue
        probability = count / total_records
        entropy -= probability * math.log2(probability)

    return entropy

def calculate_gain(data, attribute, target_attribute):
    """Return the information gain obtained by splitting on ``attribute``.

    The gain is the entropy of ``target_attribute`` over all of ``data``
    minus the weighted entropy of ``target_attribute`` within each
    partition of rows sharing the same ``attribute`` value. Each partition
    is weighted by its share of the total number of records.

    Args:
        data: A ``pandas.DataFrame`` holding both columns.
        attribute: Name of the column to split on.
        target_attribute: Name of the class-label column.

    Returns:
        The information gain as a number.
    """
    split_column = data[attribute]
    n_records = len(data[target_attribute])

    # Start from the entropy of the whole set and subtract each
    # partition's weighted entropy in turn.
    remaining = calculate_entropy(data, target_attribute)
    for level in set(split_column):
        partition = data[split_column == level]
        partition_entropy = calculate_entropy(partition, target_attribute)
        remaining -= (len(partition) / n_records) * partition_entropy

    return remaining

# Convert the data to a pandas DataFrame.
# NOTE: ``data`` is not defined in this cell; it must already exist in the
# session (presumably created by an earlier notebook cell).
df = pd.DataFrame(data)

# Target attribute (the class-label column)
target_attribute = 'Play Tennis'

# The entropy of the target column is the same for every attribute, so it
# is computed once instead of on every loop iteration.
entropy = calculate_entropy(df, target_attribute)

# Report the information gain of every non-target attribute
for attribute in df.columns:
    if attribute == target_attribute:
        continue
    gain = calculate_gain(df, attribute, target_attribute)
    print(f"Attribute: {attribute}, Entropy: {entropy:.4f}, Gain: {gain:.4f}")


Attribute: Day, Entropy: 0.9403, Gain: 0.9403
Attribute: Outlook, Entropy: 0.9403, Gain: 0.2467
Attribute: Temperature, Entropy: 0.9403, Gain: 0.0292
Attribute: Humidity, Entropy: 0.9403, Gain: 0.1898
Attribute: Wind, Entropy: 0.9403, Gain: 0.2449
