In [2]:
import math
from collections import Counter, defaultdict

In [3]:
# Training rows: four attribute columns plus the 'Play' class label.
# Each row tuple is zipped with the column names to build one dict per row.
data = [
    dict(zip(('Outlook', 'Temperature', 'Humidity', 'Windy', 'Play'), row))
    for row in (
        ('Sunny',    'Hot',  'High',   False, 'No'),
        ('Sunny',    'Hot',  'High',   True,  'No'),
        ('Overcast', 'Hot',  'High',   False, 'Yes'),
        ('Rainy',    'Mild', 'High',   False, 'Yes'),
        ('Rainy',    'Cool', 'Normal', False, 'Yes'),
        ('Rainy',    'Cool', 'Normal', True,  'No'),
        ('Overcast', 'Cool', 'Normal', True,  'Yes'),
        ('Sunny',    'Mild', 'High',   False, 'No'),
        ('Sunny',    'Cool', 'Normal', False, 'Yes'),
        ('Rainy',    'Mild', 'Normal', False, 'Yes'),
        ('Sunny',    'Mild', 'Normal', True,  'Yes'),
        ('Overcast', 'Mild', 'High',   True,  'Yes'),
        ('Overcast', 'Hot',  'Normal', False, 'Yes'),
        ('Rainy',    'Mild', 'High',   True,  'No'),
    )
]

In [4]:
def entropy(data_subset):
    """Return the Shannon entropy (in bits) of the 'Play' labels in a subset.

    Args:
        data_subset: Sized iterable of row dicts, each with a 'Play' key.

    Returns:
        float: Sum of ``-p * log2(p)`` over the distinct 'Play' values, where
        ``p`` is that value's share of the rows. An empty subset yields 0.0
        because there are no labels to sum over.
    """
    n_rows = len(data_subset)
    class_sizes = Counter(row['Play'] for row in data_subset).values()

    bits = 0.0
    for size in class_sizes:
        share = size / n_rows
        bits -= share * math.log2(share)
    return bits

In [5]:
def information_gain(data, featrue):
    """Return the information gain from splitting ``data`` on one feature.

    The gain is the entropy of the whole set minus the size-weighted average
    entropy of the subsets whose rows share a value of the feature.

    Args:
        data: Sized iterable of row dicts; every row must contain the
            requested feature key and whatever label key ``entropy`` reads.
        featrue: Key of the feature to split on. The misspelled parameter
            name is kept so that keyword callers keep working.

    Returns:
        float: ``entropy(data)`` minus the weighted subset entropy.

    Raises:
        KeyError: If a row does not contain the requested feature key.
    """
    total_rows = len(data)

    # Group rows by their value of the requested feature in one pass. Only
    # the `featrue` argument is consulted, so the result never depends on a
    # notebook-level variable. Dict insertion order (first appearance of
    # each value) keeps the floating-point summation order deterministic.
    subsets = {}
    for item in data:
        subsets.setdefault(item[featrue], []).append(item)

    weighted_entropy = 0.0
    for subset in subsets.values():
        weighted_entropy += (len(subset) / total_rows) * entropy(subset)

    return entropy(data) - weighted_entropy
        

In [7]:
# Candidate attributes to score; 'Play' is left out because it is the label
# that entropy() counts.
features = ['Outlook', 'Temperature', 'Humidity', 'Windy']

print("Information Gain for each feature:")
# NOTE: this loop runs at module level, so `feature` and `gain` stay bound to
# the last iteration's values after it finishes.
for feature in features:
    gain = information_gain(data, feature)
    print(f"{feature}: {gain:.4f}")

Information Gain for each feature:
Outlook: 0.2467
Temperature: 0.0292
Humidity: 0.1518
Windy: 0.0481


In [8]:
def best_feature_to_split(data, features):
    """Return the feature with the highest information gain on ``data``.

    Args:
        data: Rows passed unchanged to ``information_gain``.
        features: Iterable of candidate feature keys, scanned in order.

    Returns:
        The first feature whose gain is strictly greater than every gain
        seen before it, so ties go to the earlier feature. Returns ``None``
        when ``features`` is empty or no gain exceeds the initial -1.
    """
    winner, top_score = None, -1
    for candidate in features:
        score = information_gain(data, candidate)
        if score > top_score:
            winner, top_score = candidate, score
    return winner

# Choose the split attribute for the full training set. `best` is reused by
# later cells to partition the data and to build the one-level tree.
best = best_feature_to_split(data, features)
print(f"\nBest feature to split on: {best}")


Best feature to split on: Outlook


In [9]:
def split_data(data, feature):
    """Group rows by their value of ``feature``.

    Args:
        data: Iterable of row dicts, each containing the ``feature`` key.
        feature: Key whose value decides which group a row joins.

    Returns:
        defaultdict(list): Maps each observed value to the list of rows that
        carry it, with rows kept in their original order. Because it is a
        ``defaultdict``, indexing an unseen value creates an empty list.
    """
    groups = defaultdict(list)
    for row in data:
        bucket = groups[row[feature]]
        bucket.append(row)
    return groups

In [10]:
# Partition the training rows by the chosen feature and list the 'Play'
# labels that fall into each partition.
splits = split_data(data, best)
# The heading names whichever feature was selected instead of assuming it.
print(f"\nData splits based on {best}:")
for k, v in splits.items():
    plays = [item['Play'] for item in v]
    print(f"{k}: {plays}")


Data splits based on Outlook:
Sunny: ['No', 'No', 'No', 'Yes', 'Yes']
Overcast: ['Yes', 'Yes', 'Yes', 'Yes']
Rainy: ['Yes', 'Yes', 'No', 'Yes', 'No']


In [11]:
def predict(instance, tree):
    """Look up the label a one-level tree stores for ``instance``.

    Args:
        instance: Row dict containing the key named by ``tree['feature']``.
        tree: Dict with a 'feature' key (the attribute to branch on) and a
            'nodes' key (mapping from attribute value to stored label).

    Returns:
        The label stored for the instance's attribute value, or ``None`` when
        the tree has no branch for that value.
    """
    branch_key = instance[tree['feature']]
    return tree['nodes'].get(branch_key)

In [12]:
def majority_class(data_subset):
    """Return the most frequent 'Play' label in ``data_subset``.

    Args:
        data_subset: Iterable of row dicts, each with a 'Play' key.

    Returns:
        The label with the highest count, as reported by
        ``Counter.most_common(1)``.

    Raises:
        IndexError: If ``data_subset`` is empty, since there is no top entry.
    """
    tally = Counter(row['Play'] for row in data_subset)
    top_label, _votes = tally.most_common(1)[0]
    return top_label

# One-level tree: the chosen split feature plus, for each of its observed
# values, the majority 'Play' label among the rows in that partition.
tree = {
    'feature': best,
    'nodes': {
        branch_value: majority_class(branch_rows)
        for branch_value, branch_rows in splits.items()
    },
}

In [13]:
# Show the one-level tree: the split feature and the label stored per branch.
print("\nDecision tree (one-level):")
print(tree)


Decision tree (one-level):
{'feature': 'Outlook', 'nodes': {'Sunny': 'No', 'Overcast': 'Yes', 'Rainy': 'Yes'}}


In [14]:
# Unlabelled rows to classify: the same four attribute columns as the
# training rows, without a 'Play' value.
test_instances = [
    dict(zip(('Outlook', 'Temperature', 'Humidity', 'Windy'), row))
    for row in (
        ('Sunny',    'Hot',  'High',   False),
        ('Rainy',    'Cool', 'Normal', True),
        ('Overcast', 'Mild', 'High',   False),
    )
]


In [15]:
# Classify each unlabelled instance with the one-level tree. predict()
# returns None when the tree has no branch for the instance's value, and
# that None is printed as-is.
print("\nPredictions:")
for i, instance in enumerate(test_instances):
    pred = predict(instance, tree)
    # Instances are numbered from 1 in the printed output.
    print(f"Instance {i+1}: {pred}")


Predictions:
Instance 1: No
Instance 2: Yes
Instance 3: Yes


In [16]:
# Training accuracy: the share of training rows whose stored 'Play' label
# equals the label the one-level tree predicts for that row.
correct = sum(1 for item in data if predict(item, tree) == item['Play'])
accuracy = correct / len(data)
print(f"\nTraining accuracy with one-level tree: {accuracy:.4f}")



Training accuracy with one-level tree: 0.7143
