In [48]:
data = [
    [True,  "HOT",  "HIGH",   "NO"],
    [True,  "HOT",  "HIGH",   "NO"],
    [False, "HOT",  "HIGH",   "YES"],
    [False, "COOL", "NORMAL", "YES"],
    [False, "COOL", "NORMAL", "YES"],
    [True,  "COOL", "HIGH",   "NO"],
    [True,  "HOT",  "HIGH",   "NO"],
    [True,  "HOT",  "NORMAL", "YES"],
    [False, "COOL", "NORMAL", "YES"],
    [False, "COOL", "HIGH", "YES"]
]

In [49]:
features = ["a1", "a2", "a3"]

In [50]:
import math
from collections import Counter

In [51]:
def entropy(labels):
    # print(labels)
    total = len(labels)
    # print("Total labels:", total)
    counts = Counter(labels)
    # print("Label counts:", counts)
    ent = 0
    for count in counts.values():
        p = count / total
        ent -= p*math.log2(p)
    return ent
# labels = [row[-1] for row in data]
# ent = entropy(labels)
# print("Entropy of the labels:", ent)

In [52]:
def information_gain(data,feature_index,target_attr):
    labels = [row[target_attr] for row in data]
    base_entropy = entropy(labels)
    print("Base entropy:", base_entropy)
    #split by feature value
    values = set(row[feature_index] for row in data)
    print("vlaues:",values)
    weighted_entropy = 0
    for val in values:
        subset = [row for row in data if row[feature_index]== val]
        subset_labels = [row[target_attr] for row in subset]
        weighted_entropy +=(len(subset) /len(data)) * entropy(subset_labels)
        print(f"subset : {subset} subset_labels : {subset_labels} weighted_entropy: {weighted_entropy}")
        
    return base_entropy - weighted_entropy

# information_gain(data,0)

In [53]:
def majority_class(data , target_attr):
    values = [row[target_attr]for row in data]
    print("Values:", values)
    return Counter(values).most_common(1)[0][0]

In [54]:
def build_tree(data,features , target_attr):
    labels = [row[target_attr] for row in data]
    if labels.count(labels[0]) == len(labels):
        return labels[0]
    if not features:
        return majority_class(data,target_attr)
    
    gains = [(attr , information_gain(data,attr , target_attr)) for attr in features]
    best_attr = max(gains,key=lambda x:x[1])[0]
    
    tree = {best_attr:{}}
    unique_vals = set(row[best_attr] for row in data)
    
    
    for val in unique_vals:
        subset = [row for row in data if row[best_attr] == val]
        new_features = [f for f in features if f!=best_attr]
        subtree = build_tree(subset,new_features,target_attr)
        tree[best_attr][val] = subtree
    return tree

In [55]:
def predict(tree , sample):
    if not isinstance(tree , dict):
        return tree

    attr = next(iter(tree))
    
    if isinstance(sample , list):
        value = sample[attr]
    else:
        value = sample.get(attr)

    if value not in tree[attr]:
        return "Unknown"
    
    return predict(tree[attr][value],sample)

In [56]:
features = [0,1,2]
target = -1
tree = build_tree(data,features,target)
print("Decision Tree:")
print(tree)

test_sample = [True ,"COOL", "HIGH"]

print("Prediction for", test_sample, ":", predict(tree, test_sample))




Base entropy: 0.9709505944546686
vlaues: {False, True}
subset : [[False, 'HOT', 'HIGH', 'YES'], [False, 'COOL', 'NORMAL', 'YES'], [False, 'COOL', 'NORMAL', 'YES'], [False, 'COOL', 'NORMAL', 'YES'], [False, 'COOL', 'HIGH', 'YES']] subset_labels : ['YES', 'YES', 'YES', 'YES', 'YES'] weighted_entropy: 0.0
subset : [[True, 'HOT', 'HIGH', 'NO'], [True, 'HOT', 'HIGH', 'NO'], [True, 'COOL', 'HIGH', 'NO'], [True, 'HOT', 'HIGH', 'NO'], [True, 'HOT', 'NORMAL', 'YES']] subset_labels : ['NO', 'NO', 'NO', 'NO', 'YES'] weighted_entropy: 0.36096404744368116
Base entropy: 0.9709505944546686
vlaues: {'HOT', 'COOL'}
subset : [[True, 'HOT', 'HIGH', 'NO'], [True, 'HOT', 'HIGH', 'NO'], [False, 'HOT', 'HIGH', 'YES'], [True, 'HOT', 'HIGH', 'NO'], [True, 'HOT', 'NORMAL', 'YES']] subset_labels : ['NO', 'NO', 'YES', 'NO', 'YES'] weighted_entropy: 0.4854752972273343
subset : [[False, 'COOL', 'NORMAL', 'YES'], [False, 'COOL', 'NORMAL', 'YES'], [True, 'COOL', 'HIGH', 'NO'], [False, 'COOL', 'NORMAL', 'YES'], [False

In [57]:
test_samples = [
    [True, "HOT", "HIGH"],      
    [False, "COOL", "NORMAL"], 
    [True, "COOL", "NORMAL"],   
    [False, "HOT", "NORMAL"],   
]

print("Testing individual samples:")
correct_predictions = 0
total_samples = len(test_samples)

for i, sample in enumerate(test_samples):
    prediction = predict(tree, sample)
    print(f"Sample {i+1}: {sample} -> Prediction: {prediction}")
    
    expected = ["NO", "YES", "YES", "YES"] 
    if prediction == expected[i]:
        correct_predictions += 1

accuracy_percentage = (correct_predictions / total_samples) * 100
print(f"\nTest Accuracy: {accuracy_percentage:.1f}% ({correct_predictions}/{total_samples})")

Testing individual samples:
Sample 1: [True, 'HOT', 'HIGH'] -> Prediction: NO
Sample 2: [False, 'COOL', 'NORMAL'] -> Prediction: YES
Sample 3: [True, 'COOL', 'NORMAL'] -> Prediction: YES
Sample 4: [False, 'HOT', 'NORMAL'] -> Prediction: YES

Test Accuracy: 100.0% (4/4)
