In [None]:
import pandas as pd
import numpy as np


# Location of the dataset on disk, kept in one named constant so the notebook
# can be pointed at another copy without hunting through cells.
# NOTE(review): '/content/...' looks like a hosted-notebook path -- adjust when
# running in a different environment.
DATA_PATH = '/content/tennis.csv'

df = pd.read_csv(DATA_PATH)

In [None]:
# Display (at most) the first 14 rows of the loaded frame as a sanity check.
df.head(14)

Unnamed: 0,outlook,temp,humidity,windy,play
0,sunny,hot,high,False,no
1,sunny,hot,high,True,no
2,overcast,hot,high,False,yes
3,rainy,mild,high,False,yes
4,rainy,cool,normal,False,yes
5,rainy,cool,normal,True,no
6,overcast,cool,normal,True,yes
7,sunny,mild,high,False,no
8,sunny,cool,normal,False,yes
9,rainy,mild,normal,False,yes


In [None]:
def entropy(y):
    """Return the Shannon entropy, in bits, of the values in ``y``.

    Parameters
    ----------
    y : array-like
        Class labels (anything ``np.unique`` accepts).

    Returns
    -------
    numpy.float64
        ``-sum(p * log2(p))`` over the relative frequency ``p`` of each
        distinct value in ``y``.
    """
    # Only the per-class counts matter; the distinct values themselves are unused.
    _, class_counts = np.unique(y, return_counts=True)
    proportions = class_counts / class_counts.sum()
    return -(proportions * np.log2(proportions)).sum()

In [None]:
def info_gain(data, feature, target="play"):
    """Information gain obtained by splitting ``data`` on ``feature``.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to evaluate; must contain the ``feature`` and ``target`` columns.
    feature : str
        Column whose distinct values define the candidate split.
    target : str, default "play"
        Column holding the class labels.

    Returns
    -------
    float
        Entropy of ``target`` over all rows minus the size-weighted entropy of
        ``target`` within each distinct value of ``feature``. ``0.0`` when
        ``data`` has no rows or the overall entropy is NaN.
    """
    n_rows = len(data)
    if n_rows == 0:
        return 0.0

    parent_entropy = entropy(data[target])
    levels, level_counts = np.unique(data[feature], return_counts=True)

    def _weighted_terms():
        # One term per distinct feature value: (share of rows) * (entropy of
        # the labels in those rows).
        for level, count in zip(levels, level_counts):
            labels = data[data[feature] == level][target]
            # A level can match no rows (e.g. NaN never compares equal to
            # itself); such levels contribute nothing.
            if len(labels) > 0:
                yield (count / n_rows) * entropy(labels)

    children_entropy = sum(_weighted_terms())

    if np.isnan(parent_entropy):
        return 0.0
    return parent_entropy - children_entropy


In [None]:
def id3(data, features, target="play", min_gain=1e-12):
    """Recursively build an ID3 decision tree as nested dictionaries.

    Parameters
    ----------
    data : pandas.DataFrame
        Training rows; must contain every column in ``features`` plus ``target``.
    features : sequence of str
        Candidate columns that may still be used for splitting.
    target : str, default "play"
        Column holding the class labels.
    min_gain : float, default 1e-12
        Gains at or below this threshold are treated as zero. Information gain
        is a difference of floating-point sums, so a split that is
        mathematically useless can come out as a tiny non-zero number; an
        exact ``== 0`` test would then grow a pointless subtree.

    Returns
    -------
    dict, label, or None
        * ``{feature: {value: subtree_or_label, ...}}`` for an internal node;
        * a bare class label for a leaf (pure node, no features left, or no
          feature with a gain above ``min_gain`` -- the latter two use the
          most frequent label);
        * ``None`` when ``data`` has no rows.
    """
    # Nothing to learn from an empty frame.
    if len(data) == 0:
        return None

    # Pure node: every row carries the same label.
    labels = np.unique(data[target])
    if len(labels) == 1:
        return labels[0]

    # No attributes left to split on: fall back to the majority label.
    if len(features) == 0:
        return data[target].mode()[0]

    gains = [info_gain(data, f, target) for f in features]

    # No attribute separates the classes (up to rounding error).
    if max(gains) <= min_gain:
        return data[target].mode()[0]

    best_feature = features[np.argmax(gains)]
    # The chosen attribute is used up for the whole subtree; this list does
    # not depend on the branch value, so build it once.
    remaining_features = [f for f in features if f != best_feature]

    tree = {best_feature: {}}
    for v in np.unique(data[best_feature]):
        subset = data[data[best_feature] == v]
        subtree = id3(subset, remaining_features, target, min_gain)
        if subtree is None:
            # Branch value matched no rows: use this node's majority label.
            subtree = data[target].mode()[0]
        tree[best_feature][v] = subtree

    return tree


In [None]:
def predict(tree, sample):
    """Classify ``sample`` by walking a nested-dict decision tree.

    Parameters
    ----------
    tree : dict or label
        Either ``{feature: {value: subtree_or_label, ...}}`` or a bare leaf
        label.
    sample : mapping
        Maps feature names to the sample's values; must contain every feature
        tested along the path taken.

    Returns
    -------
    label or None
        The leaf reached, or ``None`` if the sample's value for a tested
        feature has no branch in the tree.
    """
    node = tree
    # Descend until a non-dict (leaf label) is reached.
    while isinstance(node, dict):
        # Each internal node is a single-key dict: the key is the tested feature.
        attribute = next(iter(node))
        branches = node[attribute]
        observed = sample[attribute]
        if observed not in branches:
            return None
        node = branches[observed]
    return node

# Name of the label column. Every other column is a candidate split feature;
# selecting by name (rather than "all but the last column") keeps this correct
# even if the column order of the CSV changes.
TARGET = 'play'
features = [col for col in df.columns if col != TARGET]
decision_tree = id3(df, features, TARGET)

print("Decision Tree (Dictionary):")
print(decision_tree)

print("\nDataset Entropy (Loss Function):", entropy(df[TARGET]))

# Classify one hand-written observation with the learned tree.
sample = {'outlook': 'sunny', 'temp': 'cool', 'humidity': 'high', 'windy': True}
print("\nPrediction for sample:", sample)
print("Result:", predict(decision_tree, sample))

Decision Tree (Dictionary):
{'outlook': {'overcast': 'yes', 'rainy': {'windy': {np.False_: 'yes', np.True_: 'no'}}, 'sunny': {'humidity': {'high': 'no', 'normal': 'yes'}}}}

Dataset Entropy (Loss Function): 0.9402859586706311

Prediction for sample: {'outlook': 'sunny', 'temp': 'cool', 'humidity': 'high', 'windy': True}
Result: no
