In [None]:
import numpy as np
import pandas as pd
from collections import Counter

def entropy(y):
    """Return the Shannon entropy, in bits, of the labels in *y*.

    Args:
        y: A sized iterable of hashable labels (list, array, Series, ...).

    Returns:
        The entropy ``-sum(p * log2(p))`` over the distinct labels, where
        ``p`` is each label's relative frequency. An empty *y* yields ``0``.
    """
    n = len(y)
    # Only the frequencies matter; the label values themselves are unused.
    probabilities = [freq / n for freq in Counter(y).values()]

    result = 0
    for prob in probabilities:
        result -= prob * np.log2(prob)
    return result

def information_gain(y, x):
    """Return the information gain obtained by splitting *y* on *x*.

    The gain is the entropy of *y* minus the size-weighted average entropy
    of the label subsets induced by each distinct value of *x*.

    Args:
        y: Class labels, one per sample (list, ndarray or Series).
        x: Feature values, one per sample, in the same order as *y*.

    Returns:
        ``entropy(y)`` minus the weighted entropy of the partitions.
    """
    # Work on plain arrays so the boolean mask is applied positionally.
    # Indexing a Series with a Series mask aligns on index labels instead,
    # and plain lists do not support boolean masking at all.
    labels = np.asarray(y)
    feature = np.asarray(x)

    total_entropy = entropy(labels)
    values, counts = np.unique(feature, return_counts=True)
    n_samples = len(feature)

    weighted_entropy = 0
    for v, c in zip(values, counts):
        subset_y = labels[feature == v]
        weighted_entropy += (c / n_samples) * entropy(subset_y)

    return total_entropy - weighted_entropy


class TreeNode:
    """A node of the decision tree.

    A node whose ``value`` is not ``None`` is treated as a leaf by the
    prediction code; otherwise ``feature`` names the attribute to test and
    ``children`` holds one subtree per attribute value.
    """

    def __init__(self, feature=None, children=None, value=None):
        # Name of the feature this node splits on.
        self.feature = feature
        # Mapping of feature value -> child node. A falsy argument (None or
        # an empty mapping) is replaced by a fresh dict for this node.
        self.children = children if children else {}
        # Label returned when this node is a leaf.
        self.value = value


def id3(X, y, features):
    """Recursively build a decision tree with the ID3 algorithm.

    Args:
        X: DataFrame holding the feature columns for the current samples.
        y: Series of class labels for the same rows as *X* (``.iloc`` and
            boolean-mask indexing are used on it).
        features: Names of the columns of *X* still available for splitting.

    Returns:
        TreeNode: a leaf carrying a label when the samples are pure or no
        feature is left, otherwise an internal node that splits on the
        feature with the highest information gain.
    """
    # Pure node: every remaining sample carries the same label.
    if len(np.unique(y)) == 1:
        return TreeNode(value=y.iloc[0])

    # Nothing left to split on: fall back to the most frequent label.
    if len(features) == 0:
        return TreeNode(value=Counter(y).most_common(1)[0][0])

    gains = {f: information_gain(y, X[f]) for f in features}
    # On ties, max() keeps the first feature in iteration order.
    best_feature = max(gains, key=gains.get)

    root = TreeNode(feature=best_feature)
    best_column = X[best_feature]
    # The chosen feature is removed for every branch alike, so build the
    # reduced list once instead of once per branch.
    remaining_features = [f for f in features if f != best_feature]

    for v in np.unique(best_column):
        # One mask per branch, shared by the row and the label selection.
        mask = best_column == v
        subset_X = X[mask]
        if len(subset_X) == 0:
            # The values come from this very column, so an empty match only
            # happens for values that do not equal themselves (e.g. NaN).
            majority = Counter(y).most_common(1)[0][0]
            root.children[v] = TreeNode(value=majority)
        else:
            root.children[v] = id3(subset_X, y[mask], remaining_features)

    return root

def predict_single(node, x):
    """Classify one sample by walking the tree from *node* downwards.

    Args:
        node: Root of the (sub)tree; needs ``feature``, ``children`` and
            ``value`` attributes.
        x: The sample, indexable by feature name.

    Returns:
        The ``value`` of the first node reached whose ``value`` is not
        ``None``, or ``None`` when the sample's feature value has no
        matching branch.
    """
    current = node
    # Descend until a node carrying a label is reached.
    while current.value is None:
        branch_key = x[current.feature]
        if branch_key not in current.children:
            # Feature value has no branch at this node.
            return None
        current = current.children[branch_key]
    return current.value

def predict(tree, X):
    """Classify every row of *X* with the decision tree rooted at *tree*.

    Args:
        tree: Root node of a fitted decision tree.
        X: DataFrame whose columns include the features the tree tests.

    Returns:
        The result of applying ``predict_single`` to each row of *X*.
    """
    def _classify_row(row):
        return predict_single(tree, row)

    return X.apply(_classify_row, axis=1)


# Toy weather dataset: four categorical features and the class label.
data = {
    'Outlook': [
        'Sunny', 'Sunny', 'Overcast', 'Rain',
        'Rain', 'Rain', 'Overcast', 'Sunny',
    ],
    'Temp': [
        'Hot', 'Hot', 'Hot', 'Mild',
        'Cool', 'Cool', 'Mild', 'Mild',
    ],
    'Humidity': [
        'High', 'High', 'High', 'High',
        'Normal', 'Normal', 'High', 'Normal',
    ],
    'Wind': [
        'Weak', 'Strong', 'Weak', 'Weak',
        'Weak', 'Strong', 'Strong', 'Weak',
    ],
    'PlayTennis': [
        'No', 'No', 'Yes', 'Yes',
        'Yes', 'No', 'Yes', 'Yes',
    ],
}

df = pd.DataFrame(data)

# Split the frame into the label column and the feature columns.
y = df['PlayTennis']
X = df.drop(columns=['PlayTennis'])
features = list(X.columns)

# Fit the tree on the full dataset.
tree = id3(X, y, features)

print("Decision Tree model created successfully!")

# Predict on training data
pred = predict(tree, X)
print("\nPredictions:")
print(pred.values)

# Classify one additional hand-written sample.
test = pd.DataFrame({
    'Outlook': ['Sunny'],
    'Temp': ['Cool'],
    'Humidity': ['High'],
    'Wind': ['Strong'],
})

print("Prediction on new sample:", predict(tree, test).values)



Decision Tree model created successfully!

Predictions:
['No' 'No' 'Yes' 'Yes' 'Yes' 'No' 'Yes' 'Yes']
Prediction on new sample: ['No']
