In [1]:
import pandas as pd
import numpy as np

In [4]:
# Path of the CSV that holds the examples; named so it is easy to spot and retarget.
DATA_PATH = "/content/ID3dataset.csv"
data = pd.read_csv(DATA_PATH)

In [7]:
def entropy(target_col):
    """Return the Shannon entropy, in bits, of the values in ``target_col``.

    The distinct values are counted with ``np.unique``; each count is turned
    into a relative frequency ``p`` and the result is ``-sum(p * log2(p))``.

    Parameters
    ----------
    target_col : array-like
        Sequence of labels (anything ``np.unique`` accepts).

    Returns
    -------
    numpy.float64
        The entropy; ``0`` when every value is identical.
    """
    _, frequencies = np.unique(target_col, return_counts=True)
    # Counts reported by np.unique are always >= 1, so log2 never sees zero.
    probabilities = frequencies / np.sum(frequencies)
    return -np.sum(probabilities * np.log2(probabilities))

In [9]:
def info_gain(data, split_attribute_name, target_name="target"):
    """Return the information gain obtained by splitting ``data`` on one attribute.

    The gain is the entropy of the target column over all rows minus the
    weighted average of the target entropy inside each partition, where the
    partitions are the groups of rows sharing a value of the split attribute
    and each weight is that partition's share of the rows.

    Parameters
    ----------
    data : table indexable by column name and by boolean row mask
        The examples (a pandas DataFrame in this notebook).
    split_attribute_name : str
        Column whose distinct values define the partitions.
    target_name : str, default "target"
        Column holding the class labels.

    Returns
    -------
    float
        ``entropy(target) - sum(weight_i * entropy(target in partition_i))``.
    """
    baseline = entropy(data[target_name])

    levels, level_counts = np.unique(data[split_attribute_name], return_counts=True)
    n_rows = np.sum(level_counts)

    remainder = 0
    for level, level_count in zip(levels, level_counts):
        partition = data[data[split_attribute_name] == level]
        remainder += (level_count / n_rows) * entropy(partition[target_name])

    return baseline - remainder

# ID3 Algorithm
def _majority_class(labels):
    """Return the most frequent value in ``labels``.

    ``np.unique`` yields the values in sorted order and ``np.argmax`` picks the
    first maximum, so ties go to the value that sorts first.
    """
    classes, class_counts = np.unique(labels, return_counts=True)
    return classes[np.argmax(class_counts)]


def id3(data, original_data, features, target_attribute_name="target", parent_node_class=None):
    """Build an ID3 decision tree as nested dictionaries.

    Parameters
    ----------
    data : table indexable by column name and by boolean row mask
        Examples that reach the current node (a pandas DataFrame in this notebook).
    original_data : same kind of table as ``data``
        The full example set; only consulted when ``data`` is empty.
    features : list
        Column names still available for splitting. The list is not mutated.
    target_attribute_name : str, default "target"
        Column holding the class labels.
    parent_node_class : optional
        Majority class of the parent node. It is accepted for backward
        compatibility and passed down the recursion, but it no longer decides
        any leaf label (see the note on exhausted features below).

    Returns
    -------
    A class label when the node is a leaf, otherwise a dict of the form
    ``{best_feature: {feature_value: subtree_or_label, ...}}``.
    """
    labels = data[target_attribute_name]
    distinct_labels = np.unique(labels)

    # Pure node: every example carries the same label.
    if len(distinct_labels) == 1:
        return distinct_labels[0]

    # No examples at all: fall back to the majority class of the full data set.
    if len(data) == 0:
        return _majority_class(original_data[target_attribute_name])

    node_class = _majority_class(labels)

    # No attribute left to split on: label the leaf with the majority class of
    # the examples that actually reached this node. Previously the parent's
    # majority class was returned here, which yields None on a top-level call
    # with no features and can disagree with the examples in the node.
    if len(features) == 0:
        return node_class

    # Split on the attribute with the highest information gain
    # (np.argmax keeps the first one on ties).
    gains = [info_gain(data, feature, target_attribute_name) for feature in features]
    best_feature = features[np.argmax(gains)]
    remaining_features = [feature for feature in features if feature != best_feature]

    branches = {}
    # Only values present in ``data`` are visited, so each subset is non-empty.
    for value in np.unique(data[best_feature]):
        sub_data = data[data[best_feature] == value]
        branches[value] = id3(sub_data, original_data, remaining_features, target_attribute_name, node_class)
    return {best_feature: branches}

# Name of the class/label column. Change it here if your dataset uses a different one.
TARGET_COLUMN = "PlayTennis"

# Every column except the label is a candidate attribute to split on.
features = [column for column in data.columns if column != TARGET_COLUMN]

tree = id3(data, data, features, target_attribute_name=TARGET_COLUMN)
print("ID3 Decision Tree:")
print(tree)

ID3 Decision Tree:
{'Outlook': {'Overcast': 'Yes', 'Rain': {'Wind': {'Strong': 'No', 'Weak': 'Yes'}}, 'Sunny': {'Temperature': {'Cool': 'Yes', 'Hot': 'No', 'Mild': 'No'}}}}
