In [1]:
import pandas as pd
import numpy as np

# Load dataset
dataset = pd.read_csv('playtennis.csv', names=['outlook', 'temperature', 'humidity', 'wind', 'class'])

# Function to calculate entropy
def entropy(target_col):
    elements, counts = np.unique(target_col, return_counts=True)
    entropy = np.sum([(-counts[i]/np.sum(counts)) * np.log2(counts[i]/np.sum(counts)) for i in range(len(elements))])
    return entropy

# Function to calculate information gain
def InfoGain(data, split_attribute_name, target_name="class"):
    total_entropy = entropy(data[target_name])
    vals, counts = np.unique(data[split_attribute_name], return_counts=True)
    Weighted_Entropy = np.sum([(counts[i]/np.sum(counts)) * entropy(data.where(data[split_attribute_name] == vals[i]).dropna()[target_name]) for i in range(len(vals))])
    Information_Gain = total_entropy - Weighted_Entropy
    return Information_Gain

# Function to implement ID3 algorithm
def ID3(data, originaldata, features, target_attribute_name="class", parent_node_class=None):
    # If all target values have the same value, return that value
    if len(np.unique(data[target_attribute_name])) <= 1:
        return np.unique(data[target_attribute_name])[0]
    
    # If the dataset is empty, return the mode of the original target feature
    elif len(data) == 0:
        return np.unique(originaldata[target_attribute_name])[np.argmax(np.unique(originaldata[target_attribute_name], return_counts=True)[1])]
    
    # If there are no more features, return the parent node class
    elif len(features) == 0:
        return parent_node_class
    
    # Otherwise, grow the tree
    else:
        # Set the default parent node class to the mode of the target feature
        parent_node_class = np.unique(data[target_attribute_name])[np.argmax(np.unique(data[target_attribute_name], return_counts=True)[1])]
        
        # Compute the information gain for each feature
        item_values = [InfoGain(data, feature, target_attribute_name) for feature in features]
        
        # Select the feature with the highest information gain
        best_feature_index = np.argmax(item_values)
        best_feature = features[best_feature_index]
        
        # Create the tree structure
        tree = {best_feature: {}}
        
        # Remove the best feature from the list of features
        features = [i for i in features if i != best_feature]
        
        # Grow the tree recursively
        for value in np.unique(data[best_feature]):
            sub_data = data.where(data[best_feature] == value).dropna()
            subtree = ID3(sub_data, dataset, features, target_attribute_name, parent_node_class)
            tree[best_feature][value] = subtree
        
        return tree

# Build the tree
tree = ID3(dataset, dataset, dataset.columns[:-1])

# Display the tree
print('\nDisplay Tree\n', tree)



Display Tree
 {'outlook': {'Outlook': 'Play Tennis', 'Overcast': 'Yes', 'Rain': {'wind': {'Strong': 'No', 'Weak': 'Yes'}}, 'Sunny': {'humidity': {'High': 'No', 'Normal': 'Yes'}}}}
