# In [None]:
# Decision Tree ID3 Algorithm
# The ID3 (Iterative Dichotomiser 3) algorithm is a decision tree algorithm that uses the concept of information gain to build the decision tree.

import math

# Step 1: Define the Node class for the decision tree
class Node:
    """A single node of the decision tree.

    For an internal node, ``attribute`` holds the name of the attribute the
    node splits on and ``children`` maps each attribute value to the child
    node for that branch. For a leaf node, ``attribute`` holds the predicted
    class label and ``children`` stays empty.
    """

    def __init__(self, attribute=None):
        """Create a node with the given attribute (or label) and no children.

        Args:
            attribute: Split attribute name for an internal node, or the
                class label for a leaf. Defaults to ``None``.
        """
        # The constructor must be spelled with double underscores; with
        # single underscores Python never calls it and Node(x) raises.
        self.attribute = attribute
        # Fresh dict per instance so nodes never share their branches.
        self.children = {}

# Step 2: Define the ID3 algorithm function
def id3(data, target_attribute, attributes):
    """Recursively build an ID3 decision tree and return its root node.

    Args:
        data: Table of examples; indexed by column name and used through
            ``.iloc``, ``.value_counts()``, ``.unique()`` and ``.empty``.
        target_attribute: Name of the column holding the class label.
        attributes: List of candidate column names to split on. The list is
            copied before the chosen attribute is removed, so the caller's
            list is left untouched.

    Returns:
        A ``Node``. Leaves carry a class label in ``attribute`` and have no
        children; internal nodes carry the split attribute name and one
        child per distinct value of that attribute in ``data``.
    """
    tree = Node(None)
    labels = data[target_attribute]

    # Pure subset: every example has the same label, so emit a leaf.
    if len(set(labels)) == 1:
        tree.attribute = labels.iloc[0]
        return tree

    # Nothing left to split on: fall back to the most frequent label.
    if len(attributes) == 0:
        tree.attribute = labels.value_counts().idxmax()
        return tree

    # Score every candidate; list.index(max(...)) keeps the first attribute
    # on ties.
    gains = [
        calculate_information_gain(data, candidate, target_attribute)
        for candidate in attributes
    ]
    split_on = attributes[gains.index(max(gains))]
    tree.attribute = split_on

    # The chosen attribute must not be reused further down this branch.
    remaining = attributes.copy()
    remaining.remove(split_on)

    # Grow one branch per distinct value of the chosen attribute.
    for branch_value in data[split_on].unique():
        rows = data[data[split_on] == branch_value]
        if not rows.empty:
            branch = id3(rows, target_attribute, remaining)
        else:
            # No rows matched this value: use the majority label of the
            # examples at this node.
            branch = Node(data[target_attribute].value_counts().idxmax())
        tree.children[branch_value] = branch

    return tree

# Step 3: Define the function to calculate information gain
def calculate_information_gain(data, attribute, target_attribute):
    """Return the information gain obtained by splitting ``data`` on ``attribute``.

    The gain is the entropy of the target column minus the weighted average
    entropy of the target within each group of rows sharing one value of
    ``attribute``.

    Args:
        data: Table of examples, indexed by column name.
        attribute: Name of the column being evaluated as a split.
        target_attribute: Name of the column holding the class label.

    Returns:
        The information gain as a number.
    """
    labels = data[target_attribute]
    column = data[attribute]
    n_rows = len(data)

    # Entropy before the split.
    baseline = calculate_entropy(labels)

    # Entropy after the split: each group's entropy weighted by its share
    # of the rows.
    remainder = 0
    for level, size in column.value_counts().items():
        group_labels = labels[column == level]
        remainder += (size / n_rows) * calculate_entropy(group_labels)

    return baseline - remainder

# Step 4: Define the function to calculate entropy
def calculate_entropy(target_attribute):
    """Return the Shannon entropy (base 2) of a column of class labels.

    Args:
        target_attribute: Series of class labels; must support ``len()`` and
            ``.value_counts()``.

    Returns:
        The entropy in bits. ``0`` when the column is empty or contains a
        single class.
    """
    entropy = 0
    total_examples = len(target_attribute)
    for count in target_attribute.value_counts():
        if count == 0:
            # Categorical columns report unobserved categories with a count
            # of 0. They contribute nothing to the entropy (p*log p -> 0),
            # and math.log2(0) would raise ValueError, so skip them.
            continue
        probability = count / total_examples
        entropy -= probability * math.log2(probability)

    return entropy

# Step 5: Define the function to classify a new sample using the decision tree
def classify_sample(sample, decision_tree):
    """Predict the class label of ``sample`` by walking ``decision_tree``.

    Args:
        sample: Mapping from attribute name to the sample's value for it.
        decision_tree: Root node; each node exposes ``attribute`` and a
            ``children`` mapping from attribute value to child node.

    Returns:
        The ``attribute`` of the leaf that is reached, or ``None`` when the
        sample has a value for which the current node has no branch.
    """
    current = decision_tree

    # A node with no children is a leaf; keep descending until one is hit.
    while current.children:
        observed = sample[current.attribute]
        try:
            current = current.children[observed]
        except KeyError:
            # The tree has no branch for this value: result is unknown.
            return None

    return current.attribute

# Step 6: Prepare the dataset for the decision tree
# Here, the "PlayTennis" dataset is used.
import pandas as pd

# Column-oriented dataset: position i across all lists forms the i-th example.
data = {
    'Outlook': [
        'Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast',
        'Sunny', 'Sunny', 'Rain', 'Sunny', 'Overcast', 'Overcast', 'Rain',
    ],
    'Temperature': [
        'Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool',
        'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild',
    ],
    'Humidity': [
        'High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal',
        'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High',
    ],
    'Wind': [
        'Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong',
        'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 'Strong',
    ],
    'PlayTennis': [
        'No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes',
        'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No',
    ],
}

df = pd.DataFrame(data)

# Step 7: Build the decision tree.
# Every column except the label column is a candidate split attribute.
target_attribute = 'PlayTennis'
attributes = [column for column in data if column != target_attribute]
decision_tree = id3(df, target_attribute, attributes)

# Step 8: Classify a new sample using the decision tree.
new_sample = dict(
    Outlook='Sunny',
    Temperature='Cool',
    Humidity='High',
    Wind='Strong',
)

classification = classify_sample(new_sample, decision_tree)
print("Classification:", classification)