<a href="https://colab.research.google.com/github/hemavijayalakshmi/Machine-Learning-Lab/blob/main/ID3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
 import pandas as pd
import math

# Function to calculate the entropy of a dataset
def entropy(data, target):
    """Return the Shannon entropy (in bits) of the ``target`` column of ``data``.

    Args:
        data: DataFrame holding the examples.
        target: Name of the column that contains the class labels.

    Returns:
        ``-sum(p * log2(p))`` over the label frequencies, where each ``p`` is
        a class count divided by the number of rows in ``data``. The result
        is ``0`` when ``data`` has no rows.
    """
    # Count the number of instances for each target class
    class_counts = data[target].value_counts()
    total_instances = len(data)
    entropy_val = 0
    for count in class_counts:
        # A categorical column reports unobserved categories with a count of
        # zero. They contribute nothing to the sum (p * log2(p) -> 0 as
        # p -> 0), and math.log2(0) would raise ValueError, so skip them.
        if count == 0:
            continue
        prob = count / total_instances
        entropy_val -= prob * math.log2(prob)
    return entropy_val

# Function to calculate information gain for a feature
def information_gain(data, feature, target):
    """Return the information gain obtained by splitting ``data`` on ``feature``.

    Args:
        data: DataFrame holding the examples.
        feature: Name of the column to split on.
        target: Name of the column that contains the class labels.

    Returns:
        The entropy of ``target`` over all of ``data`` minus the size-weighted
        average entropy of ``target`` within each group of rows sharing the
        same ``feature`` value.
    """
    n_rows = len(data)
    entropy_before = entropy(data, target)

    # One partition per distinct value of the feature, in order of appearance.
    partitions = (
        data[data[feature] == level] for level in data[feature].unique()
    )

    # Each partition's entropy is weighted by its share of the rows.
    entropy_after = sum(
        (len(part) / n_rows) * entropy(part, target) for part in partitions
    )

    return entropy_before - entropy_after

# Function to build the ID3 decision tree
def id3(data, features, target):
    """Recursively build an ID3 decision tree.

    Args:
        data: DataFrame holding the training examples for this node.
        features: List of column names still available for splitting.
        target: Name of the column that contains the class labels.

    Returns:
        A class label when the node is a leaf, otherwise a nested dict of the
        form ``{feature: {feature_value: subtree_or_label, ...}}``.
    """
    labels = data[target]

    # Pure node: every example carries the same label.
    if len(labels.unique()) == 1:
        return labels.iloc[0]

    # Nothing left to split on: fall back to the most frequent label.
    if not features:
        return labels.mode()[0]

    def gain_of(candidate):
        return information_gain(data, candidate, target)

    # Split on the feature that yields the largest information gain.
    best_feature = max(features, key=gain_of)

    # The chosen feature is not reused further down this branch.
    remaining = [name for name in features if name != best_feature]

    # One child per observed value of the chosen feature.
    branches = {
        value: id3(data[data[best_feature] == value], remaining, target)
        for value in data[best_feature].unique()
    }
    return {best_feature: branches}

# Read the training examples and echo the first rows as a sanity check
df = pd.read_csv("Tennis.csv")
print(df.head())

# Predictor columns and the label column ('Play') used for training
features, target = ['Outlook', 'Temp', 'Humidity', 'Wind'], 'Play'

# Learn the tree from the full dataset
tree = id3(df, features, target)

# Show the result as a nested dict
print("Decision Tree:")
print(tree)


    Outlook  Temp Humidity    Wind Play
0     Sunny   Hot     High    Weak   No
1     Sunny   Hot     High  Strong   No
2  Overcast   Hot     High    Weak  Yes
3      Rain  Mild     High    Weak  Yes
4      Rain  Cool   Normal    Weak  Yes
Decision Tree:
{'Outlook': {'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}, 'Overcast': 'Yes', 'Rain': {'Wind': {'Weak': 'Yes', 'Strong': 'No'}}}}
