<a href="https://colab.research.google.com/github/harikanemala/Machine-Learning/blob/main/ID3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import math
from collections import Counter

# Define the entropy function
def entropy(data):
    """Return the Shannon entropy, in bits, of the class labels in ``data``.

    The last element of every row is treated as that row's class label.
    An empty ``data`` yields ``0`` because there are no labels to sum over.
    """
    row_count = len(data)
    label_counts = Counter(row[-1] for row in data)

    bits = 0
    for occurrences in label_counts.values():
        fraction = occurrences / row_count
        bits -= fraction * math.log2(fraction)
    return bits

# Function to calculate the information gain
def information_gain(data, attribute_index):
    """Return how much label entropy drops when splitting on one attribute.

    The gain is the entropy of ``data`` minus the size-weighted sum of the
    entropies of the partitions obtained by grouping rows on the value found
    at ``attribute_index``.
    """
    baseline = entropy(data)
    row_count = len(data)
    distinct_values = {row[attribute_index] for row in data}

    remainder = 0
    for candidate in distinct_values:
        matching = [row for row in data if row[attribute_index] == candidate]
        weight = len(matching) / row_count
        remainder += weight * entropy(matching)

    return baseline - remainder

# Function to choose the best attribute based on information gain
def best_attribute(data):
    """Return the index of the attribute with the highest information gain.

    Every column except the last one (the class label) is a candidate.  When
    several attributes tie, the lowest index wins because ``max`` keeps the
    first maximum it encounters.
    """
    label_column = len(data[0]) - 1  # the final column holds the class label
    return max(
        range(label_column),
        key=lambda index: information_gain(data, index),
    )

# Function to split data based on the attribute
def split_data(data, attribute_index):
    """Partition ``data`` by the value each row holds at ``attribute_index``.

    Args:
        data: List of rows (indexable sequences).
        attribute_index: Position of the attribute to group on.

    Returns:
        A dict mapping every distinct attribute value to the list of rows
        carrying that value.  Rows keep their original relative order and
        keys appear in the order their value is first seen, so the result
        is deterministic from run to run.
    """
    subsets = {}
    # One pass over the rows instead of re-scanning the dataset per value.
    for row in data:
        subsets.setdefault(row[attribute_index], []).append(row)
    return subsets

# Function to create the decision tree
def create_tree(data, attributes=None):
    """Build an ID3 decision tree from ``data``.

    Args:
        data: Non-empty list of rows; the last element of each row is its
            class label.
        attributes: Column indices that may still be used for splitting.
            ``None`` (the default) means every non-label column.  An empty
            collection means no split is allowed, so a leaf is returned.

    Returns:
        Either a class label (leaf) or a single-key dict of the form
        ``{attribute_index: {attribute_value: subtree, ...}}``.

    Raises:
        ValueError: If ``data`` is empty.
    """
    if not data:
        raise ValueError("cannot build a decision tree from an empty dataset")

    if attributes is None:
        # No explicit restriction: every column except the label is usable.
        attributes = list(range(len(data[0]) - 1))

    labels = [row[-1] for row in data]

    # Base case: all rows share one class.
    if len(set(labels)) == 1:
        return labels[0]

    # Base case: nothing left to split on, so fall back to the majority class.
    if not attributes:
        return Counter(labels).most_common(1)[0][0]

    # Choose only among the attributes still available on this path.
    # Considering every column could re-select an already-used attribute
    # (all gains are 0 for contradictory rows), which never shrinks
    # ``attributes`` and recurses forever.  ``max`` keeps the first maximum,
    # so ties go to the earliest entry of ``attributes``.
    best_attr = max(attributes, key=lambda index: information_gain(data, index))
    remaining = [attr for attr in attributes if attr != best_attr]

    return {
        best_attr: {
            value: create_tree(subset, remaining)
            for value, subset in split_data(data, best_attr).items()
        }
    }

# Function to classify a new sample using the decision tree
def classify(tree, sample):
    """Predict the class label of ``sample`` by walking ``tree``.

    Args:
        tree: A class label (leaf), or a nested single-key dict of the form
            ``{attribute_index: {attribute_value: subtree, ...}}``.
        sample: Sequence of attribute values, indexed by attribute index.

    Returns:
        The label stored at the leaf the sample reaches.

    Raises:
        KeyError: If the sample holds an attribute value for which the tree
            has no branch (a value absent from the data the tree was built
            from).
    """
    node = tree
    # Descend iteratively: each internal node is a dict keyed by the single
    # attribute index it tests.
    while isinstance(node, dict):
        attribute = next(iter(node))
        branches = node[attribute]
        value = sample[attribute]
        try:
            node = branches[value]
        except KeyError:
            raise KeyError(
                "no branch for value {!r} of attribute {!r}; known values: {}".format(
                    value, attribute, sorted(branches, key=repr)
                )
            ) from None
    return node

# Example data: the "Play Tennis" table.
# Columns, in order: Outlook, Temperature, Humidity, Wind, PlayTennis (Yes/No).
dataset = [
    ['Sunny', 'Hot', 'High', 'Weak', 'No'],
    ['Sunny', 'Hot', 'High', 'Strong', 'No'],
    ['Overcast', 'Hot', 'High', 'Weak', 'Yes'],
    ['Rain', 'Mild', 'High', 'Weak', 'Yes'],
    ['Rain', 'Cool', 'Normal', 'Weak', 'Yes'],
    ['Rain', 'Cool', 'Normal', 'Strong', 'No'],
    ['Overcast', 'Cool', 'Normal', 'Strong', 'Yes'],
    ['Sunny', 'Mild', 'High', 'Weak', 'No'],
    ['Sunny', 'Cool', 'Normal', 'Weak', 'Yes'],
    ['Rain', 'Mild', 'Normal', 'Weak', 'Yes'],
    ['Sunny', 'Mild', 'Normal', 'Strong', 'Yes'],
    ['Overcast', 'Mild', 'High', 'Strong', 'Yes'],
    ['Overcast', 'Hot', 'Normal', 'Weak', 'Yes'],
    ['Rain', 'Mild', 'High', 'Strong', 'No'],
]

# Fit the tree, allowing a split on every column except the trailing label.
tree = create_tree(dataset, attributes=list(range(len(dataset[0]) - 1)))
print("Decision Tree:", tree)

# Predict the label for one example day.
new_sample = ['Sunny', 'Mild', 'High', 'Strong']
predicted_class = classify(tree, new_sample)
print("Predicted class for the new sample:", predicted_class)


Decision Tree: {0: {'Sunny': {2: {'Normal': 'Yes', 'High': 'No'}}, 'Rain': {3: {'Strong': 'No', 'Weak': 'Yes'}}, 'Overcast': 'Yes'}}
Predicted class for the new sample: No
