<a href="https://colab.research.google.com/github/avyukthinna/ML_Lab/blob/main/1BM22CS060_ID3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import math
import pandas as pd

# Toy training set: one row per observation, with values in the same order
# as `columns` below (four categorical features followed by the class label).
data = [
    ['Sunny', 'Hot', 'High', 'Weak', 'No'],
    ['Sunny', 'Hot', 'High', 'Strong', 'No'],
    ['Overcast', 'Hot', 'High', 'Weak', 'Yes'],
    ['Rainy', 'Mild', 'High', 'Weak', 'Yes'],
    ['Rainy', 'Cool', 'Normal', 'Weak', 'Yes'],
    ['Rainy', 'Cool', 'Normal', 'Strong', 'No'],
    ['Overcast', 'Cool', 'Normal', 'Strong', 'Yes'],
    ['Sunny', 'Mild', 'High', 'Weak', 'No'],
    ['Sunny', 'Cool', 'Normal', 'Weak', 'Yes'],
    ['Rainy', 'Mild', 'Normal', 'Weak', 'Yes'],
    ['Sunny', 'Mild', 'Normal', 'Strong', 'Yes'],
    ['Overcast', 'Mild', 'High', 'Strong', 'Yes'],
    ['Overcast', 'Hot', 'Normal', 'Weak', 'Yes'],
    ['Rainy', 'Mild', 'High', 'Strong', 'No']
]

# Column labels; the last entry is the target column handed to id3().
# NOTE(review): 'Going to Gold' looks like a typo for "Golf" - the same
# string is used as the target name when the tree is built, so any rename
# has to change both places together.
columns = ['Weather', 'Temperature', 'Humidity', 'Wind', 'Going to Gold']

# Tabular view of the training set used by every function in this cell.
df = pd.DataFrame(data, columns=columns)


def calculate_entropy(df, target_col):
    """Return the Shannon entropy, in bits, of the labels in ``df[target_col]``.

    Args:
        df: DataFrame holding the examples.
        target_col: Name of the column whose value distribution is measured.

    Returns:
        ``-sum(p * log2(p))`` over the distinct values of the column, where
        ``p`` is the value's count divided by ``len(df)``. Returns 0 for an
        empty frame.
    """
    total = len(df)
    if total == 0:
        # Nothing to measure; also avoids a 0/0 division below.
        return 0

    entropy = 0
    for count in df[target_col].value_counts():
        if count == 0:
            # A categorical column reports unobserved categories with a
            # count of 0; they contribute nothing and log2(0) would raise.
            continue
        p = count / total
        entropy -= p * math.log2(p)
    return entropy


def calculate_information_gain(df, feature, target_col):
    """Return the entropy reduction obtained by splitting ``df`` on ``feature``.

    Args:
        df: DataFrame holding the examples.
        feature: Name of the column to split on.
        target_col: Name of the label column.

    Returns:
        Entropy of ``target_col`` over all of ``df`` minus the weighted
        entropy of the partitions, one partition per distinct value of
        ``feature``, each weighted by its share of ``len(df)``.
    """
    n_rows = len(df)
    baseline = calculate_entropy(df, target_col)
    level_counts = df[feature].value_counts().to_dict()

    # Weighted average of the label entropy inside each partition.
    remainder = sum(
        (n_level / n_rows)
        * calculate_entropy(df[df[feature] == level], target_col)
        for level, n_level in level_counts.items()
    )
    return baseline - remainder


def find_best_feature(df, target_col):
    """Return the name of the column with the highest information gain.

    Every column except ``target_col`` is treated as a candidate feature,
    regardless of where the target sits in the frame.

    Args:
        df: DataFrame holding the examples.
        target_col: Name of the label column; never considered as a feature.

    Returns:
        The candidate column with the largest information gain. Ties go to
        the column that appears first in ``df.columns``. Returns ``None``
        when there is no candidate column.
    """
    best_feature = None
    max_info_gain = -1

    for feature in df.columns:
        if feature == target_col:
            # Splitting on the label itself would trivially "win".
            continue
        info_gain = calculate_information_gain(df, feature, target_col)
        # Strict comparison keeps the earliest column on ties.
        if info_gain > max_info_gain:
            max_info_gain = info_gain
            best_feature = feature

    return best_feature


def id3(df, target_col):
    """Recursively build a decision tree from ``df``.

    Args:
        df: DataFrame holding the examples still to be classified.
        target_col: Name of the label column.

    Returns:
        A single label when every row shares it, or when ``target_col`` is
        the only column left (then the first mode of the labels). Otherwise
        a nested dict ``{feature: {value: subtree, ...}}`` where ``feature``
        is chosen by ``find_best_feature``.
    """
    labels = df[target_col]

    # Pure node: every remaining example carries the same label.
    if len(labels.unique()) == 1:
        return labels.iloc[0]

    # No column left besides the label: fall back to the most common one.
    if len(df.columns) == 1:
        return labels.mode().iloc[0]

    split_on = find_best_feature(df, target_col)

    # One branch per observed value; the split column is removed from the
    # rows passed down so it is not chosen again further down that branch.
    branches = {
        branch_value: id3(
            df[df[split_on] == branch_value].drop(columns=[split_on]),
            target_col,
        )
        for branch_value in df[split_on].unique()
    }
    return {split_on: branches}


# Build the tree once from the full training frame; the second argument
# names the label column.
decision_tree = id3(df, 'Going to Gold')

def predict(tree, sample):
    """Classify ``sample`` by walking ``tree`` from the root to a leaf.

    Args:
        tree: Either a leaf (any non-dict value) or a dict of the form
            ``{feature: {value: subtree, ...}}``.
        sample: Mapping from feature name to the sample's value for it.

    Returns:
        The leaf reached, or the string ``"Unknown"`` when the sample's value
        for a tested feature has no branch in the tree.

    Raises:
        KeyError: If ``sample`` lacks a feature that the walk needs to test.
    """
    node = tree
    while isinstance(node, dict):
        # Each internal node has the feature it tests as its first key.
        attribute = next(iter(node))
        branches = node[attribute]
        observed = sample[attribute]
        if observed not in branches:
            return "Unknown"
        node = branches[observed]
    return node

# pprint lays the nested tree dict out over several lines for readability.
import pprint
print("Decision Tree:")
pprint.pprint(decision_tree)

# Classify one hand-written sample; keys must match the feature column names.
sample = {'Weather': 'Sunny', 'Temperature': 'Cool', 'Humidity': 'High', 'Wind': 'Strong'}
prediction = predict(decision_tree, sample)
print(f"\nPrediction for sample {sample}: {prediction}")



Decision Tree:
{'Weather': {'Overcast': 'Yes',
             'Rainy': {'Wind': {'Strong': 'No', 'Weak': 'Yes'}},
             'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}}}

Prediction for sample {'Weather': 'Sunny', 'Temperature': 'Cool', 'Humidity': 'High', 'Wind': 'Strong'}: No
