
# Saad Salman (70138325)



# Decision Tree using ID3 Algorithm




1. Required Libraries





In [3]:
import pandas as pd
import numpy as np
import math


2. Dataset Preparation

In [4]:
# Toy weather table: four categorical features plus the 'Decision' label.
# Position i in every list describes the same observation (14 in total).
dataset = dict(
    Weather=['Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast',
             'Sunny', 'Sunny', 'Rain', 'Sunny', 'Overcast', 'Overcast', 'Rain'],
    Temp=['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Mild',
          'Cool', 'Mild', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild'],
    Moisture=['High', 'High', 'High', 'High', 'Normal', 'Normal', 'High',
              'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High'],
    Air=['Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong',
         'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 'Strong'],
    Decision=['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes',
              'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No'],
)

# Each dict key becomes a column; list positions become the rows.
df = pd.DataFrame(data=dataset)
df


Unnamed: 0,Weather,Temp,Moisture,Air,Decision
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes
5,Rain,Cool,Normal,Strong,No
6,Overcast,Mild,High,Strong,Yes
7,Sunny,Cool,High,Weak,No
8,Sunny,Mild,Normal,Weak,Yes
9,Rain,Mild,Normal,Weak,Yes


3. Entropy Calculation Function

In [2]:
def calculate_entropy(column):
    """Return the base-2 Shannon entropy of the values in ``column``.

    Args:
        column: Array-like of labels; anything ``np.unique`` accepts.

    Returns:
        The sum of ``-p * log2(p)`` over the distinct values, where ``p`` is
        each value's share of the entries. An empty ``column`` yields ``0``.
    """
    _, freq = np.unique(column, return_counts=True)
    total = sum(freq)

    ent = 0
    for count in freq:
        share = count / total
        ent -= share * math.log2(share)

    return ent


4. Information Gain Calculation

In [5]:
def compute_gain(data, feature, target):
    """Return the information gain from splitting ``data`` on ``feature``.

    Args:
        data: DataFrame holding both the ``feature`` and ``target`` columns.
        feature: Name of the column to split on.
        target: Name of the label column.

    Returns:
        Entropy of ``data[target]`` minus the size-weighted average entropy
        of the target within each distinct value of ``feature``.
    """
    values, counts = np.unique(data[feature], return_counts=True)
    total = sum(counts)

    split_entropy = 0
    for value, count in zip(values, counts):
        # Target labels of the rows that take this feature value.
        labels = data.loc[data[feature] == value, target]
        split_entropy += (count / total) * calculate_entropy(labels)

    return calculate_entropy(data[target]) - split_entropy


5. Feature Selection

In [6]:
def select_best_feature(data, target):
    """Return the name of the non-target column with the highest information gain.

    Args:
        data: DataFrame containing the ``target`` column and candidate features.
        target: Name of the label column, excluded from the candidates.

    Returns:
        The column label whose ``compute_gain`` value is largest.

    Raises:
        ValueError: If ``data`` has no column other than ``target``.
    """
    gains = {
        name: compute_gain(data, name, target)
        for name in data.columns.drop(target)
    }

    # max() returns the first maximal item, so equal gains resolve in column order.
    best_name, _ = max(gains.items(), key=lambda item: item[1])
    return best_name


6. ID3 Tree Builder

In [7]:
def build_tree(data, target):
    """Recursively build an ID3 decision tree as nested dictionaries.

    Args:
        data: DataFrame with the ``target`` column and any remaining features.
        target: Name of the label column.

    Returns:
        A single label when the node is a leaf, otherwise
        ``{feature: {value: subtree, ...}}`` where ``feature`` is the column
        chosen by ``select_best_feature`` and each ``subtree`` is built from
        the rows having that value, with the chosen column removed.
    """
    labels = data[target]

    # Pure node: every remaining row carries the same label.
    if len(np.unique(labels)) == 1:
        return labels.iloc[0]

    # Only the target column is left: fall back to the most frequent label.
    if len(data.columns) == 1:
        return labels.mode()[0]

    best_feature = select_best_feature(data, target)

    branches = {}
    for val in np.unique(data[best_feature]):
        matching_rows = data[best_feature] == val
        # Drop the used feature so it cannot be chosen again further down.
        branches[val] = build_tree(
            data[matching_rows].drop(columns=best_feature), target
        )

    return {best_feature: branches}


7. Model Generation

In [8]:
# Fit the ID3 tree on the full table with 'Decision' as the label, then display it.
tree_model = build_tree(data=df, target='Decision')
tree_model


{'Weather': {'Overcast': 'Yes',
  'Rain': {'Air': {'Strong': 'No', 'Weak': 'Yes'}},
  'Sunny': {'Temp': {'Cool': 'No', 'Hot': 'No', 'Mild': 'Yes'}}}}

8. Final Output

In [9]:
# Hand-written tree literal: this cell displays the dict below; it does not read `tree_model`.
# NOTE(review): the tree printed by the Model Generation cell splits the 'Sunny' branch on
# 'Temp', while this literal splits it on 'Moisture'. On the Sunny rows of the dataset both
# features separate the labels perfectly, so their gains tie and select_best_feature keeps
# the earlier column ('Temp'). Confirm which tree is meant to be reported as the final result.
{'Weather': {
    'Overcast': 'Yes',
    'Rain': {'Air': {'Strong': 'No', 'Weak': 'Yes'}},
    'Sunny': {'Moisture': {'High': 'No', 'Normal': 'Yes'}}
}}


{'Weather': {'Overcast': 'Yes',
  'Rain': {'Air': {'Strong': 'No', 'Weak': 'Yes'}},
  'Sunny': {'Moisture': {'High': 'No', 'Normal': 'Yes'}}}}

9. Conclusion

## Conclusion

This lab implements the ID3 decision tree algorithm from scratch using entropy and information gain.
At each node the algorithm greedily selects the attribute with the highest information gain and splits the data on it, stopping when a branch contains a single class or no attributes remain.
Working through the implementation shows how a decision tree arrives at its predictions and reinforces core machine learning concepts.
