# In [1]:
import pandas as pd
import numpy as np


# Absolute, machine-specific location of the training CSV; change this before
# running the script on another machine.
dataset_path = r'C:\Users\92318\Downloads\3-dataset.csv'
# Load the CSV into a DataFrame; the functions below read columns from it by name.
df = pd.read_csv(dataset_path)


def entropy(attribute):
    """Return the base-2 Shannon entropy of the values in ``attribute``.

    Args:
        attribute: Any array-like accepted by ``np.unique`` (list, array,
            pandas Series); each distinct value is treated as one class.

    Returns:
        ``-sum(p * log2(p))`` over the relative frequency ``p`` of each
        distinct value.
    """
    # Only the frequencies matter here; the distinct values are discarded.
    _, occurrences = np.unique(attribute, return_counts=True)
    shares = occurrences / np.sum(occurrences)
    return -np.sum(shares * np.log2(shares))


def information_gain(data, attribute_name, target_name):
    """Return the information gain from splitting ``data`` on one attribute.

    The gain is the entropy of the target column minus the weighted average
    of the target entropies inside each group of rows that share a value of
    ``attribute_name``.

    Args:
        data: DataFrame containing both ``attribute_name`` and ``target_name``.
        attribute_name: Column whose distinct values define the split.
        target_name: Column holding the class labels.

    Returns:
        The information gain as a float.
    """
    target = data[target_name]
    attribute = data[attribute_name]
    total_entropy = entropy(target)

    attribute_values, value_counts = np.unique(attribute, return_counts=True)
    total_count = np.sum(value_counts)

    # Select each group's labels with a boolean mask.  The previous
    # ``data.where(...).dropna()`` also discarded rows that had a NaN in any
    # *other* column, so the group entropies were computed on fewer rows than
    # the weights (taken from ``value_counts``) assumed.
    weighted_entropy = np.sum([
        (count / total_count) * entropy(target[attribute == value])
        for value, count in zip(attribute_values, value_counts)
    ])
    return total_entropy - weighted_entropy


def find_best_attribute(data, target_name):
    """Return the name of the column with the highest information gain.

    Args:
        data: DataFrame holding the candidate attributes and the target.
        target_name: Column holding the class labels; it is excluded from
            the candidates.

    Returns:
        The candidate column name whose information gain is largest.  On a
        tie, ``np.argmax`` picks the earliest column in ``data.columns``.
    """
    candidates = list(data.columns)
    candidates.remove(target_name)

    gains = [information_gain(data, name, target_name) for name in candidates]
    winner = np.argmax(gains)
    return candidates[winner]


def build_decision_tree(data, target_name, parent_node_class=None):
    """Recursively build an ID3-style decision tree as nested dictionaries.

    Args:
        data: DataFrame with the attribute columns and the target column.
        target_name: Name of the column holding the class labels.
        parent_node_class: Label to return when ``data`` has no rows; during
            recursion this is the majority class of the parent node.

    Returns:
        Either a single class label (leaf) or a dict of the form
        ``{attribute: {attribute_value: subtree, ...}}``.
    """
    # No examples reach this node (e.g. the split value was NaN, which never
    # compares equal to itself): fall back to the parent's label instead of
    # failing with an IndexError on an empty ``np.unique`` result.
    if len(data) == 0:
        return parent_node_class

    classes, class_counts = np.unique(data[target_name], return_counts=True)

    # Pure node: every remaining example carries the same label.
    if len(classes) <= 1:
        return classes[0]

    # Most frequent label at this node; ties go to the first label in the
    # sorted order returned by ``np.unique``.
    majority_class = classes[np.argmax(class_counts)]

    # Attributes exhausted but labels still mixed: predict the majority label.
    if len(data.drop(target_name, axis=1).columns) == 0:
        return majority_class

    best_attribute = find_best_attribute(data, target_name)
    tree = {best_attribute: {}}
    split_column = data[best_attribute]

    for attribute_value in np.unique(split_column):
        # Boolean indexing keeps the original dtypes and keeps rows that have
        # NaN in unrelated columns.  Dropping the attribute just used means
        # every level has one column fewer, so the recursion always ends,
        # even when identical feature rows carry different labels.
        sub_data = data[split_column == attribute_value].drop(best_attribute, axis=1)
        tree[best_attribute][attribute_value] = build_decision_tree(
            sub_data, target_name, majority_class
        )

    return tree


# The last column of the loaded DataFrame is used as the class label.
target_name = df.columns[-1]
# Build the tree from the full dataset; the result is a nested dict (or a
# single label if the dataset contains only one class).
decision_tree = build_decision_tree(df, target_name)


# Print the nested-dict representation of the learned tree.
print(decision_tree)


# Output recorded from the notebook run:
# {'outlook': {'overcast': 'yes', 'rain': {'wind': {'strong': 'no', 'weak': 'yes'}}, 'sunny': {'humidity': {'high': 'no', 'normal': 'yes'}}}}
