In [3]:
import numpy as np
import pandas as pd

def load_data(file_path='enjoysport.csv'):
    """Read a CSV file and return it with the first column removed.

    The columns read from the file are relabelled, in order, as ``Day``,
    ``Outlook``, ``Temperature``, ``Humidity``, ``Wind`` and ``Decision``;
    the ``Day`` column is then dropped. The row count and the shape of the
    resulting frame are printed before it is returned.

    Args:
        file_path: Path (or any other source accepted by ``pd.read_csv``)
            of the CSV file. Defaults to ``'enjoysport.csv'``.

    Returns:
        The loaded ``pandas.DataFrame`` without the ``Day`` column.
    """
    column_names = ['Day', 'Outlook', 'Temperature', 'Humidity', 'Wind', 'Decision']

    frame = pd.read_csv(file_path)
    # Relabelling assumes the file has exactly six columns.
    frame.columns = column_names
    frame = frame.drop('Day', axis=1)

    print("Length: ", len(frame))
    print("Shape: ", frame.shape)
    return frame

def compute_entropy(data):
    """Return the entropy, in bits, of the labels in the last column of ``data``.

    The labels are counted with ``value_counts`` and the result is
    ``-sum(p * log2(p))`` over the relative frequency ``p`` of each
    distinct label.

    Args:
        data: ``pandas.DataFrame`` whose last column holds the class labels.

    Returns:
        The entropy as a float. It is ``0.0`` when all counted labels are
        identical and also when there are no labels to count (empty frame).
    """
    counts = data.iloc[:, -1].value_counts()
    # The total is loop-invariant, so compute it once instead of per label.
    total = counts.sum()

    ent = 0.0
    # Iterate the raw counts directly; no per-label index lookup is needed.
    for count in counts.to_numpy():
        prob = count / total
        ent -= prob * np.log2(prob)
    return ent

def compute_information_gain(data, feature):
    """Return the information gain obtained by splitting ``data`` on ``feature``.

    The gain is the entropy of ``data`` minus the weighted sum of the
    entropies of the subsets produced by each distinct value of the
    feature, each weighted by the fraction of rows holding that value.

    Args:
        data: ``pandas.DataFrame`` whose last column holds the class labels.
        feature: Label of the column to split on.

    Returns:
        The entropy before the split minus the weighted entropy after it.
    """
    parent_ent = compute_entropy(data)

    column = data[feature]
    rows_per_value = column.value_counts()
    n_rows = len(data)

    # Accumulate in order of first appearance of each value in the column.
    children_ent = 0
    for val in column.unique():
        branch_ent = compute_entropy(data[column == val])
        children_ent += (rows_per_value[val] / n_rows) * branch_ent

    return parent_ent - children_ent

def get_best_feature(data, used_features):
    """Pick the not-yet-used feature with the highest positive information gain.

    Every column of ``data`` except the last is a candidate unless it is
    listed in ``used_features``. Ties are resolved in favour of the
    candidate that comes first in column order.

    Args:
        data: ``pandas.DataFrame`` whose last column holds the class labels.
        used_features: Collection of column labels to exclude.

    Returns:
        The label of the winning column, or ``None`` when no candidate
        remains or no candidate has a gain strictly greater than zero.
    """
    winner = None
    top_gain = 0

    for candidate in data.columns[:-1]:
        if candidate in used_features:
            continue
        gain = compute_information_gain(data, candidate)
        # Strict comparison: a gain of exactly zero never wins.
        if gain > top_gain:
            winner, top_gain = candidate, gain

    return winner

def build_tree(data, used_features=None):
    """Recursively build a decision tree (ID3-style) as nested dictionaries.

    At each node the feature chosen by ``get_best_feature`` is used to
    split the rows; one subtree is built per distinct value of that feature.

    Args:
        data: ``pandas.DataFrame`` whose last column holds the class labels.
        used_features: Column labels already used on the path from the root.
            The argument is copied and never modified. ``None`` (the
            default) means no feature has been used yet.

    Returns:
        * a class label when all rows share one label;
        * the most frequent class label when the rows are still mixed but
          ``get_best_feature`` finds no feature to split on;
        * ``None`` when ``data`` has no rows;
        * otherwise ``{feature: {value: subtree, ...}}``.
    """
    # Work on a private copy so the caller's list is left untouched.
    used = [] if used_features is None else list(used_features)

    # Nothing to learn from zero rows (also avoids indexing row 0 below).
    if data.empty:
        return None

    if compute_entropy(data) == 0:
        return data.iloc[0, -1]

    best_feature = get_best_feature(data, used)
    # Compare against None explicitly: a column label such as 0 or '' is
    # falsy but still a valid feature.
    if best_feature is None:
        # Mixed labels and nothing left to split on: fall back to the
        # majority label rather than producing a leaf with no decision.
        return data.iloc[:, -1].value_counts().idxmax()

    used.append(best_feature)
    branches = {}
    for val in data[best_feature].unique():
        subset = data[data[best_feature] == val]
        # Each recursive call copies ``used``, so sibling branches do not
        # see each other's choices.
        branches[val] = build_tree(subset, used)

    return {best_feature: branches}

# Load the dataset from the default CSV path used by load_data().
data = load_data()
# No feature has been used for a split before the root call.
used_features = []
# Build the tree from the full dataset.
decision_tree = build_tree(data, used_features)

def format_tree(tree):
    """Render a nested-dict decision tree as a single string.

    A leaf (any non-dict value) is rendered as its ``str`` form wrapped in
    single quotes. A node ``{feature: {value: subtree, ...}}`` is rendered
    as ``{'feature': [('value', <subtree>), ...]}``. Labels are interpolated
    as-is; quotes inside them are not escaped.

    Args:
        tree: A leaf value, or a dict mapping a feature label to a dict of
            ``value -> subtree``.

    Returns:
        The formatted string. An empty dict is rendered as ``{}``. If a
        dict has several feature keys, each is rendered with its own
        branches, separated by ``, ``.
    """
    if not isinstance(tree, dict):
        return f"'{tree}'"

    nodes = []
    for key, vals in tree.items():
        branches = ', '.join(
            f"('{val}', {format_tree(subtree)})" for val, subtree in vals.items()
        )
        # Format each key inside the loop so an empty dict never reads an
        # unbound name and multiple keys keep their own branches.
        nodes.append(f"'{key}': [{branches}]")
    return '{' + ', '.join(nodes) + '}'

# Render the nested-dict tree as a single string and print it.
formatted_tree = format_tree(decision_tree)
print("Final Tree: ", formatted_tree)


FileNotFoundError: [Errno 2] No such file or directory: 'enjoysport.csv'