In [None]:
import pandas as pd
import numpy as np
from math import log2

# Play-tennis toy dataset, written one observation per row:
# four weather attributes followed by the PlayTennis label.
data = pd.DataFrame(
    [
        ('Sunny', 'Hot', 'High', 'Weak', 'No'),
        ('Sunny', 'Hot', 'High', 'Strong', 'No'),
        ('Overcast', 'Hot', 'High', 'Weak', 'Yes'),
        ('Rain', 'Mild', 'High', 'Weak', 'Yes'),
        ('Rain', 'Cool', 'Normal', 'Weak', 'Yes'),
        ('Rain', 'Cool', 'Normal', 'Strong', 'No'),
        ('Overcast', 'Cool', 'Normal', 'Strong', 'Yes'),
        ('Sunny', 'Mild', 'High', 'Weak', 'No'),
        ('Sunny', 'Cool', 'Normal', 'Weak', 'Yes'),
        ('Rain', 'Mild', 'Normal', 'Weak', 'Yes'),
        ('Sunny', 'Mild', 'Normal', 'Strong', 'Yes'),
        ('Overcast', 'Mild', 'High', 'Strong', 'Yes'),
        ('Overcast', 'Hot', 'Normal', 'Weak', 'Yes'),
        ('Rain', 'Mild', 'High', 'Strong', 'No'),
    ],
    columns=['Outlook', 'Temperature', 'Humidity', 'Wind', 'PlayTennis'],
)

# Calculate entropy
def entropy(col):
    """Return the Shannon entropy, in bits, of the values in ``col``.

    Every distinct value with relative frequency ``p`` contributes
    ``-p * log2(p)`` to the result.
    """
    n_items = len(col)
    _, frequencies = np.unique(col, return_counts=True)
    bits = 0
    for freq in frequencies:
        share = freq / n_items
        bits += share * log2(share)
    return -bits

# Calculate information gain
def info_gain(data, attr, target="PlayTennis"):
    """Return the information gain obtained by splitting ``data`` on ``attr``.

    The gain is the entropy of the ``target`` column minus the size-weighted
    entropy of ``target`` inside each group of rows sharing one ``attr`` value.
    """
    levels, sizes = np.unique(data[attr], return_counts=True)
    remainder = 0
    for level, size in zip(levels, sizes):
        labels_in_group = data[data[attr] == level][target]
        remainder += (size / sum(sizes)) * entropy(labels_in_group)
    return entropy(data[target]) - remainder

# Recursive ID3 algorithm
def id3(data, attributes, target="PlayTennis"):
    """Grow an ID3 decision tree and return it as nested dicts.

    A leaf is a single ``target`` label. An internal node has the form
    ``{attribute: {value: subtree, ...}}`` with one entry per distinct value
    of the chosen attribute in ``data``.
    """
    classes = np.unique(data[target])
    if len(classes) == 1:
        # Every row agrees on the label: emit it as a leaf.
        return classes[0]
    if len(attributes) == 0:
        # Nothing left to split on: fall back to the most frequent label.
        return data[target].mode()[0]

    def score(candidate):
        return info_gain(data, candidate, target)

    best_attr = max(attributes, key=score)
    leftover = [name for name in attributes if name != best_attr]

    branches = {}
    for value in np.unique(data[best_attr]):
        matching_rows = data[data[best_attr] == value]
        branches[value] = id3(matching_rows, leftover, target)
    return {best_attr: branches}

# Pretty-print the decision tree in a hierarchical format
def print_tree(tree, depth=0):
    """Print a nested-dict decision tree as an indented outline.

    Attribute names are printed at ``depth``, their branch values one level
    deeper, and leaves (anything that is not a dict) as ``=> label``.
    """
    pad = " " * (2 * depth)
    if not isinstance(tree, dict):
        print(pad, "=>", tree)
        return

    branch_pad = " " * (2 * (depth + 1))
    for attr, branches in tree.items():
        print(pad, attr)
        for value, subtree in branches.items():
            print(branch_pad, value)
            print_tree(subtree, depth + 2)

# Candidate split attributes: every column except the last one (the label)
attributes = data.columns[:-1].tolist()
tree = id3(data, attributes)

# Show the learned tree under a heading
print("Decision Tree:")
print_tree(tree)


Decision Tree:
 Outlook
   Overcast
     => Yes
   Rain
     Wind
       Strong
         => No
       Weak
         => Yes
   Sunny
     Humidity
       High
         => No
       Normal
         => Yes


In [None]:
import pandas as pd
import numpy as np
from math import log2

# Play-tennis toy dataset with numeric Temperature and Humidity readings,
# written one observation per row; the last field is the PlayTennis label.
data = pd.DataFrame(
    [
        ('Sunny', 85, 85, 'Weak', 'No'),
        ('Sunny', 80, 90, 'Strong', 'No'),
        ('Overcast', 83, 78, 'Weak', 'Yes'),
        ('Rain', 70, 96, 'Weak', 'Yes'),
        ('Rain', 68, 80, 'Weak', 'Yes'),
        ('Rain', 65, 70, 'Strong', 'No'),
        ('Overcast', 64, 65, 'Strong', 'Yes'),
        ('Sunny', 72, 95, 'Weak', 'No'),
        ('Sunny', 69, 70, 'Weak', 'Yes'),
        ('Rain', 75, 80, 'Weak', 'Yes'),
        ('Sunny', 75, 70, 'Strong', 'Yes'),
        ('Overcast', 72, 90, 'Strong', 'Yes'),
        ('Overcast', 81, 75, 'Weak', 'Yes'),
        ('Rain', 71, 80, 'Strong', 'No'),
    ],
    columns=['Outlook', 'Temperature', 'Humidity', 'Wind', 'PlayTennis'],
)

# Calculate entropy
def entropy(col):
    """Shannon entropy (base 2) of the distribution of values in ``col``."""
    total = len(col)
    counts = np.unique(col, return_counts=True)[1]
    proportions = (count / total for count in counts)
    return -sum(p * log2(p) for p in proportions)

# Calculate gain
def gain(data, attr, target="PlayTennis"):
    """Information gain of ``attr`` with respect to ``target``.

    Computed as the entropy of ``target`` over all rows minus the
    size-weighted entropy of ``target`` within each distinct ``attr`` value.
    """
    baseline = entropy(data[target])
    levels, sizes = np.unique(data[attr], return_counts=True)
    conditional = 0
    for idx, level in enumerate(levels):
        weight = sizes[idx] / sum(sizes)
        conditional += weight * entropy(data[data[attr] == level][target])
    return baseline - conditional

# Calculate split information for gain ratio
def split_info(data, attr):
    """Return the split information of ``attr``.

    This is the base-2 entropy of the partition sizes obtained by grouping
    the rows of ``data`` on their ``attr`` value.
    """
    n_rows = len(data)
    _, sizes = np.unique(data[attr], return_counts=True)
    acc = 0
    for size in sizes:
        fraction = size / n_rows
        acc += fraction * log2(fraction)
    return -acc

# Calculate gain ratio
def gain_ratio(data, attr, target="PlayTennis"):
    """Return information gain divided by split information for ``attr``.

    Yields 0 when the split information is 0, which avoids a division by zero.
    """
    numerator = gain(data, attr, target)
    denominator = split_info(data, attr)
    if denominator != 0:
        return numerator / denominator
    return 0

# Recursive C4.5 algorithm
def c4_5(data, attributes, target="PlayTennis"):
    """Build a C4.5-style decision tree and return it as nested dicts.

    Categorical attributes produce one branch per distinct value and are
    removed from the candidate list below the node that uses them.

    Numeric (non-boolean) attributes are split with a binary threshold test
    instead of one branch per distinct number, which would turn the tree into
    a lookup table of the training values. Candidate thresholds are the
    midpoints between consecutive distinct values; the threshold with the
    highest information gain is kept, and the resulting two-way split then
    competes with the other attributes on gain ratio. A numeric attribute
    stays in the candidate list so deeper nodes can cut it again.

    Simplifications: no pruning is performed, and rows whose numeric value
    does not satisfy ``value <= threshold`` (including NaN) fall in the ``>``
    branch.

    Args:
        data: DataFrame holding the attribute columns and the ``target`` column.
        attributes: Column names that may be used for splitting.
        target: Name of the label column.

    Returns:
        A label for a leaf, otherwise ``{attribute: {branch: subtree}}``.
        Branch keys are the attribute values for categorical splits and the
        strings ``"<= t"`` / ``"> t"`` for numeric splits.
    """
    classes = np.unique(data[target])
    if len(classes) == 1:
        return classes[0]
    if len(attributes) == 0:
        return data[target].mode()[0]

    def _threshold_view(attr, cut):
        # Two-column frame where `attr` is replaced by the boolean test
        # `attr <= cut`, so the existing helpers can score the binary split.
        return pd.DataFrame({attr: data[attr] <= cut, target: data[target]})

    best = None  # (gain ratio, attribute, threshold or None for categorical)
    for attr in attributes:
        column = data[attr]
        is_continuous = (pd.api.types.is_numeric_dtype(column)
                         and not pd.api.types.is_bool_dtype(column))
        if not is_continuous:
            candidate = (gain_ratio(data, attr, target), attr, None)
        else:
            levels = np.unique(column)
            best_cut, best_cut_gain = None, None
            for cut in (levels[:-1] + levels[1:]) / 2:
                below = column <= cut
                if below.all() or not below.any():
                    # Degenerate cut (e.g. NaN midpoint): one side would be empty.
                    continue
                cut_gain = gain(_threshold_view(attr, cut), attr, target)
                if best_cut_gain is None or cut_gain > best_cut_gain:
                    best_cut, best_cut_gain = float(cut), cut_gain
            if best_cut is None:
                # Constant column in this subset: nothing to split on.
                continue
            candidate = (
                gain_ratio(_threshold_view(attr, best_cut), attr, target),
                attr,
                best_cut,
            )
        # Strict comparison keeps the first attribute on ties, like max().
        if best is None or candidate[0] > best[0]:
            best = candidate

    if best is None:
        # Only constant numeric columns remain: fall back to the majority label.
        return data[target].mode()[0]

    _, best_attr, cut = best
    branches = {}
    if cut is None:
        remaining_attrs = [attr for attr in attributes if attr != best_attr]
        for value in np.unique(data[best_attr]):
            sub_data = data[data[best_attr] == value]
            branches[value] = c4_5(sub_data, remaining_attrs, target)
    else:
        below = data[best_attr] <= cut
        # Both sides are non-empty and strictly smaller, so recursion terminates
        # even though the numeric attribute is still a candidate.
        branches[f"<= {cut}"] = c4_5(data[below], attributes, target)
        branches[f"> {cut}"] = c4_5(data[~below], attributes, target)
    return {best_attr: branches}

# Pretty-print the C4.5 decision tree in a hierarchical format
def print_tree(tree, depth=0):
    """Print a nested-dict decision tree as an indented outline.

    A non-dict ``tree`` is a leaf and is printed as ``=> label``. For a dict,
    each attribute is printed at ``depth``, each branch value one level
    deeper, and each subtree two levels deeper.
    """
    pad = " " * (2 * depth)
    if not isinstance(tree, dict):
        print(pad, "=>", tree)
        return

    branch_pad = " " * (2 * (depth + 1))
    for attr, branches in tree.items():
        print(pad, attr)
        for value, subtree in branches.items():
            print(branch_pad, value)
            print_tree(subtree, depth + 2)

# Candidate split attributes: every column except the last one (the label)
attributes = data.columns[:-1].tolist()
tree = c4_5(data, attributes)

# Show the learned C4.5 tree under a heading
print("C4.5 Decision Tree:")
print_tree(tree)


C4.5 Decision Tree:
 Temperature
   64
     => Yes
   65
     => No
   68
     => Yes
   69
     => Yes
   70
     => Yes
   71
     => No
   72
     Outlook
       Overcast
         => Yes
       Sunny
         => No
   75
     => Yes
   80
     => No
   81
     => Yes
   83
     => Yes
   85
     => No


In [None]:
import pandas as pd
import numpy as np

# Play-tennis toy dataset, written one observation per row:
# four weather attributes followed by the PlayTennis label.
data = pd.DataFrame(
    [
        ('Sunny', 'Hot', 'High', 'Weak', 'No'),
        ('Sunny', 'Hot', 'High', 'Strong', 'No'),
        ('Overcast', 'Hot', 'High', 'Weak', 'Yes'),
        ('Rain', 'Mild', 'High', 'Weak', 'Yes'),
        ('Rain', 'Cool', 'Normal', 'Weak', 'Yes'),
        ('Rain', 'Cool', 'Normal', 'Strong', 'No'),
        ('Overcast', 'Cool', 'Normal', 'Strong', 'Yes'),
        ('Sunny', 'Mild', 'High', 'Weak', 'No'),
        ('Sunny', 'Cool', 'Normal', 'Weak', 'Yes'),
        ('Rain', 'Mild', 'Normal', 'Weak', 'Yes'),
        ('Sunny', 'Mild', 'Normal', 'Strong', 'Yes'),
        ('Overcast', 'Mild', 'High', 'Strong', 'Yes'),
        ('Overcast', 'Hot', 'Normal', 'Weak', 'Yes'),
        ('Rain', 'Mild', 'High', 'Strong', 'No'),
    ],
    columns=['Outlook', 'Temperature', 'Humidity', 'Wind', 'PlayTennis'],
)

# Calculate Gini index
def gini_index(col):
    """Return the Gini impurity of the values in ``col``.

    The result is one minus the sum of squared relative frequencies of the
    distinct values.
    """
    n_items = len(col)
    frequencies = np.unique(col, return_counts=True)[1]
    squared_shares = 0
    for freq in frequencies:
        squared_shares += (freq / n_items) ** 2
    return 1 - squared_shares

# Calculate Gini index for a split
def gini_split(data, attr, target="PlayTennis"):
    """Return the size-weighted Gini impurity of ``target`` after grouping on ``attr``.

    Each distinct ``attr`` value contributes the Gini impurity of its rows'
    ``target`` labels, weighted by the share of rows holding that value.
    """
    levels, sizes = np.unique(data[attr], return_counts=True)
    weighted_gini = 0
    for level, size in zip(levels, sizes):
        labels_in_group = data[data[attr] == level][target]
        weighted_gini += (size / sum(sizes)) * gini_index(labels_in_group)
    return weighted_gini

# Recursive CART algorithm with customized print format
def cart(data, attributes, target="PlayTennis"):
    """Grow a Gini-based decision tree and return it as nested dicts.

    At each node the attribute with the lowest weighted Gini impurity is
    chosen and one branch is created per distinct value of that attribute.
    A leaf is a single ``target`` label; an internal node has the form
    ``{attribute: {value: subtree, ...}}``.
    """
    classes = np.unique(data[target])
    if len(classes) == 1:
        # Every row agrees on the label: emit it as a leaf.
        return classes[0]
    if len(attributes) == 0:
        # Nothing left to split on: fall back to the most frequent label.
        return data[target].mode()[0]

    def impurity(candidate):
        return gini_split(data, candidate, target)

    best_attr = min(attributes, key=impurity)
    leftover = [name for name in attributes if name != best_attr]

    branches = {}
    for value in np.unique(data[best_attr]):
        matching_rows = data[data[best_attr] == value]
        branches[value] = cart(matching_rows, leftover, target)
    return {best_attr: branches}

# Pretty-print the CART decision tree in the requested format
def print_tree(tree, depth=0):
    """Print a nested-dict decision tree as an indented outline.

    Leaves (any non-dict value) are printed as ``=> label``; attribute names
    are printed at ``depth`` and their branch values one level deeper.
    """
    pad = " " * (2 * depth)
    if not isinstance(tree, dict):
        print(pad, "=>", tree)
        return

    branch_pad = " " * (2 * (depth + 1))
    for attr, branches in tree.items():
        print(pad, attr)
        for value, subtree in branches.items():
            print(branch_pad, value)
            print_tree(subtree, depth + 2)

# Candidate split attributes: every column except the last one (the label)
attributes = data.columns[:-1].tolist()
tree = cart(data, attributes)

# Show the learned CART tree under a heading
print("CART Decision Tree:")
print_tree(tree)


CART Decision Tree:
 Outlook
   Overcast
     => Yes
   Rain
     Wind
       Strong
         => No
       Weak
         => Yes
   Sunny
     Humidity
       High
         => No
       Normal
         => Yes
