In [1]:
import pandas as pd
import numpy as np
import pandas as pd
import math

In [2]:
# Training table: one row per sample, laid out as
# [age, income, student, credit_rating, buys_computer]; the last entry is the class label.
# NOTE(review): age only takes the values 30 / 31 / 40 here, which presumably encode
# age brackets rather than real ages; confirm against the data source.
data = [
    [30, 'high', 'no', 'fair', 'no'],
    [30, 'high', 'no', 'excellent', 'no'],
    [31, 'medium', 'no', 'fair', 'yes'],
    [40, 'low', 'no', 'fair', 'yes'],
    [40, 'low', 'yes', 'fair', 'yes'],
    [40, 'low', 'yes', 'excellent', 'no'],
    [31, 'medium', 'yes', 'excellent', 'yes'],
    [30, 'high', 'no', 'fair', 'no'],
    [30, 'medium', 'yes', 'fair', 'yes'],
    [31, 'medium', 'yes', 'excellent', 'yes'],
    [31, 'high', 'no', 'excellent', 'yes'],
    [40, 'medium', 'no', 'fair', 'yes'],
    [40, 'high', 'yes', 'fair', 'yes'],
    [31, 'medium', 'no', 'excellent', 'no']
]

In [3]:
# Wrap the raw rows in a labelled table so columns can be addressed by name.
df = pd.DataFrame(
    data,
    columns=['age', 'income', 'student', 'credit_rating', 'buys_computer'],
)

# The label column is kept on its own; every remaining column is a predictor.
y = df['buys_computer']
X = df.drop('buys_computer', axis='columns')
     

In [4]:
class DecisionTreeNode:
    """Plain record for one node of a binary decision tree.

    The constructor stores its five arguments unchanged as attributes of the
    same name; every argument defaults to ``None``.

    Attributes
    ----------
    feature, value
        The split test held by the node (feature name and threshold value).
    left, right
        Child nodes.
    target_class
        Class label held by the node.
    """

    def __init__(self, feature=None, value=None, left=None, right=None, target_class=None):
        # Split test.
        self.feature, self.value = feature, value
        # Children.
        self.left, self.right = left, right
        # Label payload.
        self.target_class = target_class
        

     

In [5]:
def calculate_entropy(y):
    """Return the Shannon entropy, in bits, of the labels in ``y``.

    ``y`` must provide ``value_counts()`` (a pandas Series in this notebook).
    Each distinct label contributes ``-p * log2(p)``, where ``p`` is that
    label's count divided by ``len(y)``. An empty ``y`` yields ``0``.
    """
    n_samples = len(y)
    result = 0
    for occurrences in y.value_counts().tolist():
        share = occurrences / n_samples
        result = result - share * math.log2(share)
    return result
     

In [6]:
def calculate_information_gain(X, y, feature, split_value):
    """Score the threshold split ``X[feature] <= split_value`` by information gain.

    The gain is the entropy of ``y`` minus the size-weighted entropies of the
    two partitions: rows where ``X[feature] <= split_value`` and rows where
    ``X[feature] > split_value``.

    Parameters
    ----------
    X : table indexable by ``feature`` (a pandas DataFrame in this notebook)
    y : labels, selected with the boolean masks computed from ``X``
    feature : column of ``X`` to test
    split_value : threshold compared against the column with ``<=`` / ``>``

    Returns
    -------
    The information gain of the split.
    """
    parent_entropy = calculate_entropy(y)

    column = X[feature]
    masks = (column <= split_value, column > split_value)

    # Entropy of each side, then the fraction of samples that landed there.
    low_entropy, high_entropy = [calculate_entropy(y[mask]) for mask in masks]
    low_share, high_share = [sum(mask) / len(y) for mask in masks]

    weighted_child_entropy = low_share * low_entropy + high_share * high_entropy
    return parent_entropy - weighted_child_entropy

In [7]:
def build_decision_tree(X, y):
    """Recursively grow a binary decision tree by greedy information-gain splits.

    Every (feature, observed value) pair is scored as the threshold split
    ``X[feature] <= value`` versus ``X[feature] > value``. The first pair with
    the highest strictly positive information gain becomes the node's test,
    and both partitions are grown recursively.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table, one row per sample.
    y : pandas.Series
        Class labels; partitioned with the same boolean masks as ``X``.

    Returns
    -------
    DecisionTreeNode
        A leaf (only ``target_class`` set) or an internal node (``feature``,
        ``value``, ``left`` and ``right`` set).

    Raises
    ------
    ValueError
        If ``y`` is empty, since there is no label to put on a leaf.
    """
    if len(y) == 0:
        raise ValueError("cannot build a decision tree from an empty label set")

    if len(set(y)) == 1:  # Pure node: every sample carries the same label
        return DecisionTreeNode(target_class=y.iloc[0])

    if len(X.columns) == 0:  # If there are no features left to split on
        return DecisionTreeNode(target_class=y.mode()[0])

    best_information_gain = 0
    best_feature = None
    best_split_value = None

    for feature in X.columns:
        for value in X[feature].unique():
            information_gain = calculate_information_gain(X, y, feature, value)
            # Strict '>' keeps the first-found split on ties.
            if information_gain > best_information_gain:
                best_information_gain = information_gain
                best_feature = feature
                best_split_value = value

    if best_feature is None:
        # No split improves purity (e.g. identical feature rows with conflicting
        # labels). Previously this fell through to X[None] and raised KeyError;
        # fall back to a majority-class leaf instead.
        return DecisionTreeNode(target_class=y.mode()[0])

    # A strictly positive gain implies both partitions are non-empty, so each
    # recursive call works on a strictly smaller, non-empty subset.
    left_indices = X[best_feature] <= best_split_value
    right_indices = X[best_feature] > best_split_value
    left_subtree = build_decision_tree(X[left_indices], y[left_indices])
    right_subtree = build_decision_tree(X[right_indices], y[right_indices])

    return DecisionTreeNode(feature=best_feature, value=best_split_value, left=left_subtree, right=right_subtree)

In [8]:
# Fit the tree once on the full training table; the prediction cell reuses this root node.
decision_tree = build_decision_tree(X=X, y=y)

In [9]:
def predict(tree, sample, feature_names=None):
    """Classify one sample by walking from ``tree`` down to a leaf.

    Parameters
    ----------
    tree : node object
        Root of the (sub)tree. A node is a leaf when its ``target_class`` is
        not ``None``; otherwise its ``feature``, ``value``, ``left`` and
        ``right`` attributes are read.
    sample : sequence, dict or pandas.Series
        Feature values. Objects exposing ``keys()`` (dicts, Series) are read
        by feature name; anything else is read by position.
    feature_names : sequence, optional
        Column order used for positional samples. When omitted, the
        notebook-level ``X.columns`` is used, which was the original behaviour.

    Returns
    -------
    The ``target_class`` stored on the leaf that the sample reaches.

    Raises
    ------
    KeyError
        If a feature tested by the tree is missing from the sample or from
        the column order.
    """
    read_by_name = hasattr(sample, "keys")
    positions = None  # built lazily so a leaf-only tree never touches the global X

    node = tree
    while node.target_class is None:
        if read_by_name:
            observed = sample[node.feature]
        else:
            if positions is None:
                names = X.columns if feature_names is None else feature_names
                positions = {name: index for index, name in enumerate(names)}
            observed = sample[positions[node.feature]]
        # Same test used when the tree was grown: '<=' goes left, otherwise right.
        node = node.left if observed <= node.value else node.right
    return node.target_class

In [10]:
# Samples to classify, in feature order [age, income, student, credit_rating].
# The first eight rows are hand-written probes. The remaining rows are the
# feature columns of every training row in `data` (label stripped), derived
# here rather than copied by hand so the two tables cannot drift apart.
data2 = [
    [20, 'low', 'yes', 'excellent'],
    [30, 'high', 'no', 'fair'],
    [40, 'medium', 'yes', 'fair'],
    [50, 'low', 'no', 'fair'],
    [25, 'high', 'yes', 'excellent'],
    [35, 'medium', 'no', 'fair'],
    [45, 'low', 'yes', 'excellent'],
    [55, 'high', 'no', 'fair'],
] + [row[:-1] for row in data]

# Print one line per sample with the class the fitted tree assigns to it.
for sample in data2:
    prediction = predict(decision_tree, sample)
    print(f"Sample : {sample}, Predicted : {prediction}")

Sample : [20, 'low', 'yes', 'excellent'], Predicted : yes
Sample : [30, 'high', 'no', 'fair'], Predicted : no
Sample : [40, 'medium', 'yes', 'fair'], Predicted : yes
Sample : [50, 'low', 'no', 'fair'], Predicted : yes
Sample : [25, 'high', 'yes', 'excellent'], Predicted : no
Sample : [35, 'medium', 'no', 'fair'], Predicted : yes
Sample : [45, 'low', 'yes', 'excellent'], Predicted : no
Sample : [55, 'high', 'no', 'fair'], Predicted : yes
Sample : [30, 'high', 'no', 'fair'], Predicted : no
Sample : [30, 'high', 'no', 'excellent'], Predicted : no
Sample : [31, 'medium', 'no', 'fair'], Predicted : yes
Sample : [40, 'low', 'no', 'fair'], Predicted : yes
Sample : [40, 'low', 'yes', 'fair'], Predicted : yes
Sample : [40, 'low', 'yes', 'excellent'], Predicted : no
Sample : [31, 'medium', 'yes', 'excellent'], Predicted : yes
Sample : [30, 'high', 'no', 'fair'], Predicted : no
Sample : [30, 'medium', 'yes', 'fair'], Predicted : yes
Sample : [31, 'medium', 'yes', 'excellent'], Predicted : yes
Sam