In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the raw records; `lbl` keeps the full list of column names.
df = pd.read_csv('pw7.csv')
lbl = list(df.columns)

# Target series, and the feature frame with the identifier and target removed.
Y = df['Exam Performance']
X = df.drop(columns=['Student ID', 'Exam Performance'])

# Replace each listed column with a three-level Low/Medium/High binning of itself.
X = X.assign(**{
    col: pd.cut(X[col], bins=3, labels=['Low', 'Medium', 'High'])
    for col in (
        'Study Hours',
        'Attendance Percentage',
        'Previous Semester Score',
        'Sleep Hours per Night',
    )
})

# Features and target side by side, as consumed by the tree builders.
data = pd.concat([X, Y], axis=1)

In [38]:
def entropy(y):
    """Return the base-2 Shannon entropy of the values in ``y``.

    Probabilities are the relative frequencies of the distinct values.
    """
    _, freq = np.unique(y, return_counts=True)
    prob = freq / freq.sum()
    return -(prob * np.log2(prob)).sum()

def info_gain(df, attr, target):
    """Return the information gain of splitting ``df`` on column ``attr``.

    This is the entropy of ``df[target]`` minus the size-weighted entropy of
    ``target`` within each distinct value of ``attr``.
    """
    levels, sizes = np.unique(df[attr], return_counts=True)
    total = np.sum(sizes)
    weighted = [
        (size / total) * entropy(df.loc[df[attr] == level, target])
        for level, size in zip(levels, sizes)
    ]
    return entropy(df[target]) - np.sum(weighted)

def id3(df, features, target):
    """Recursively build a decision tree, choosing splits by information gain.

    Returns a class label for a leaf, otherwise a nested dict of the form
    ``{feature: {value: subtree_or_label}}``.
    """
    classes, freq = np.unique(df[target], return_counts=True)

    # Pure node: every row carries the same label.
    if len(classes) == 1:
        return classes[0]
    # No feature left to split on: answer with the most frequent label.
    if len(features) == 0:
        return classes[np.argmax(freq)]

    scores = [info_gain(df, name, target) for name in features]
    chosen = features[np.argmax(scores)]
    remaining = [name for name in features if name != chosen]

    branches = {}
    for level in np.unique(df[chosen]):
        subset = df[df[chosen] == level].drop(columns=[chosen])
        branches[level] = id3(subset, remaining, target)
    return {chosen: branches}

def split_info(df, attr):
    """Return the base-2 entropy of the value distribution of ``df[attr]``.

    Used as the denominator of the gain ratio.
    """
    _, sizes = np.unique(df[attr], return_counts=True)
    frac = sizes / sizes.sum()
    return -(frac * np.log2(frac)).sum()

def gain_ratio(df, attr, target):
    """Return information gain divided by split information for ``attr``.

    Yields ``0`` when the split information is zero, avoiding a division by
    zero.
    """
    gain = info_gain(df, attr, target)
    spread = split_info(df, attr)
    if spread == 0:
        return 0
    return gain / spread

def c45(df, features, target):
    """Recursively build a decision tree, choosing splits by gain ratio.

    Returns a class label for a leaf, otherwise a nested dict of the form
    ``{feature: {value: subtree_or_label}}``.
    """
    classes, freq = np.unique(df[target], return_counts=True)

    # Pure node: every row carries the same label.
    if len(classes) == 1:
        return classes[0]
    # No feature left to split on: answer with the most frequent label.
    if len(features) == 0:
        return classes[np.argmax(freq)]

    scores = [gain_ratio(df, name, target) for name in features]
    chosen = features[np.argmax(scores)]
    remaining = [name for name in features if name != chosen]

    branches = {}
    for level in np.unique(df[chosen]):
        subset = df[df[chosen] == level].drop(columns=[chosen])
        branches[level] = c45(subset, remaining, target)
    return {chosen: branches}

def gini(y):
    """Return the Gini impurity of the values in ``y``.

    Computed as one minus the sum of squared relative class frequencies.
    """
    _, freq = np.unique(y, return_counts=True)
    share = freq / freq.sum()
    return 1 - (share * share).sum()

def best_split(df, features, target):
    """Find the threshold split with the lowest weighted Gini impurity.

    For every feature that can be cast to ``float``, each midpoint between
    consecutive distinct values is tried as a threshold. Rows are divided
    into ``value <= threshold`` and ``value > threshold``.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to split.
    features : iterable of str
        Candidate column names. Columns that cannot be cast to ``float``
        are skipped.
    target : str
        Name of the label column.

    Returns
    -------
    tuple
        ``(feature, threshold)`` of the best split, or ``(None, None)`` when
        no candidate produces two non-empty sides.
    """
    min_gini = float('inf')
    best_feature = None
    best_value = None
    n_rows = len(df)

    for f in features:
        try:
            numeric = df[f].astype(float)
        except (TypeError, ValueError):
            # Not numeric-like (for example a labelled categorical): skip it.
            continue

        labels = df[target]
        values = numeric.sort_values().unique()
        for i in range(1, len(values)):
            v = (values[i - 1] + values[i]) / 2

            # Compare on the float-cast values so the thresholds and the row
            # masks are derived from the same representation of the column.
            left_labels = labels[numeric <= v]
            right_labels = labels[numeric > v]

            if len(left_labels) == 0 or len(right_labels) == 0:
                continue

            g = (len(left_labels) / n_rows) * gini(left_labels) + \
                (len(right_labels) / n_rows) * gini(right_labels)

            # Strict comparison keeps the first split found among ties.
            if g < min_gini:
                min_gini = g
                best_feature = f
                best_value = v

    return best_feature, best_value

def cart(df, features, target):
    """Recursively build a binary tree from the splits chosen by ``best_split``.

    Returns a class label for a leaf, otherwise a dict of the form
    ``{(feature, threshold): {'<=': subtree, '>': subtree}}``.
    """
    classes, freq = np.unique(df[target], return_counts=True)

    # Pure node: every row carries the same label.
    if len(classes) == 1:
        return classes[0]

    # Look for a split only when there are candidate features.
    if len(features) > 0:
        feature, threshold = best_split(df, features, target)
    else:
        feature = None

    # No features or no usable split: answer with the most frequent label.
    if feature is None:
        return classes[np.argmax(freq)]

    goes_left = df[feature] <= threshold
    goes_right = df[feature] > threshold
    lower = df[goes_left]
    upper = df[goes_right]

    # The feature list is passed on unchanged to both children.
    return {
        (feature, threshold): {
            '<=': cart(lower, features, target),
            '>': cart(upper, features, target),
        }
    }


In [39]:
# Every column of `data` except the target (not referenced by the code shown in this cell).
numeric_features = [name for name in data.columns if name != 'Exam Performance']

# Fit one tree per algorithm on the same data and feature list.
id3tree = id3(df=data, features=list(X.columns), target='Exam Performance')
c45tree = c45(df=data, features=list(X.columns), target='Exam Performance')
carttree = cart(df=data, features=list(X.columns), target='Exam Performance')
def _match_branch(branches, val):
    """Pick the child of a categorical node that corresponds to ``val``.

    Tries, in order: exact key lookup, equality of string forms, numeric
    equality. When nothing matches, the first child is returned.
    """
    if val in branches:
        return branches[val]

    if val is not None:
        # Typed input arrives as text while keys may be numbers, so compare
        # string forms first.
        text = str(val).strip()
        for key, child in branches.items():
            if str(key) == text:
                return child

        # Then compare numerically, so that e.g. '7' matches a key of 7.0.
        try:
            number = float(val)
        except (TypeError, ValueError):
            number = None
        if number is not None:
            for key, child in branches.items():
                try:
                    if float(key) == number:
                        return child
                except (TypeError, ValueError):
                    continue

    # Value not present at this node: fall back to the first child.
    return list(branches.values())[0]


def predict(tree, row):
    """Walk ``tree`` for one record and return the value reached.

    Parameters
    ----------
    tree : dict or label
        Either ``{feature: {value: subtree}}`` (categorical node),
        ``{(feature, threshold): {'<=': subtree, '>': subtree}}``
        (threshold node), or a non-dict leaf value.
    row : mapping
        Feature values keyed by feature name; must support ``row[key]`` and
        ``row.get(key)``.

    Returns
    -------
    The first non-dict value reached. If a node key is neither a 2-tuple nor
    a string, the walk stops and the current subtree is returned as is.
    """
    while isinstance(tree, dict):
        node = next(iter(tree))

        if isinstance(node, tuple) and len(node) == 2:
            f, v = node
            try:
                val = float(row[f])
            except (TypeError, ValueError):
                # Not convertible: compare the raw value, as before.
                val = row[f]
            tree = tree[node]['<='] if val <= v else tree[node]['>']

        elif isinstance(node, str):
            tree = _match_branch(tree[node], row.get(node))

        else:
            break

    return tree

def predict_new_student(trees, names, columns):
    """Prompt for one student's feature values and print each tree's prediction.

    Parameters
    ----------
    trees : sequence
        Fitted trees, each passed to ``predict``.
    names : sequence of str
        Display name for each tree, in the same order as ``trees``.
    columns : iterable of str
        Feature names to prompt for, one prompt per name.
    """
    print("Enter details for a new student:")
    # Answers are stored exactly as typed (strings); nothing is cast or binned here.
    answers = {col: input(f"{col}: ") for col in columns}

    print("\nPredictions:")
    for idx, tree in enumerate(trees):
        outcome = predict(tree, answers)
        print(f"{names[idx]} Prediction: {outcome}")

# id3tree, c45tree and carttree are already fitted at the top of this cell with
# the same arguments; fitting them again here would only repeat the same work.
predict_new_student([id3tree, c45tree, carttree], ['ID3', 'C4.5', 'CART'], list(X.columns))

Enter details for a new student:

Predictions:
ID3 Prediction: 1
C4.5 Prediction: 1
CART Prediction: 0
