In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [4]:
# Fetch the iris dataset and separate the feature matrix from the class labels
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [5]:
# CART - Gini (default)
# random_state is pinned because scikit-learn permutes the candidate features at
# every split; without a seed, ties between equally good splits can be broken
# differently from run to run, changing the fitted tree and the reported accuracy.
cart = DecisionTreeClassifier(criterion='gini', random_state=1)
cart.fit(X_train, y_train)

# Predict & Evaluate on the held-out test split
y_pred = cart.predict(X_test)
print("CART Accuracy:", accuracy_score(y_test, y_pred))


CART Accuracy: 0.9666666666666667


In [6]:
# "ID3-style" tree using scikit-learn: the split criterion is entropy (information gain).
# NOTE: scikit-learn only implements an optimised CART (binary splits on thresholds),
# so this is CART with an entropy criterion rather than a true ID3 implementation.
# random_state is pinned so tie-breaking between equally good splits is reproducible.
id3 = DecisionTreeClassifier(criterion='entropy', random_state=1)
id3.fit(X_train, y_train)

# Predict & Evaluate on the held-out test split
y_pred_id3 = id3.predict(X_test)
print("ID3 Accuracy:", accuracy_score(y_test, y_pred_id3))


ID3 Accuracy: 0.9666666666666667


In [10]:
import numpy as np
from collections import Counter

class Node:
    """A single node of a binary decision tree.

    Internal nodes carry a split (``feature`` index and ``threshold``) together
    with their ``left`` and ``right`` children. Leaf nodes carry only ``value``,
    which must be passed by keyword.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, *, value=None):
        # Split description: column index to test and the cut-off to compare against
        self.feature, self.threshold = feature, threshold
        # Child subtrees of an internal node
        self.left, self.right = left, right
        # Stored label; stays None for internal nodes
        self.value = value

    def is_leaf(self):
        """Return True when this node stores a value, i.e. it is a leaf."""
        if self.value is None:
            return False
        return True

class DTC:
    """Binary decision-tree classifier grown by greedy recursive partitioning.

    Every internal node tests ``x[feature] <= threshold``; candidate thresholds
    are the distinct values of each feature observed at that node. The split
    with the largest impurity decrease is chosen, the first one found winning
    ties.

    Parameters
    ----------
    criterion : {'gini', 'entropy'}
        Impurity measure used to score splits.
    max_depth : int or None
        Maximum depth of the tree (the root is at depth 0). ``None`` grows the
        tree until every leaf is pure or no valid split remains.

    Notes
    -----
    Class labels may be any hashable values that ``np.unique`` can sort
    (integers, including negative ones, or strings). They cannot be ``None``,
    because a leaf is recognised by its stored value being non-None.
    """

    def __init__(self, criterion='gini', max_depth=3):
        self.criterion = criterion
        self.max_depth = max_depth
        self.root = None

    def fit(self, X, y):
        """Build the tree from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        self, so that calls can be chained.

        Raises
        ------
        ValueError
            If the inputs are empty, have the wrong number of dimensions, have
            mismatched lengths, or if ``criterion`` is not recognised.
        """
        # Accept plain lists and other array-likes, not only ndarrays.
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if y.ndim != 1:
            raise ValueError("y must be a 1-D array of shape (n_samples,)")
        if len(X) != len(y):
            raise ValueError("X and y must contain the same number of samples")
        if len(y) == 0:
            raise ValueError("Cannot fit a tree on an empty training set")
        self.root = self._grow_tree(X, y)
        return self

    def _impurity(self, y):
        """Return the Gini impurity or the entropy (in bits) of the labels ``y``."""
        if self.criterion not in ('gini', 'entropy'):
            raise ValueError("Unknown criterion")
        if len(y) == 0:
            # An empty set of labels is treated as perfectly pure.
            return 0.0
        # np.unique (unlike np.bincount) works for negative and non-integer labels
        # and only produces counts for classes that are actually present.
        _, counts = np.unique(y, return_counts=True)
        probs = counts / len(y)
        if self.criterion == 'gini':
            return 1 - np.sum(probs**2)
        # Every probability is > 0 here, so log2 is always defined.
        return -np.sum(probs * np.log2(probs))

    def _information_gain(self, y, left_y, right_y, parent_impurity=None):
        """Return the impurity decrease from splitting ``y`` into two parts.

        ``parent_impurity`` may be supplied when the caller has already
        computed the impurity of ``y``; otherwise it is computed here.
        """
        if parent_impurity is None:
            parent_impurity = self._impurity(y)
        n_total = len(y)
        w_l, w_r = len(left_y) / n_total, len(right_y) / n_total
        children = w_l * self._impurity(left_y) + w_r * self._impurity(right_y)
        return parent_impurity - children

    def _best_split(self, X, y):
        """Search all features and thresholds for the split with the highest gain.

        Returns ``(feature_index, threshold)``, or ``(None, None)`` when no
        threshold separates the samples into two non-empty groups.
        """
        # Starting below zero means zero-gain splits are still accepted.
        best_gain, best_feat, best_thresh = -1, None, None
        # The parent impurity is the same for every candidate, so compute it once.
        parent_impurity = self._impurity(y)
        for feat in range(X.shape[1]):
            column = X[:, feat]
            for t in np.unique(column):
                left_idx = column <= t
                # Complement of the left mask: every sample lands in exactly one
                # child, the same way _predict routes samples.
                right_idx = ~left_idx
                if not left_idx.any() or not right_idx.any():
                    continue
                gain = self._information_gain(y, y[left_idx], y[right_idx], parent_impurity)
                if gain > best_gain:
                    best_gain, best_feat, best_thresh = gain, feat, t
        return best_feat, best_thresh

    @staticmethod
    def _majority_label(y):
        """Return the most frequent label in ``y`` (first seen wins ties)."""
        return Counter(y).most_common(1)[0][0]

    def _grow_tree(self, X, y, depth=0):
        """Recursively build the subtree for the samples ``(X, y)``."""
        depth_exhausted = self.max_depth is not None and depth >= self.max_depth
        if np.unique(y).size == 1 or depth_exhausted:
            return Node(value=self._majority_label(y))

        feat, thresh = self._best_split(X, y)
        if feat is None:
            # All remaining samples are identical on every feature.
            return Node(value=self._majority_label(y))

        left_idx = X[:, feat] <= thresh
        right_idx = ~left_idx

        left = self._grow_tree(X[left_idx], y[left_idx], depth + 1)
        right = self._grow_tree(X[right_idx], y[right_idx], depth + 1)
        return Node(feat, thresh, left, right)

    def _predict(self, x, node):
        """Route one sample ``x`` down from ``node`` and return the leaf's label."""
        # Iterative descent: prediction depth is not bounded by the recursion limit.
        while not node.is_leaf():
            if x[node.feature] <= node.threshold:
                node = node.left
            else:
                node = node.right
        return node.value

    def predict(self, X):
        """Predict a label for every row of ``X`` and return them as an array.

        Raises
        ------
        AttributeError
            If called before ``fit``.
        """
        if self.root is None:
            raise AttributeError("This DTC instance is not fitted yet; call fit() first")
        X = np.asarray(X)
        return np.array([self._predict(x, self.root) for x in X])


In [11]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Reload iris inside this cell so it does not rely on variables from earlier cells
iris = load_iris()

# For simplicity, keep only classes 0 and 1 (drop class 2) to get a binary problem
keep = iris.target != 2
X = iris.data[keep]
y = iris.target[keep]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# From-scratch tree scored with entropy (reported under the ID3 label)
tree_entropy = DTC(criterion='entropy', max_depth=3)
tree_entropy.fit(X_train, y_train)
pred_entropy = tree_entropy.predict(X_test)
acc_entropy = accuracy_score(y_test, pred_entropy)
print("ID3 (entropy) Accuracy:", acc_entropy)

# From-scratch tree scored with Gini impurity (reported under the CART label)
tree_gini = DTC(criterion='gini', max_depth=3)
tree_gini.fit(X_train, y_train)
pred_gini = tree_gini.predict(X_test)
acc_gini = accuracy_score(y_test, pred_gini)
print("CART (gini) Accuracy:", acc_gini)


ID3 (entropy) Accuracy: 1.0
CART (gini) Accuracy: 1.0
