### AP21110010200
### AP21110010219
### AP21110010228
### AP21110010253
### AP21110010260

In [None]:
import numpy as np

class Node:
    """A single node of a binary decision tree.

    Attributes:
        feature_index: Index of the feature this node tests.
        threshold: Value the tested feature is compared against.
        left: Child node for one outcome of the test.
        right: Child node for the other outcome of the test.
        value: Prediction stored at the node.

    Every attribute defaults to ``None`` when not supplied.
    """

    def __init__(
        self,
        feature_index=None,
        threshold=None,
        left=None,
        right=None,
        value=None,
    ):
        # Split description.
        self.feature_index, self.threshold = feature_index, threshold
        # Sub-trees.
        self.left, self.right = left, right
        # Stored prediction.
        self.value = value

class DecisionTree:
    """Binary classification tree grown greedily by information gain.

    Every internal node tests ``x[feature_index] < threshold``: samples for
    which the test holds go to the left child, all others to the right child.
    Leaves store the majority class label of the training samples that
    reached them. Split quality is measured with Shannon entropy (base 2).
    """

    def __init__(self, max_depth=None):
        """Create an unfitted tree.

        Args:
            max_depth: Maximum number of split levels below the root.
                ``None`` (the default) means no limit: a node keeps being
                split for as long as some split has positive information
                gain.
        """
        self.max_depth = max_depth

    def fit(self, X, y):
        """Grow the tree from training data.

        Args:
            X: 2-D array-like, one row per sample and one column per feature.
            y: 1-D array-like of class labels, one per row of ``X``. Labels
                may be any values ``np.unique`` can sort; they do not have
                to be the integers ``0..k-1``.

        Raises:
            ValueError: If ``y`` is empty.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if y.size == 0:
            raise ValueError("cannot fit a decision tree on an empty dataset")

        # Remember the actual label values so leaves can store real labels
        # rather than positional indices.
        self.classes_ = np.unique(y)
        self.n_classes = len(self.classes_)
        self.n_features = X.shape[1]
        self.tree = self._grow_tree(X, y)

    def _grow_tree(self, X, y, depth=0):
        """Recursively build the subtree for the samples ``(X, y)``.

        Args:
            X: 2-D array of the samples that reached this node.
            y: 1-D array of their labels.
            depth: Depth of this node; the root is at depth 0.

        Returns:
            A ``Node``: a leaf (``value`` set) when the depth limit is
            reached, the node is pure, or no split has positive gain;
            otherwise an internal node with ``left``/``right`` children.
        """
        class_counts = [np.sum(y == label) for label in self.classes_]
        majority_label = self.classes_[np.argmax(class_counts)]
        leaf = Node(value=majority_label)

        # ``max_depth is None`` means unlimited depth.
        if self.max_depth is not None and depth >= self.max_depth:
            return leaf
        # A pure node cannot gain anything from a split; skip the search.
        if max(class_counts) == len(y):
            return leaf

        parent_entropy = self._entropy(y)
        best_gain = 0
        best_feature_idx = None
        best_threshold = None

        for feature_idx in range(self.n_features):
            # np.unique returns sorted values. The smallest one would leave
            # the left side empty (nothing is strictly below it), so it can
            # never yield positive gain and is skipped.
            for threshold in np.unique(X[:, feature_idx])[1:]:
                gain = self._information_gain(
                    X, y, feature_idx, threshold, parent_entropy
                )
                if gain > best_gain:
                    best_gain = gain
                    best_feature_idx = feature_idx
                    best_threshold = threshold

        if best_feature_idx is None:
            # No candidate split improved on the parent.
            return leaf

        goes_left = X[:, best_feature_idx] < best_threshold
        return Node(
            feature_index=best_feature_idx,
            threshold=best_threshold,
            left=self._grow_tree(X[goes_left], y[goes_left], depth + 1),
            right=self._grow_tree(X[~goes_left], y[~goes_left], depth + 1),
        )

    def _entropy(self, y):
        """Return the base-2 Shannon entropy of the label distribution in ``y``."""
        _, counts = np.unique(y, return_counts=True)
        # Counts from np.unique are always >= 1, so log2 never sees zero.
        probabilities = counts / len(y)
        return -np.sum(probabilities * np.log2(probabilities))

    def _information_gain(self, X, y, feature_idx, threshold, parent_entropy=None):
        """Return the entropy reduction from splitting on one feature/threshold.

        Args:
            X: 2-D array of samples.
            y: 1-D array of their labels.
            feature_idx: Column of ``X`` to test.
            threshold: Samples with ``X[:, feature_idx] < threshold`` go left.
            parent_entropy: Entropy of ``y`` if the caller already computed
                it; computed here when ``None``.

        Returns:
            Parent entropy minus the size-weighted entropy of the two sides,
            or ``0`` when either side would be empty.
        """
        goes_left = X[:, feature_idx] < threshold
        y_left, y_right = y[goes_left], y[~goes_left]

        if len(y_left) == 0 or len(y_right) == 0:
            return 0

        if parent_entropy is None:
            parent_entropy = self._entropy(y)

        n = len(y)
        child_entropy = (
            (len(y_left) / n) * self._entropy(y_left)
            + (len(y_right) / n) * self._entropy(y_right)
        )
        return parent_entropy - child_entropy

    def _predict(self, x, tree):
        """Return the label stored at the leaf that sample ``x`` reaches.

        Walks down from ``tree`` iteratively, so the depth of the tree is not
        bounded by the interpreter's recursion limit.
        """
        node = tree
        while node.value is None:
            if x[node.feature_index] < node.threshold:
                node = node.left
            else:
                node = node.right
        return node.value

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Args:
            X: 2-D array-like, one row per sample.

        Returns:
            1-D ``numpy`` array with one predicted label per row.
        """
        X = np.asarray(X)
        return np.array([self._predict(x, self.tree) for x in X])

# Example usage with Iris dataset
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Fetch the Iris dataset and pull out its data and target arrays.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a depth-limited tree and fit it on the training portion.
tree = DecisionTree(max_depth=3)
tree.fit(X_train, y_train)

# Classify the held-out samples.
predictions = tree.predict(X_test)

# Compare the predictions with the true labels and report the score.
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

