# In [84]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix

# Load the Netflix (NFLX) stock data from the exported CSV.
# The path is relative, so the file has to be in the current working directory.
nflx_data = pd.read_csv("Download Data - STOCK_US_XNAS_NFLX.csv")

# Preprocess the data.
# 'Volume' is handled as text here: commas are stripped before the cast to
# float (presumably thousands separators such as "1,234,567" - confirm
# against the CSV).
nflx_data['Volume'] = nflx_data['Volume'].str.replace(',', '').astype(float)

# Binary target: 1 when the volume is strictly above the median, else 0.
median_volume = nflx_data['Volume'].median()
nflx_data['High_Volume'] = (nflx_data['Volume'] > median_volume).astype(int)

# Feature selection: the four price columns are the predictors.
X = nflx_data[['Open', 'Close', 'High', 'Low']]
y = nflx_data['High_Volume']

# Split the data into training (80%) and testing (20%) sets; the fixed
# random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# NOTE: the classifier is trained and evaluated further down, after the
# DecisionTreeEntropy class has been defined. Using the class at this point
# would raise a NameError when the file is executed from top to bottom.
class Node:
    """A single node of a binary decision tree.

    An internal node carries a split description (``feature`` index and
    ``threshold``) together with its ``left`` and ``right`` children.
    A leaf node carries a prediction in ``value``.  Every attribute
    defaults to ``None`` when it is not supplied.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        # Split rule stored on the node.
        self.feature, self.threshold = feature, threshold
        # Child nodes reached from this node.
        self.left, self.right = left, right
        # Prediction stored on the node (left as None when not given).
        self.value = value

class DecisionTreeEntropy:
    """Decision-tree classifier that chooses splits by information gain.

    Each internal node tests ``x[feature] <= threshold``; samples satisfying
    the test go to the left child, all others to the right child.  Candidate
    thresholds are the distinct values observed for a feature.

    Parameters
    ----------
    max_depth : int or None
        Depth at which growing stops and a majority-vote leaf is created.
        ``None`` lets the tree grow until every leaf is pure or no valid
        split remains.

    Attributes
    ----------
    tree : Node or None
        Root of the fitted tree; ``None`` until ``fit`` has been called.
    """

    def __init__(self, max_depth=None):
        self.max_depth = max_depth
        self.tree = None

    def fit(self, X, y):
        """Build the tree from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like with one label per row of ``X``

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, if ``X`` and ``y`` disagree on the number of
            samples, or if there are no samples at all.
        """
        # Accept lists / DataFrames as well as ndarrays.
        X = np.asarray(X)
        y = np.asarray(y)

        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if y.shape[:1] != X.shape[:1]:
            raise ValueError("X and y must contain the same number of samples")
        if X.shape[0] == 0:
            raise ValueError("cannot fit a decision tree on an empty dataset")

        self.tree = self._grow_tree(X, y, depth=0)

    def _grow_tree(self, X, y, depth):
        """Recursively build and return the subtree for the samples ``X, y``."""
        unique_classes = np.unique(y)

        # Stop condition: the node is pure.
        if len(unique_classes) == 1:
            return Node(value=unique_classes[0])

        # Stop condition: the depth limit has been reached.
        if depth == self.max_depth:
            return Node(value=self._majority_vote(y))

        # Find the best split.
        best_feature, best_threshold = self._find_best_split(X, y)

        # Stop condition: no threshold separates the samples (for example all
        # rows are identical but carry different labels).  Without this guard
        # the recursion would continue on an empty subset and crash.
        if best_feature is None:
            return Node(value=self._majority_vote(y))

        # Split the data.
        left_mask = X[:, best_feature] <= best_threshold
        right_mask = ~left_mask

        left_subtree = self._grow_tree(X[left_mask], y[left_mask], depth + 1)
        right_subtree = self._grow_tree(X[right_mask], y[right_mask], depth + 1)

        # Create the current node.
        return Node(feature=best_feature, threshold=best_threshold,
                    left=left_subtree, right=right_subtree)

    def _find_best_split(self, X, y):
        """Return ``(feature, threshold)`` of the split with the highest gain.

        Only splits that leave at least one sample on each side are
        considered.  Returns ``(None, None)`` when no such split exists.
        Ties are resolved in favour of the first candidate encountered
        (lowest feature index, then lowest threshold).
        """
        num_samples, num_features = X.shape
        # The parent entropy is the same for every candidate: compute it once.
        parent_entropy = self._entropy(y)

        best_info_gain = -float('inf')
        best_feature = None
        best_threshold = None

        for feature in range(num_features):
            column = X[:, feature]
            for threshold in np.unique(column):
                left_mask = column <= threshold

                # Skip degenerate splits that send every sample to one side.
                num_left = np.count_nonzero(left_mask)
                if num_left == 0 or num_left == num_samples:
                    continue

                info_gain = self._information_gain(
                    y, y[left_mask], y[~left_mask], parent_entropy=parent_entropy
                )

                if info_gain > best_info_gain:
                    best_info_gain = info_gain
                    best_feature = feature
                    best_threshold = threshold

        return best_feature, best_threshold

    def _information_gain(self, parent, left_child, right_child, parent_entropy=None):
        """Return the entropy reduction obtained by splitting ``parent``.

        ``parent_entropy`` may be supplied to avoid recomputing the entropy of
        ``parent`` when many splits of the same node are evaluated.
        """
        num_parent = len(parent)
        if num_parent == 0:
            # Nothing to split, hence nothing to gain (also avoids 0/0).
            return 0.0

        if parent_entropy is None:
            parent_entropy = self._entropy(parent)

        # Child entropies are weighted by the share of samples they receive.
        entropy_left = len(left_child) / num_parent * self._entropy(left_child)
        entropy_right = len(right_child) / num_parent * self._entropy(right_child)

        return parent_entropy - entropy_left - entropy_right

    def _entropy(self, y):
        """Return the Shannon entropy (in bits) of the labels in ``y``.

        An empty label array has entropy 0.
        """
        if len(y) == 0:
            return 0.0

        _, counts = np.unique(y, return_counts=True)
        probabilities = counts / len(y)
        # Every class returned by np.unique occurs at least once, so no
        # probability is zero and the logarithm needs no smoothing term.
        return -np.sum(probabilities * np.log2(probabilities))

    def _majority_vote(self, y):
        """Return the most frequent label in ``y`` (smallest label on ties)."""
        classes, counts = np.unique(y, return_counts=True)
        return classes[np.argmax(counts)]

    def predict(self, X):
        """Return an array with the predicted label for each row of ``X``.

        ``fit`` must have been called beforehand.
        """
        X = np.asarray(X)
        return np.array([self._predict_tree(x, self.tree) for x in X])

    def _predict_tree(self, x, node):
        """Walk from ``node`` down to a leaf for sample ``x``; return its value."""
        # Internal nodes are the ones whose value is None.
        while node.value is None:
            if x[node.feature] <= node.threshold:
                node = node.left
            else:
                node = node.right
        return node.value

# Example usage: train the from-scratch tree with max_depth=3.
dt_entropy_classifier_scratch = DecisionTreeEntropy(max_depth=3)
dt_entropy_classifier_scratch.fit(X_train.values, y_train.values)

# Predicting on the test set
y_pred_dt_entropy_scratch = dt_entropy_classifier_scratch.predict(X_test.values)

# Model Evaluation
# zero_division=0 is passed to both precision and recall so that an undefined
# score (no predicted / no actual positives) is reported as 0 consistently
# instead of triggering a warning for only one of the two metrics.
accuracy_dt_entropy_scratch = accuracy_score(y_test, y_pred_dt_entropy_scratch)
precision_dt_entropy_scratch = precision_score(y_test, y_pred_dt_entropy_scratch, zero_division=0)
recall_dt_entropy_scratch = recall_score(y_test, y_pred_dt_entropy_scratch, zero_division=0)
conf_matrix_dt_entropy_scratch = confusion_matrix(y_test, y_pred_dt_entropy_scratch)

# Report the metrics.
print("Entropy-based Decision Tree:")
print("Accuracy:", accuracy_dt_entropy_scratch)
print("Precision:", precision_dt_entropy_scratch)
print("Recall:", recall_dt_entropy_scratch)
print("Confusion Matrix:")
print(conf_matrix_dt_entropy_scratch)


# Recorded output of the cell above:
# Entropy-based Decision Tree:
# Accuracy: 0.2
# Precision: 0.0
# Recall: 0.0
# Confusion Matrix:
# [[1 0]
#  [4 0]]
