In [1]:
import numpy as np
import pandas as pd
from collections import Counter

class Node:
    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        self.feature = feature  # Feature to split on
        self.threshold = threshold  # Threshold for decision
        self.left = left  # Left subtree
        self.right = right  # Right subtree
        self.value = value  # Class label for leaf nodes

class DecisionTree:
    def __init__(self, max_depth=10, min_samples_split=2):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.root = None
    
    def entropy(self, y):
        counts = np.bincount(y)
        probabilities = counts / len(y)
        return -np.sum([p * np.log2(p) for p in probabilities if p > 0])
    
    def best_split(self, X, y):
        best_gain = -1
        best_split = None
        best_feature = None
        n_samples, n_features = X.shape
        parent_entropy = self.entropy(y)
        
        for feature in range(n_features):
            thresholds = np.unique(X[:, feature])
            for threshold in thresholds:
                left_indices = X[:, feature] <= threshold
                right_indices = X[:, feature] > threshold
                if np.sum(left_indices) == 0 or np.sum(right_indices) == 0:
                    continue
                
                left_entropy = self.entropy(y[left_indices])
                right_entropy = self.entropy(y[right_indices])
                
                weighted_entropy = (np.sum(left_indices) / n_samples) * left_entropy + (np.sum(right_indices) / n_samples) * right_entropy
                
                information_gain = parent_entropy - weighted_entropy
                
                if information_gain > best_gain:
                    best_gain = information_gain
                    best_split = threshold
                    best_feature = feature
        
        return best_feature, best_split
    
    def build_tree(self, X, y, depth=0):
        if depth >= self.max_depth or len(np.unique(y)) == 1 or len(y) < self.min_samples_split:
            return Node(value=Counter(y).most_common(1)[0][0])
        
        feature, threshold = self.best_split(X, y)
        if feature is None:
            return Node(value=Counter(y).most_common(1)[0][0])
        
        left_indices = X[:, feature] <= threshold
        right_indices = X[:, feature] > threshold
        
        left_subtree = self.build_tree(X[left_indices], y[left_indices], depth + 1)
        right_subtree = self.build_tree(X[right_indices], y[right_indices], depth + 1)
        
        return Node(feature, threshold, left_subtree, right_subtree)
    
    def fit(self, X, y):
        self.root = self.build_tree(X, y)
    
    def predict_one(self, x, node):
        if node.value is not None:
            return node.value
        if x[node.feature] <= node.threshold:
            return self.predict_one(x, node.left)
        return self.predict_one(x, node.right)
    
    def predict(self, X):
        return np.array([self.predict_one(x, self.root) for x in X])

# Load Dataset (assuming it's in a CSV file)
df = pd.read_csv('future_career_advice_final.csv')
X = df.drop(columns=["Best_Suited_Profession"]).values  # Features
y = df["Best_Suited_Profession"].values  # Target

# Train Decision Tree Model
dt = DecisionTree(max_depth=5)
dt.fit(X, y)

# Predictions
y_pred = dt.predict(X)

# Evaluate Model
from sklearn.metrics import accuracy_score
print("Accuracy:", accuracy_score(y, y_pred))

TypeError: Cannot cast array data from dtype('O') to dtype('int64') according to the rule 'safe'