**Prompt:**  

`create a decision tree algorithm in python for a dataset with 3 features, gender (male, female), age (integer), salary (double), and a target variable (1 or 0). Without using scikit-learn`

**2º prompt, solicitando ajuste:**

`I got the following error in _predict_one: cannot unpack non-iterable numpy.int32 object`


# Implementação da Árvore de Decisão

In [8]:
import numpy as np

class DecisionTree:
    """Decision tree classifier built from threshold splits chosen by Gini impurity.

    An internal node is the tuple ``(feature_idx, threshold, left, right)``:
    a sample whose value for ``feature_idx`` is ``<= threshold`` follows
    ``left``, otherwise ``right``.  A leaf is the bare class label.
    """

    def __init__(self, max_depth=None):
        """Create an unfitted tree.

        Args:
            max_depth: Depth at which growth stops (the root is depth 0).
                ``None`` lets the tree grow until no useful split remains.
        """
        self.max_depth = max_depth
        self.tree = None
        # True when the tree was trained on numeric features, so that
        # ``predict`` applies the same text-to-number conversion as ``fit``.
        self._numeric_features = False

    @staticmethod
    def _coerce_features(X):
        """Return ``X`` as an array, turning text/object data into floats if possible.

        Building an array from rows that mix strings and numbers yields a
        text dtype, and text compares lexicographically ("100000" < "60000").
        Converting restores numeric ordering.  Data that does not parse as
        numbers is returned unchanged.
        """
        X = np.asarray(X)
        if X.dtype.kind in "OSU":
            try:
                return X.astype(float)
            except (TypeError, ValueError):
                return X
        return X

    def fit(self, X, y):
        """Grow the tree from training data and store it in ``self.tree``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like holding one class label per sample.

        Raises:
            ValueError: If ``X`` contains no samples.
        """
        X = self._coerce_features(X)
        y = np.asarray(y)
        self._numeric_features = X.dtype.kind in "biuf"
        self.tree = self._build_tree(X, y, depth=0)

    def _build_tree(self, X, y, depth):
        """Recursively build the subtree for the samples ``X`` / ``y``.

        Returns:
            A class label (leaf) or a ``(feature_idx, threshold, left, right)``
            tuple (internal node).

        Raises:
            ValueError: If called with zero samples.
        """
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot build a decision tree from zero samples")

        unique_classes, counts = np.unique(y, return_counts=True)
        majority = unique_classes[np.argmax(counts)]

        # Stop when the depth limit is hit, the node is pure, or there is
        # nothing left to separate.
        if depth == self.max_depth or len(unique_classes) == 1 or n_samples <= 1:
            return majority

        # Exhaustive search: every observed value of every feature is a
        # candidate threshold; keep the one with the lowest weighted Gini.
        best_split = None
        best_gini = np.inf
        for feature_idx in range(n_features):
            column = X[:, feature_idx]
            for value in np.unique(column):
                left_mask = column <= value
                # Not simply ~left_mask: NaN compares False both ways, so
                # such rows belong to neither side.
                right_mask = column > value

                if not left_mask.any() or not right_mask.any():
                    continue

                gini = self._gini_impurity(y[left_mask], y[right_mask])
                if gini < best_gini:
                    best_gini = gini
                    best_split = (feature_idx, value, left_mask, right_mask)

        # Every feature is constant on this node: no split is possible.
        if best_split is None:
            return majority

        feature_idx, value, left_mask, right_mask = best_split
        left_subtree = self._build_tree(X[left_mask], y[left_mask], depth + 1)
        right_subtree = self._build_tree(X[right_mask], y[right_mask], depth + 1)

        return (feature_idx, value, left_subtree, right_subtree)

    @staticmethod
    def _node_gini(labels):
        """Return the Gini impurity ``1 - sum(p_c ** 2)`` of one label set."""
        _, counts = np.unique(labels, return_counts=True)
        proportions = counts / len(labels)
        return 1.0 - np.sum(proportions ** 2)

    def _gini_impurity(self, left_labels, right_labels):
        """Return the size-weighted Gini impurity of a two-way split.

        Args:
            left_labels: Labels of the samples sent to the left child.
            right_labels: Labels of the samples sent to the right child.
        """
        n_left = len(left_labels)
        n_right = len(right_labels)
        total = n_left + n_right

        return ((n_left / total) * self._node_gini(left_labels)
                + (n_right / total) * self._node_gini(right_labels))

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features), encoded the
                same way as the training data.

        Returns:
            A NumPy array with one predicted label per row.

        Raises:
            AttributeError: If the tree has not been fitted yet.
        """
        if getattr(self, "tree", None) is None:
            raise AttributeError("DecisionTree has not been fitted; call fit() first")

        X = np.asarray(X)
        # Mirror the conversion done in fit so thresholds and samples share
        # one numeric type.
        if getattr(self, "_numeric_features", False) and X.dtype.kind in "OSU":
            X = X.astype(float)

        return np.array([self._predict_one(sample, self.tree) for sample in X])

    def _predict_one(self, sample, node):
        """Walk from ``node`` down to a leaf for one sample and return its label."""
        # Internal nodes are tuples; anything else is a leaf label.
        while isinstance(node, tuple):
            feature_idx, value, left_subtree, right_subtree = node
            node = left_subtree if sample[feature_idx] <= value else right_subtree
        return node

# Gender is encoded as a number (0 for male, 1 for female) BEFORE the array is
# built. Mixing strings and numbers in one np.array produces a text dtype, and
# the tree would then compare ages and salaries lexicographically.
GENDER_CODES = {"male": 0, "female": 1}


def encode_rows(rows):
    """Turn (gender, age, salary) rows into a float feature matrix."""
    return np.array(
        [[GENDER_CODES[gender], age, salary] for gender, age, salary in rows],
        dtype=float,
    )


# Sample dataset
X = encode_rows([
    ("male", 25, 60000),
    ("female", 30, 80000),
    ("male", 35, 75000),
    ("female", 22, 50000),
    ("male", 40, 90000),
])

y = np.array([0, 1, 1, 0, 1])

# Create and fit the decision tree
tree = DecisionTree(max_depth=3)
tree.fit(X, y)

# Predictions
new_data = encode_rows([
    ("male", 27, 65000),
    ("female", 32, 70000),
])

predictions = tree.predict(new_data)

print("Predictions:", predictions)



Predictions: [1 1]
