# Trabajo Práctico 1

Estudiantes:

- Alonso Araya Calvo
- Pedro Soto
- Sofia Oviedo

# Imports

In [None]:
import random

import matplotlib.pyplot as plt
import numpy as np
import torch
from scipy.spatial.distance import jensenshannon
from sklearn.model_selection import train_test_split

# Cargando dataset

In [None]:
#tomado de https://www.kaggle.com/code/wailinnoo/intrusion-detection-system-using-kdd99-dataset
import pandas as pd
from tensorflow.keras.utils import get_file

# Fetch the 10% KDD Cup 99 sample through Keras' get_file helper.
try:
    path = get_file('kddcup.data_10_percent.gz',
                    origin='http://kdd.ics.uci.edu/databases/kddcup99/kddcup.data_10_percent.gz')
except Exception:
    # Only genuine errors are reported as download failures; KeyboardInterrupt and
    # SystemExit propagate without the misleading message.
    print('Error downloading')
    raise

print(path)

# This file is a CSV, just no CSV extension or headers
# Download from: http://kdd.ics.uci.edu/databases/kddcup99/kddcup99.html
pd_data_frame = pd.read_csv(path, header=None)

# The CSV file has no column heads, so add them.
# The last column ('outcome') holds the traffic label used later for filtering.
pd_data_frame.columns = [
    'duration',
    'protocol_type',
    'service',
    'flag',
    'src_bytes',
    'dst_bytes',
    'land',
    'wrong_fragment',
    'urgent',
    'hot',
    'num_failed_logins',
    'logged_in',
    'num_compromised',
    'root_shell',
    'su_attempted',
    'num_root',
    'num_file_creations',
    'num_shells',
    'num_access_files',
    'num_outbound_cmds',
    'is_host_login',
    'is_guest_login',
    'count',
    'srv_count',
    'serror_rate',
    'srv_serror_rate',
    'rerror_rate',
    'srv_rerror_rate',
    'same_srv_rate',
    'diff_srv_rate',
    'srv_diff_host_rate',
    'dst_host_count',
    'dst_host_srv_count',
    'dst_host_same_srv_rate',
    'dst_host_diff_srv_rate',
    'dst_host_same_src_port_rate',
    'dst_host_srv_diff_host_rate',
    'dst_host_serror_rate',
    'dst_host_srv_serror_rate',
    'dst_host_rerror_rate',
    'dst_host_srv_rerror_rate',
    'outcome'
]

# Limpieza del dataset y generación de subset del dataset

In [None]:
# Drop rows that contain missing values, in case there are any.
# axis=0 removes the incomplete records; axis=1 would discard entire feature
# columns (possibly even 'outcome') because of a single missing cell.
pd_data_frame.dropna(inplace=True, axis=0)

# Remove exact duplicate records, keeping the first occurrence of each
pd_data_frame.drop_duplicates(keep='first', inplace=True)

In [None]:
# Restrict the data to the two classes studied here ('normal.' and 'back.');
# the copy decouples the subset from the full frame.
filtered_df = pd_data_frame.loc[
    pd_data_frame['outcome'].isin(['normal.', 'back.'])
].copy()

In [None]:
list_nominal_features = ["flag", "protocol_type", "service"]

# Expand every nominal feature into one indicator column per category
df_encoded = pd.get_dummies(filtered_df, columns=list_nominal_features)

# Indicator columns produced as booleans are cast to integers (0 or 1);
# every other column keeps its dtype.
df_encoded = df_encoded.astype(
    {name: int for name in df_encoded.columns if df_encoded[name].dtype == 'bool'}
)

In [None]:
# One independent frame per class: 'back.' records and 'normal.' records
df_attacks = df_encoded.loc[df_encoded['outcome'].eq('back.')].copy()
df_no_attacks = df_encoded.loc[df_encoded['outcome'].eq('normal.')].copy()

# Parte 1 Análisis Descriptivo

## 1a Análisis de momentos estadísticos

- Media
- Desviación estándar
- Inclinación
- Kurtosis

In [None]:
# Feature-only frames for each class (label column removed)
df_attacks_without_outcome = df_attacks.drop(columns='outcome')
df_normal_without_outcome = df_no_attacks.drop(columns='outcome')

# Column labels, used as row labels of the statistics tables
attack_without_outcomes_column_names = df_attacks_without_outcome.columns
normal_without_outcomes_column_names = df_normal_without_outcome.columns

# float32 tensors holding the same values as the frames above
attack_tensor = torch.tensor(df_attacks_without_outcome.to_numpy(), dtype=torch.float32)
no_attack_tensor = torch.tensor(df_normal_without_outcome.to_numpy(), dtype=torch.float32)

In [None]:
def calculate_moments(dataset_tensor, feature_names):
    """
    Compute per-column mean, standard deviation, skewness and excess kurtosis
    of a 2-D tensor and display them as a table.

    param dataset_tensor: tensor whose columns are reduced along dim 0
    param feature_names: labels used as the index of the displayed table
    return None (the table is shown through display())
    """
    column_mean = dataset_tensor.mean(dim=0)
    column_std = dataset_tensor.std(dim=0)

    # Standardize every column; non-finite results (e.g. division by a zero
    # standard deviation) are replaced by 0 so they do not poison the means below
    standardized = (dataset_tensor - column_mean) / column_std
    standardized = standardized.masked_fill(~torch.isfinite(standardized), 0.0)

    # Third and fourth standardized moments; 3 is subtracted to report excess kurtosis
    skewness = (standardized ** 3).mean(dim=0)
    excess_kurtosis = (standardized ** 4).mean(dim=0) - 3

    moments_table = pd.DataFrame(
        {
            "Media": column_mean.numpy(),
            "Desviación Estándar": column_std.numpy(),
            "Inclinación": skewness.numpy(),
            "Kurtosis": excess_kurtosis.numpy(),
        },
        index=feature_names,
    )

    display(moments_table)

### Momentos Estadísticos para Datos de Ataque

In [None]:
# Moments of the attack records, computed with the torch implementation
calculate_moments(dataset_tensor=attack_tensor,
                  feature_names=attack_without_outcomes_column_names)

In [None]:
# Same four moments computed by pandas, for comparison with calculate_moments
stats_df = pd.DataFrame({
    "Mean": df_attacks_without_outcome.mean(),
    "Std": df_attacks_without_outcome.std(),
    "Skewness": df_attacks_without_outcome.skew(),
    "Kurtosis": df_attacks_without_outcome.kurt(),
})

display(stats_df)

### Momentos Estadísticos para Paquetes Normales

In [None]:
# Moments of the normal records, computed with the torch implementation
calculate_moments(dataset_tensor=no_attack_tensor,
                  feature_names=normal_without_outcomes_column_names)

In [None]:
# Same four moments computed by pandas, for comparison with calculate_moments
stats_df = pd.DataFrame({
    "Mean": df_normal_without_outcome.mean(),
    "Std": df_normal_without_outcome.std(),
    "Skewness": df_normal_without_outcome.skew(),
    "Kurtosis": df_normal_without_outcome.kurt(),
})

display(stats_df)

## 1b Histogramas y Distancia Jensen Shannon

### Histogramas para datos de ataque backdoor

In [None]:
def generate_feature_histogram_and_calculate_jensen_shannon(df_normal, df_backdoor, feature_names, bins=30):
    """
    Plot, for every feature, overlaid histograms of both classes and compute the
    Jensen-Shannon distance between their binned distributions.

    param df_normal: frame with the normal observations
    param df_backdoor: frame with the attack observations
    param feature_names: columns to analyze (must exist in both frames)
    param bins: number of histogram bins shared by both classes
    return None (figures are shown and the ranking table is displayed)
    """
    distances = []

    for feature in feature_names:
        values_normal = df_normal[feature].values
        values_attack = df_backdoor[feature].values

        # Both histograms share the same range so their bins are comparable
        lower = min(values_normal.min(), values_attack.min())
        upper = max(values_normal.max(), values_attack.max())
        shared_range = (lower, upper)

        counts_normal, _ = np.histogram(values_normal, bins=bins, range=shared_range)
        counts_attack, _ = np.histogram(values_attack, bins=bins, range=shared_range)

        # Turn bin counts into probability vectors before comparing them
        prob_normal = counts_normal / counts_normal.sum()
        prob_attack = counts_attack / counts_attack.sum()

        distance = jensenshannon(prob_normal, prob_attack)
        distances.append(distance)

        plt.figure(figsize=(6, 4))
        for values, label, color in ((values_normal, "Normal", 'blue'),
                                     (values_attack, "Backdoor", 'red')):
            plt.hist(values, bins=bins, range=shared_range, alpha=0.5, label=label, color=color,
                     density=True)
        plt.title(f"Histograma de {feature} (Distancia JS: {distance:.4f})")
        plt.xlabel("Valor")
        plt.ylabel("Densidad")
        plt.legend()
        plt.show()

    # Ranking of features, most discriminative (largest distance) first
    ranking = pd.DataFrame({"Feature": feature_names, "DistanciaJS": distances})
    ranking = ranking.sort_values(by="DistanciaJS", ascending=False)

    display(ranking)

In [None]:
# Histograms and Jensen-Shannon ranking for every feature column
generate_feature_histogram_and_calculate_jensen_shannon(
    df_normal=df_normal_without_outcome,
    df_backdoor=df_attacks_without_outcome,
    feature_names=attack_without_outcomes_column_names,
)

# Parte 2 Implementación de la clasificación multi-clase con árboles de decisión

## Implementación final de las clases NodeCart y Cart

In [None]:
class NodeCart:
    """
    Node of a binary CART classification tree.

    A node either is a leaf (predicts its dominant class) or holds a
    (feature, threshold) test that routes observations with
    feature < threshold to the left child and the rest to the right child.
    Datasets are tensors whose last column is the integer class label.
    """

    def __init__(self, num_classes=2, ref_cart=None, current_depth=0):
        """
        Create the node attributes
        param num_classes: K number of classes to classify
        param ref_cart: reference to the tree containing the node
        param current_depth: current depth of the node in the tree
        """
        self.ref_cart = ref_cart
        self.threshold_value = 0
        self.feature_num = 0
        self.node_right = None
        self.node_left = None
        # Declared for completeness; no method of this class assigns it
        self.data_torch_partition = None
        self.gini = 0
        self.dominant_class = None
        # Fraction of the node's training observations that belong to dominant_class
        self.accuracy_dominant_class = None
        self.num_classes = num_classes
        self.current_depth = current_depth

    def to_xml(self, current_str=""):
        """
        Recursive function to write the node content to a xml formatted string
        param current_str : forwarded unchanged to the children (kept for interface compatibility)
        return the string with the node content
        """
        pieces = [
            "<node><thresh>" + str(self.threshold_value) + "</thresh>",
            "<feature>" + str(self.feature_num) + "</feature>",
            "<depth>" + str(self.current_depth) + "</depth>",
            "<gini>" + str(self.gini) + "</gini>",
        ]
        # Children are serialized right-first, then left; the order is preserved
        # so previously generated XML files keep the same layout
        if self.node_right is not None:
            pieces.append(self.node_right.to_xml(current_str))
        if self.node_left is not None:
            pieces.append(self.node_left.to_xml(current_str))

        if self.is_leaf():
            pieces.append("<dominant_class>" + str(self.dominant_class) + "</dominant_class>")
            pieces.append("<acc_dominant_class>" + str(self.accuracy_dominant_class) + "</acc_dominant_class>")
        pieces.append("</node>")
        return "".join(pieces)

    def is_leaf(self):
        """
        Checks whether the node is a leaf
        """
        return self.node_left is None and self.node_right is None

    def create_with_children(self, data_torch, current_depth, min_gini=0.000001):
        """
        Creates a node by selecting the best feature and threshold, and if needed, creating its children
        param data_torch: dataset with the current partition to deal with in the node
        param current_depth: depth counter for the node
        param min_gini: hyperparameter selected by the user defining the minimum tolerated Gini coefficient for a  node
        return the list of selected features so far
        """
        list_selected_features = []

        # An empty partition has no labels to summarize (torch.mode would fail)
        if data_torch.shape[0] == 0:
            return list_selected_features

        labels = data_torch[:, -1].long()

        self.dominant_class = torch.mode(labels)[0].item()
        # Share of observations in this partition that the dominant class explains
        self.accuracy_dominant_class = (labels == self.dominant_class).float().mean().item()
        self.gini = self.calculate_gini(labels, self.num_classes)

        # Stop criteria: depth limit, too few observations, or node already pure enough
        if (current_depth >= self.ref_cart.get_max_depth() or
                data_torch.shape[0] <= self.ref_cart.get_min_observations() or
                self.gini <= min_gini):
            return list_selected_features

        threshold, feature_idx, min_gini_split = self.select_best_feature_and_thresh(data_torch, self.num_classes)

        # No valid split, or the best split does not reduce impurity: stay a leaf
        if feature_idx is None or min_gini_split >= self.gini:
            return list_selected_features

        self.feature_num = feature_idx
        self.threshold_value = threshold

        list_selected_features.append(feature_idx)

        feature_values = data_torch[:, :-1][:, feature_idx]

        left_mask = feature_values < threshold
        right_mask = ~left_mask

        left_data = data_torch[left_mask]
        right_data = data_torch[right_mask]

        if left_data.shape[0] > 0:
            self.node_left = NodeCart(self.num_classes, self.ref_cart, current_depth + 1)
            left_features = self.node_left.create_with_children(left_data, current_depth + 1, min_gini)
            list_selected_features.extend(left_features)

        if right_data.shape[0] > 0:
            self.node_right = NodeCart(self.num_classes, self.ref_cart, current_depth + 1)
            right_features = self.node_right.create_with_children(right_data, current_depth + 1, min_gini)
            list_selected_features.extend(right_features)

        return list_selected_features

    def select_best_feature_and_thresh(self, data_torch, num_classes=2):
        """
        Selects the best feature and threshold that minimizes the Gini coefficient
        param data_torch: dataset partition to analyze
        param num_classes: number of K classes to discriminate from
        return min_thresh, min_feature, min_gini found for the dataset partition when
        selecting the found feature and threshold; (None, None, inf) when no split exists
        """
        features = data_torch[:, :-1]
        labels = data_torch[:, -1].long()

        best_gini = float('inf')
        best_feature = None
        best_thresh = None

        for feature_idx in range(features.shape[1]):
            feature_values = features[:, feature_idx]
            unique_values = torch.unique(feature_values, sorted=True)

            # Candidate thresholds are the midpoints between consecutive distinct values
            for i in range(len(unique_values) - 1):
                threshold = (unique_values[i] + unique_values[i + 1]) / 2

                left_mask = feature_values < threshold
                right_mask = ~left_mask

                if left_mask.sum() == 0 or right_mask.sum() == 0:
                    continue

                weighted_gini = self.weighted_gini(labels[left_mask], labels[right_mask], num_classes)

                # Strict comparison: on ties the first candidate found is kept
                if weighted_gini < best_gini:
                    best_gini = weighted_gini
                    best_feature = feature_idx
                    best_thresh = threshold.item()

        return best_thresh, best_feature, best_gini

    def calculate_gini(self, data_partition_torch, num_classes=2):
        """
        Calculates the Gini coefficient for a given partition with the given number of classes
        param data_partition_torch: 1-D integer tensor with the labels of the partition
        param num_classes: K number of classes to discriminate from
        returns the calculated Gini coefficient (0.0 for an empty partition)
        """
        if data_partition_torch.numel() == 0:
            return 0.0

        class_counts = torch.bincount(data_partition_torch, minlength=num_classes).float()
        proportions = class_counts / class_counts.sum()
        gini_score = 1.0 - torch.sum(proportions ** 2)
        return gini_score.item()

    def weighted_gini(self, left_side, right_side, num_classes=2):
        """
        Gini coefficient of a split: the Gini of each side weighted by its share of observations
        param left_side: labels routed to the left child
        param right_side: labels routed to the right child
        param num_classes: K number of classes to discriminate from
        returns the weighted Gini coefficient (0.0 when both sides are empty)
        """
        n = left_side.numel() + right_side.numel()
        if n == 0:
            return 0.0
        gini_left = self.calculate_gini(left_side, num_classes)
        gini_right = self.calculate_gini(right_side, num_classes)
        return (left_side.numel() / n) * gini_left + (right_side.numel() / n) * gini_right

    def evaluate_node(self, input_torch):
        """
        Evaluates an input observation within the node.
        If is not a leaf node, send it to the corresponding node
        return predicted label
        """
        # Leaves answer directly, without reading any feature of the input
        if self.is_leaf():
            return self.dominant_class

        if input_torch[self.feature_num] < self.threshold_value:
            child = self.node_left
        else:
            child = self.node_right

        # A missing branch falls back to this node's own majority class
        if child is None:
            return self.dominant_class
        return child.evaluate_node(input_torch)


class CART:
    """
    Classification tree: holds the root node and the growth hyperparameters
    (maximum depth and minimum observations per node).
    """

    def __init__(self, dataset_torch, max_cart_depth, min_observations=2, num_classes=2):
        """
        CART has only one root node
        param dataset_torch: accepted for interface compatibility; the constructor does not use it
        (training data is passed to build_cart)
        param max_cart_depth: maximum depth allowed for the tree
        param min_observations: min observations per node
        param num_classes: K number of classes handled by the tree nodes
        """
        #min observations per node
        self.min_observations = min_observations
        self.root = NodeCart(num_classes=num_classes, ref_cart=self, current_depth=0)
        self.max_cart_depth = max_cart_depth
        self.list_selected_features = []

    def get_root(self):
        """
        Gets tree root
        """
        return self.root

    def get_min_observations(self):
        """
        return min observations per node
        """
        return self.min_observations

    def get_max_depth(self):
        """
        Gets the selected max depth of the tree
        """
        return self.max_cart_depth

    def build_cart(self, data_torch):
        """
        Build CART from root
        param data_torch: training dataset handed to the root node
        """
        self.list_selected_features = self.root.create_with_children(data_torch, current_depth=0)

    def to_xml(self, xml_file_name):
        """
        write Xml file with tree content
        param xml_file_name: path of the file to (over)write
        return the XML string that was written
        """
        str_nodes = self.root.to_xml()
        # The context manager closes the file even if the write fails
        with open(xml_file_name, "w") as file:
            file.write(str_nodes)
        return str_nodes

    def evaluate_input(self, input_torch):
        """
        Evaluate a specific input in the tree and get the predicted class
        """
        return self.root.evaluate_node(input_torch)

## A) Implementación del cálculo de Gini

### 1a Pruebas unitarias para el cálculo de Gini

In [None]:
# Unit checks for NodeCart.calculate_gini
node = NodeCart(num_classes=2)

# A partition made of a single class is perfectly pure
ones_tensor = torch.ones(4, dtype=torch.long)
gini = node.calculate_gini(ones_tensor, num_classes=2)
assert gini == 0.0, f"Expected 0.0, got {gini}"
print("Test 1 Gini ✅")

# Four classes with one observation each: 1 - 4 * (1/4)^2 = 0.75
variable_tensor = torch.arange(4)
gini = node.calculate_gini(variable_tensor, num_classes=4)
assert gini == 0.75, f"Expected 0.75, got {gini}"
print("Test 2 Gini ✅")

## B) Pruebas unitarias de select_best_feature_and_thresh

In [None]:
def test_select_best_feature_two_classes():
    """
    Check that a two-class partition yields a split whose feature index,
    threshold and Gini value are inside their valid ranges.
    """
    rows = [
        [1.0, 5.0, 0],
        [2.0, 3.0, 0],
        [3.0, 1.0, 1],
        [4.0, 2.0, 1],
    ]
    samples = torch.tensor(rows, dtype=torch.float32)

    splitter = NodeCart(num_classes=2)
    split = splitter.select_best_feature_and_thresh(samples, num_classes=2)
    thresh, feature_idx, gini_score = split

    assert thresh is not None, "Threshold inválido"
    assert feature_idx is not None, "Feature inválido"
    assert 0 <= feature_idx < 2, f"Feature debe estar entre 0-1, obtuvo {feature_idx}"
    assert 1.0 <= thresh <= 4.0, f"Threshold debe estar dentro del rango esperado se obtuvo {thresh}"
    assert gini_score >= 0.0, f"Gini debe ser >= 0 se obtuvo {gini_score}"

    print(f"✅ Test 1: Feature={feature_idx}, Threshold={thresh:.3f}, Gini={gini_score:.3f}")


def test_select_best_feature_single_class():
    """
    Check the behaviour on a partition holding a single class: either no split
    is reported, or the reported split has valid feature index and Gini value.
    """
    single_class_data = torch.tensor([
        [1.0, 2.0, 0],
        [2.0, 3.0, 0],
        [3.0, 1.0, 0],
        [4.0, 4.0, 0]
    ], dtype=torch.float32)

    split = NodeCart(num_classes=2).select_best_feature_and_thresh(single_class_data, num_classes=2)
    thresh, feature_idx, gini_score = split

    # Guard clause: nothing else to verify when no split was found
    if thresh is None or feature_idx is None:
        print("✅ Test 2: No se pudo encontrar un split")
        return

    assert 0 <= feature_idx < 2, f"Feature inválido: {feature_idx}"
    assert gini_score >= 0.0, f"Gini debe ser >= 0 se obtuvo {gini_score}"
    print(f"✅ Test 2: Feature={feature_idx}, Threshold={thresh:.3f}, Gini={gini_score:.3f}")


# Run both select_best_feature_and_thresh checks; a failed assertion stops the cell
print("Pruebas de select_best_feature_and_thresh:")
test_select_best_feature_two_classes()
test_select_best_feature_single_class()

## B) Pruebas unitarias de create_with_children

In [None]:
def test_create_with_children_normal_splitting():
    """
    Check that a separable two-class dataset makes the root split:
    a non-empty feature list is returned and the root gets at least one child.
    """
    data = torch.tensor([
        [1.0, 10.0, 0],
        [1.5, 12.0, 0],
        [5.0, 2.0, 1],
        [6.0, 1.0, 1],
    ], dtype=torch.float32)

    tree = CART(dataset_torch=data, max_cart_depth=2, min_observations=1)
    root_node = tree.get_root()

    selected_features = root_node.create_with_children(data, current_depth=0)

    assert isinstance(selected_features, list), "Debería devolver una lista"
    assert len(selected_features) > 0, "Debería contener al menos una característica"
    assert not root_node.is_leaf(), "Root node no debería ser un leaf"
    assert root_node.feature_num is not None, "Feature number no debería estar vacío"
    assert root_node.threshold_value is not None, "Threshold value no debería estar vacío"
    assert root_node.feature_num >= 0, f"Feature number debería ser mayor o igual a cero, se obtuvo {root_node.feature_num}"

    has_left_child = root_node.node_left is not None
    has_right_child = root_node.node_right is not None
    assert has_left_child or has_right_child, "Debería tener al menos un hijo"

    # The dataset has two feature columns, so only indices 0 and 1 are valid
    for feature_idx in selected_features:
        assert 0 <= feature_idx < 2, f"Feature index {feature_idx} debería estar entre 0 y 1"

    # The summary reports the root feature and threshold once
    print(f"✅ Test 1: Normal splitting - Selected features: {selected_features}, "
          f"Root feature: {root_node.feature_num}, Threshold: {root_node.threshold_value:.3f}")


def test_create_with_children_min_gini_condition():
    """
    Check that a node whose Gini is already below min_gini stays a leaf:
    no features are selected and the dominant class is the only class present.
    """
    data = torch.tensor([
        [1.0, 2.0, 0],
        [1.1, 2.1, 0],
        [1.2, 2.2, 0],
        [1.3, 2.3, 0],
    ], dtype=torch.float32)

    tree = CART(dataset_torch=data, max_cart_depth=3, min_observations=1)
    root_node = tree.get_root()

    selected_features = root_node.create_with_children(data, current_depth=0, min_gini=0.1)

    assert isinstance(selected_features, list), "Debería devolver una lista"
    assert len(selected_features) == 0, "Debería devolver una lista vacía"
    assert root_node.is_leaf(), "Debería ser una hoja"
    assert root_node.gini < 0.1, f"Gini debería ser menor a 0.1, se obtuvo {root_node.gini:.3f}"
    assert root_node.dominant_class == 0, "Debería ser la clase dominante 0"

    print(f"✅ Test 2: Min Gini - Gini: {root_node.gini:.3f}, Dominant class: {root_node.dominant_class}")


# Run both create_with_children checks; a failed assertion stops the cell
print("Pruebas de create_with_children:")
test_create_with_children_normal_splitting()
test_create_with_children_min_gini_condition()

## C) Implementación de TestCart y unit tests

In [None]:
def test_cart(tree, testset_torch):
    """
    Measure the accuracy of a previously built CART.

    param tree: object exposing evaluate_input(observation) -> predicted label
    param testset_torch: tensor whose last column is the true label
    return fraction of correctly classified rows (0.0 for an empty test set)
    """
    total_predictions = testset_torch.shape[0]
    if total_predictions == 0:
        return 0.0

    observations = testset_torch[:, :-1]
    expected_labels = testset_torch[:, -1].long()

    # Count the rows whose predicted label matches the true one
    hits = sum(
        1
        for observation, expected in zip(observations, expected_labels)
        if tree.evaluate_input(observation) == expected.item()
    )
    return hits / total_predictions

In [None]:
def test_cart_perfect_predictions():
    """
    Train on a dataset separable by the first feature and check that the
    tree classifies both unseen test rows correctly (accuracy 1.0).
    """
    train_data = torch.tensor([
        [1.0, 2.0, 0],
        [2.0, 3.0, 0],
        [3.0, 1.0, 1],
        [4.0, 2.0, 1],
    ], dtype=torch.float32)

    test_data = torch.tensor([
        [1.5, 2.5, 0],
        [3.5, 1.5, 1],
    ], dtype=torch.float32)

    tree = CART(dataset_torch=train_data, max_cart_depth=2, min_observations=1)
    tree.build_cart(train_data)

    accuracy = test_cart(tree, test_data)

    # Classes split cleanly at feature 0 = 2.5, so every test row must be right;
    # a mere range check would also accept a tree that predicts nothing correctly
    assert accuracy == 1.0, f"Accuracy debe ser 1.0 se obtuvo {accuracy}"
    print(f"✅ Test 1: Accuracy = {accuracy:.3f}")


def test_cart_invalid_dataset():
    """
    Check test_cart on degenerate test sets: an empty one (accuracy 0.0)
    and a single observation (accuracy is either 0.0 or 1.0).
    """
    training_rows = [
        [1.0, 2.0, 0],
        [2.0, 3.0, 1],
        [3.0, 1.0, 0],
        [4.0, 2.0, 1],
    ]
    train_data = torch.tensor(training_rows, dtype=torch.float32)

    tree = CART(dataset_torch=train_data, max_cart_depth=1, min_observations=1)
    tree.build_cart(train_data)

    # Empty test set
    accuracy_empty = test_cart(tree, torch.empty((0, 3), dtype=torch.float32))
    assert accuracy_empty == 0.0, f"Debería dar accuracy 0.0 se obtuvo {accuracy_empty}"

    # Single observation
    accuracy_single = test_cart(tree, torch.tensor([[2.5, 1.5, 0]], dtype=torch.float32))
    assert 0.0 <= accuracy_single <= 1.0, f"Accuracy inválida: {accuracy_single}"
    assert accuracy_single in [0.0, 1.0], "Con una observación, accuracy debería ser 0.0 o 1.0"

    print(f"✅ Test 2: Dataset vacío = {accuracy_empty}, Una observación = {accuracy_single}")


# Run both test_cart checks; a failed assertion stops the cell
test_cart_perfect_predictions()
test_cart_invalid_dataset()

# 3. Evaluación del CART

## Implementación de funciones generales

In [None]:
def get_cart_kd99_dataset_tensor():
    """
    Build the dataset used by the CART experiments from the module-level
    frames df_no_attacks and df_attacks.

    The 'outcome' labels are encoded as 'normal.' -> 0 and 'back.' -> 1, and a
    short summary of the result is printed.
    return float32 tensor with every column of the concatenated frame
    """
    label_codes = {'normal.': 0, 'back.': 1}

    merged = pd.concat([df_no_attacks, df_attacks], ignore_index=True)
    merged['outcome'] = merged['outcome'].map(label_codes)

    dataset_tensor = torch.tensor(merged.values, dtype=torch.float32)
    n_rows, n_columns = dataset_tensor.shape

    print("Dataset completo creado:")
    print(f"- Total de observaciones: {n_rows}")
    print(f"- Número de características: {n_columns - 1}")  # one column is the label
    print(f"- Observaciones normales: {len(df_no_attacks)}")
    print(f"- Observaciones de backdoor: {len(df_attacks)}")
    print(f"- Distribución de clases: {merged['outcome'].value_counts().to_dict()}")

    return dataset_tensor

## 3a. Tasa de aciertos y F1-score promedio de todas las clases

In [None]:
def calculate_f1_score(y_true, y_pred, num_classes=2):
    """
    Compute the macro-averaged F1 score and print precision, recall and F1 per class.

    param y_true: tensor with the true labels
    param y_pred: tensor with the predicted labels
    param num_classes: classes 0 .. num_classes - 1 are evaluated
    return the unweighted mean of the per-class F1 scores
    """
    zero = torch.tensor(0.0)
    per_class_f1 = []

    for class_id in range(num_classes):
        is_true = y_true == class_id
        is_pred = y_pred == class_id

        tp = torch.sum(is_true & is_pred).float()
        fp = torch.sum((y_true != class_id) & is_pred).float()
        fn = torch.sum(is_true & (y_pred != class_id)).float()

        # Every ratio falls back to 0 when its denominator is 0
        if (tp + fp) > 0:
            precision = tp / (tp + fp)
        else:
            precision = zero
        if (tp + fn) > 0:
            recall = tp / (tp + fn)
        else:
            recall = zero
        if (precision + recall) > 0:
            f1 = 2 * (precision * recall) / (precision + recall)
        else:
            f1 = zero
        per_class_f1.append(f1.item())

        # Class 0 is reported as "Normal", any other class as "Backdoor"
        class_name = "Backdoor" if class_id != 0 else "Normal"
        print(f"📊 {class_name}: Precision={precision:.4f}, Recall={recall:.4f}, F1={f1:.4f}")

    return sum(per_class_f1) / len(per_class_f1)

In [None]:
def evaluate_cart_complete(dataset_tensor, max_depth, min_observations=2):
    """
    Train a CART on dataset_tensor and evaluate it on the same data.

    param dataset_tensor: tensor whose last column is the class label
    param max_depth: maximum depth of the tree
    param min_observations: minimum observations per node
    return dict with accuracy, macro F1, training/evaluation times, the list of
    features selected while building the tree, and the trained tree itself
    """
    import time

    print(f"\n🌳 Evaluando CART con profundidad máxima = {max_depth}")
    print("-" * 50)

    cart = CART(dataset_torch=dataset_tensor,
                max_cart_depth=max_depth,
                min_observations=min_observations)

    start_time = time.perf_counter()
    cart.build_cart(dataset_tensor)
    training_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    features = dataset_tensor[:, :-1]
    true_labels = dataset_tensor[:, -1].long()

    # Single pass over the dataset: the predictions feed both accuracy and F1
    predicted_labels = torch.tensor(
        [cart.evaluate_input(observation) for observation in features],
        dtype=torch.long,
    )
    evaluation_time = time.perf_counter() - start_time

    # Same value test_cart would return, without classifying every row a second time
    total = true_labels.shape[0]
    accuracy = (predicted_labels == true_labels).sum().item() / total if total else 0.0

    print(f"📊 Detalle por clase:")
    f1_score = calculate_f1_score(true_labels, predicted_labels, num_classes=2)

    # Show results
    print(f"\n📊 Resultados generales:")
    print(f"   • Tasa de aciertos (Accuracy): {accuracy:.4f} ({accuracy * 100:.2f}%)")
    print(f"   • F1-Score promedio: {f1_score:.4f}")
    print(f"   • Tiempo de entrenamiento: {training_time:.4f} segundos")
    print(f"   • Tiempo de evaluación: {evaluation_time:.4f} segundos")
    print(f"   • Características seleccionadas: {len(cart.list_selected_features)}")
    print(f"   • Features utilizadas: {cart.list_selected_features}")

    return {
        'accuracy': accuracy,
        'f1_score': f1_score,
        'training_time': training_time,
        'evaluation_time': evaluation_time,
        'selected_features': cart.list_selected_features,
        'cart': cart
    }


# Driver for Part 3, point 1: train and evaluate the CART on the same data
# with maximum depths 3 and 4, then print a side-by-side comparison.
print("=" * 60)
print("PARTE 3 - PUNTO 1: EVALUACIÓN DEL CART")
print("Usando el mismo conjunto de datos para entrenamiento y prueba")
print("=" * 60)

print(f"\n📋 Información de los dataframes originales:")
print(f"   • df_no_attacks (normal): {len(df_no_attacks)} observaciones")
print(f"   • df_attacks (backdoor): {len(df_attacks)} observaciones")
print(f"   • Clases en df_no_attacks: {df_no_attacks['outcome'].unique()}")
print(f"   • Clases en df_attacks: {df_attacks['outcome'].unique()}")

# Despite its name, complete_dataset holds the tensor returned by the helper
complete_dataset = get_cart_kd99_dataset_tensor()

print(f"\n📋 Configuración de evaluación:")
print(f"   • Mínimo 2 observaciones por hoja")
print(f"   • Evaluación con profundidades máximas: 3 y 4")

# Evaluate with maximum depth = 3
results_depth_3 = evaluate_cart_complete(complete_dataset, max_depth=3, min_observations=2)

# Evaluate with maximum depth = 4
results_depth_4 = evaluate_cart_complete(complete_dataset, max_depth=4, min_observations=2)

# Comparative summary
print(f"\n📈 RESUMEN COMPARATIVO:")
print("=" * 60)
print(f"{'Métrica':<25} {'Profundidad 3':<15} {'Profundidad 4':<15}")
print("-" * 60)
print(f"{'Accuracy':<25} {results_depth_3['accuracy']:<15.4f} {results_depth_4['accuracy']:<15.4f}")
print(f"{'F1-Score':<25} {results_depth_3['f1_score']:<15.4f} {results_depth_4['f1_score']:<15.4f}")
print(
    f"{'Tiempo Entrenamiento (s)':<25} {results_depth_3['training_time']:<15.4f} {results_depth_4['training_time']:<15.4f}")
print(
    f"{'Tiempo Evaluación (s)':<25} {results_depth_3['evaluation_time']:<15.4f} {results_depth_4['evaluation_time']:<15.4f}")
print(
    f"{'Features Seleccionadas':<25} {len(results_depth_3['selected_features']):<15} {len(results_depth_4['selected_features']):<15}")

print(f"\n🎯 CONCLUSIONES:")
# Strict '>' comparison: a tie is reported as "Profundidad 4"
mejor_accuracy = "Profundidad 3" if results_depth_3['accuracy'] > results_depth_4['accuracy'] else "Profundidad 4"
mejor_f1 = "Profundidad 3" if results_depth_3['f1_score'] > results_depth_4['f1_score'] else "Profundidad 4"
print(f"   • Mejor Accuracy: {mejor_accuracy}")
print(f"   • Mejor F1-Score: {mejor_f1}")
# The difference is between list lengths, so a feature selected in several
# nodes is counted once per node
print(
    f"   • El árbol con profundidad 4 utiliza {len(results_depth_4['selected_features']) - len(results_depth_3['selected_features'])} características adicionales")

# Keep the trained trees for later analysis
best_cart_depth_3 = results_depth_3['cart']
best_cart_depth_4 = results_depth_4['cart']

print("\n✅ Evaluación del punto 1 completada exitosamente!")

## 3b. Evaluación del CART compleja

In [None]:
def single_run_evaluation(dataset_tensor, max_depth, min_observations=2, random_state=None):
    """
    Train a CART on a stratified 70% split of the data and evaluate it on the remaining 30%.

    param dataset_tensor: tensor whose last column is the class label
    param max_depth: maximum depth of the tree
    param min_observations: minimum observations per node
    param random_state: when given, seeds torch, numpy and random, and fixes the split
    return dict with accuracy, macro F1, training/evaluation times, the trained
    tree and the sizes of both splits
    """
    import time

    if random_state is not None:
        torch.manual_seed(random_state)
        np.random.seed(random_state)
        random.seed(random_state)

    data_np = dataset_tensor.numpy()
    X = data_np[:, :-1]
    y = data_np[:, -1]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=random_state, stratify=y
    )

    # Re-attach the label as the last column, the layout CART expects
    train_tensor = torch.tensor(np.column_stack([X_train, y_train]), dtype=torch.float32)
    test_tensor = torch.tensor(np.column_stack([X_test, y_test]), dtype=torch.float32)

    cart = CART(dataset_torch=train_tensor,
                max_cart_depth=max_depth,
                min_observations=min_observations)

    start_time = time.perf_counter()
    cart.build_cart(train_tensor)
    training_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    test_features = test_tensor[:, :-1]
    true_labels = test_tensor[:, -1].long()

    # Single pass over the test split: the predictions feed both accuracy and F1,
    # so evaluation_time measures one evaluation instead of two
    predicted_labels = torch.tensor(
        [cart.evaluate_input(observation) for observation in test_features],
        dtype=torch.long,
    )
    evaluation_time = time.perf_counter() - start_time

    # Same value test_cart would return for this split
    total = true_labels.shape[0]
    accuracy = (predicted_labels == true_labels).sum().item() / total if total else 0.0

    f1_score = calculate_f1_score_silent(true_labels, predicted_labels, num_classes=2)

    return {
        'accuracy': accuracy,
        'f1_score': f1_score,
        'training_time': training_time,
        'evaluation_time': evaluation_time,
        'cart': cart,
        'train_size': train_tensor.shape[0],
        'test_size': test_tensor.shape[0]
    }


def calculate_f1_score_silent(y_true, y_pred, num_classes=2):
    """
    Compute the macro-averaged F1 score without printing anything.

    param y_true: tensor with the true labels
    param y_pred: tensor with the predicted labels
    param num_classes: classes 0 .. num_classes - 1 are evaluated
    return the unweighted mean of the per-class F1 scores
    """
    zero = torch.tensor(0.0)
    per_class_f1 = []

    for class_id in range(num_classes):
        is_true = y_true == class_id
        is_pred = y_pred == class_id

        tp = torch.sum(is_true & is_pred).float()
        fp = torch.sum((y_true != class_id) & is_pred).float()
        fn = torch.sum(is_true & (y_pred != class_id)).float()

        # Every ratio falls back to 0 when its denominator is 0
        if (tp + fp) > 0:
            precision = tp / (tp + fp)
        else:
            precision = zero
        if (tp + fn) > 0:
            recall = tp / (tp + fn)
        else:
            recall = zero
        if (precision + recall) > 0:
            f1 = 2 * (precision * recall) / (precision + recall)
        else:
            f1 = zero
        per_class_f1.append(f1.item())

    return sum(per_class_f1) / len(per_class_f1)


def evaluate_multiple_runs(dataset_tensor, max_depth, n_runs=10, min_observations=2):
    """
    Repeat single_run_evaluation n_runs times (seeds 42, 43, ...) and summarize the results.

    param dataset_tensor: tensor whose last column is the class label
    param max_depth: maximum depth of the trees
    param n_runs: number of random partitions to evaluate
    param min_observations: minimum observations per node
    return dict with mean and standard deviation of accuracy, F1 and both times,
    the index and tree of the run with the highest F1, and every individual result
    """
    print(f"\n🔄 Ejecutando {n_runs} corridas con profundidad máxima = {max_depth}")
    print("-" * 60)

    run_results = []
    top_run, top_f1 = 0, -1

    for run_number in range(n_runs):
        print(f"   Corrida {run_number + 1}/{n_runs}...", end=" ")

        outcome = single_run_evaluation(
            dataset_tensor,
            max_depth,
            min_observations,
            random_state=42 + run_number
        )
        run_results.append(outcome)

        # Strict '>' keeps the earliest run when several share the best F1
        if outcome['f1_score'] > top_f1:
            top_f1, top_run = outcome['f1_score'], run_number

        print(f"✅ Acc: {outcome['accuracy']:.3f}, F1: {outcome['f1_score']:.3f}")

    def _collect(key):
        # Values of one metric across all runs
        return [entry[key] for entry in run_results]

    accuracy_values = _collect('accuracy')
    f1_values = _collect('f1_score')
    training_values = _collect('training_time')
    evaluation_values = _collect('evaluation_time')

    return {
        'accuracy_mean': np.mean(accuracy_values),
        'accuracy_std': np.std(accuracy_values),
        'f1_mean': np.mean(f1_values),
        'f1_std': np.std(f1_values),
        'train_time_mean': np.mean(training_values),
        'train_time_std': np.std(training_values),
        'eval_time_mean': np.mean(evaluation_values),
        'eval_time_std': np.std(evaluation_values),
        'best_run_idx': top_run,
        'best_cart': run_results[top_run]['cart'],
        'all_results': run_results
    }

def display_results_table(stats_depth_2, stats_depth_3):
    """Print a "mean ± std" comparison table for the depth-2 and depth-3 runs.

    Args:
        stats_depth_2: Mapping holding ``<metric>_mean`` and ``<metric>_std``
            entries for the metrics ``accuracy``, ``f1``, ``train_time`` and
            ``eval_time`` (depth-2 column).
        stats_depth_3: Mapping with the same keys (depth-3 column).

    Returns:
        None. The table is written to standard output.

    Raises:
        KeyError: If one of the expected keys is missing from either mapping.
    """
    # (row label, key prefix) for every row, in display order.
    rows = (
        ('Accuracy', 'accuracy'),
        ('F1-Score', 'f1'),
        ('Tiempo Entren. (s)', 'train_time'),
        ('Tiempo Eval. (s)', 'eval_time'),
    )

    def _cell(stats, metric):
        # Render one "mean ± std" cell with four decimals.
        mean = stats[f'{metric}_mean']
        std = stats[f'{metric}_std']
        return f"{mean:.4f} ± {std:.4f}"

    print("\n📊 TABLA DE RESULTADOS (Promedio ± Desviación Estándar)")
    print("=" * 80)
    print(f"{'Métrica':<25} {'Profundidad 2':<25} {'Profundidad 3':<25}")
    print("-" * 80)

    for label, metric in rows:
        # Pad the first two columns to the header's 25-character width so the
        # values line up under their column titles.
        print(f"{label:<25} {_cell(stats_depth_2, metric):<25} {_cell(stats_depth_3, metric)}")


def generate_tree_visualization(cart, filename):
    """Export a tree through its ``to_xml`` method and announce the output file.

    Args:
        cart: Object exposing ``to_xml(filename)``.
        filename: Path handed to ``cart.to_xml`` and echoed in the messages.

    Returns:
        Whatever ``cart.to_xml(filename)`` returns.
    """
    serialized_tree = cart.to_xml(filename)
    print(
        f"   📄 Árbol guardado en: {filename}",
        f"   🌐 Para visualizar: abrir {filename} en navegador web",
        sep="\n",
    )
    return serialized_tree


# Section banner.
print(
    "=" * 80,
    "PARTE 3 - PUNTO 2: EVALUACIÓN CON 10 PARTICIONES ALEATORIAS (70%-30%)",
    "=" * 80,
    sep="\n",
)

# Summary of the experiment configuration used by the calls below.
print(
    "\n📋 Configuración:",
    f"   • Dataset: {complete_dataset.shape[0]} observaciones",
    "   • Particiones: 10 corridas aleatorias",
    "   • División: 70% entrenamiento, 30% prueba",
    "   • Profundidades a evaluar: 2 y 3",
    "   • Mínimo 2 observaciones por hoja",
    sep="\n",
)

# Ten runs per depth; depth 2 is evaluated first, then depth 3.
stats_depth_2 = evaluate_multiple_runs(
    complete_dataset,
    max_depth=2,
    n_runs=10,
    min_observations=2,
)
stats_depth_3 = evaluate_multiple_runs(
    complete_dataset,
    max_depth=3,
    n_runs=10,
    min_observations=2,
)

# Side-by-side mean ± std table for both depths.
display_results_table(stats_depth_2, stats_depth_3)

# F1 score of the run flagged as best for each depth. The report below shows
# this per-run value next to the run number; printing the mean over all runs
# there would mislabel the average as the best run's score.
best_run_f1_depth_2 = stats_depth_2['all_results'][stats_depth_2['best_run_idx']]['f1_score']
best_run_f1_depth_3 = stats_depth_3['all_results'][stats_depth_3['best_run_idx']]['f1_score']

print(f"\n🏆 MEJORES CORRIDAS:")
print(f"   • Profundidad 2: Corrida {stats_depth_2['best_run_idx'] + 1} (F1-Score: {best_run_f1_depth_2:.4f})")
print(f"   • Profundidad 3: Corrida {stats_depth_3['best_run_idx'] + 1} (F1-Score: {best_run_f1_depth_3:.4f})")

# The overall winner is chosen on mean F1; depth 2 wins only when strictly
# greater, so ties go to depth 3.
if stats_depth_2['f1_mean'] > stats_depth_3['f1_mean']:
    best_overall, best_depth = stats_depth_2, 2
else:
    best_overall, best_depth = stats_depth_3, 3

print(f"\n🌳 GENERANDO VISUALIZACIÓN DEL MEJOR ÁRBOL:")
print(f"   • Mejor profundidad general: {best_depth}")

# Output files for the best tree of each depth.
xml_file_depth_2 = "mejor_arbol_profundidad_2.xml"
xml_file_depth_3 = "mejor_arbol_profundidad_3.xml"

generate_tree_visualization(stats_depth_2['best_cart'], xml_file_depth_2)
generate_tree_visualization(stats_depth_3['best_cart'], xml_file_depth_3)

print("\n📈 ANÁLISIS COMPARATIVO:", "-" * 60, sep="\n")

# Mean accuracy: depth 3 is reported as better only when strictly greater,
# otherwise (including ties) depth 2 is reported.
print(
    f"   • ✅ Profundidad 3 tiene mejor accuracy promedio (+{stats_depth_3['accuracy_mean'] - stats_depth_2['accuracy_mean']:.4f})"
    if stats_depth_3['accuracy_mean'] > stats_depth_2['accuracy_mean']
    else f"   • ✅ Profundidad 2 tiene mejor accuracy promedio (+{stats_depth_2['accuracy_mean'] - stats_depth_3['accuracy_mean']:.4f})"
)

# Mean F1: same rule as for accuracy.
print(
    f"   • ✅ Profundidad 3 tiene mejor F1-Score promedio (+{stats_depth_3['f1_mean'] - stats_depth_2['f1_mean']:.4f})"
    if stats_depth_3['f1_mean'] > stats_depth_2['f1_mean']
    else f"   • ✅ Profundidad 2 tiene mejor F1-Score promedio (+{stats_depth_2['f1_mean'] - stats_depth_3['f1_mean']:.4f})"
)

# Spread of each metric across runs (standard deviation).
print(
    f"   • Variabilidad Accuracy - Prof. 2: {stats_depth_2['accuracy_std']:.4f}, Prof. 3: {stats_depth_3['accuracy_std']:.4f}",
    f"   • Variabilidad F1-Score - Prof. 2: {stats_depth_2['f1_std']:.4f}, Prof. 3: {stats_depth_3['f1_std']:.4f}",
    sep="\n",
)

# The depth with the strictly smaller standard deviation counts as more
# stable; ties go to depth 3.
if stats_depth_2['accuracy_std'] < stats_depth_3['accuracy_std']:
    more_stable_acc = "Profundidad 2"
else:
    more_stable_acc = "Profundidad 3"

if stats_depth_2['f1_std'] < stats_depth_3['f1_std']:
    more_stable_f1 = "Profundidad 2"
else:
    more_stable_f1 = "Profundidad 3"

print(
    f"   • Más estable en Accuracy: {more_stable_acc}",
    f"   • Más estable en F1-Score: {more_stable_f1}",
    sep="\n",
)

# Mean training and evaluation times for both depths.
print(
    f"   • Tiempo entrenamiento - Prof. 2: {stats_depth_2['train_time_mean']:.4f}s, Prof. 3: {stats_depth_3['train_time_mean']:.4f}s",
    f"   • Tiempo evaluación - Prof. 2: {stats_depth_2['eval_time_mean']:.4f}s, Prof. 3: {stats_depth_3['eval_time_mean']:.4f}s",
    sep="\n",
)

# Closing notes: ideas for reusing the Jensen-Shannon distance from Part 1.
print(
    "\n💡 PROPUESTA DE OPTIMIZACIÓN CON JENSEN-SHANNON:",
    "-" * 60,
    "   La distancia Jensen-Shannon calculada en la Parte 1 podría usarse para:",
    "   1. Pre-seleccionar características más discriminativas antes del entrenamiento",
    "   2. Reducir el espacio de búsqueda en select_best_feature_and_thresh",
    "   3. Priorizar splits en características con mayor separabilidad entre clases",
    "   4. Implementar poda temprana basada en distancias JS bajas",
    "   5. Usar JS como criterio alternativo al Gini para splits más informativos",
    sep="\n",
)