In [1]:
import pandas as pd
import numpy as np
import time

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer

from sklearn.metrics import accuracy_score, recall_score, f1_score, roc_auc_score

from kan import KAN


In [2]:
df = pd.read_csv("../training.csv")

df.head()

Unnamed: 0,EventId,DER_mass_MMC,DER_mass_transverse_met_lep,DER_mass_vis,DER_pt_h,DER_deltaeta_jet_jet,DER_mass_jet_jet,DER_prodeta_jet_jet,DER_deltar_tau_lep,DER_pt_tot,...,PRI_jet_num,PRI_jet_leading_pt,PRI_jet_leading_eta,PRI_jet_leading_phi,PRI_jet_subleading_pt,PRI_jet_subleading_eta,PRI_jet_subleading_phi,PRI_jet_all_pt,Weight,Label
0,100000,138.47,51.655,97.827,27.98,0.91,124.711,2.666,3.064,41.928,...,2,67.435,2.15,0.444,46.062,1.24,-2.475,113.497,0.002653,s
1,100001,160.937,68.768,103.235,48.146,-999.0,-999.0,-999.0,3.473,2.078,...,1,46.226,0.725,1.158,-999.0,-999.0,-999.0,46.226,2.233584,b
2,100002,-999.0,162.172,125.953,35.635,-999.0,-999.0,-999.0,3.148,9.336,...,1,44.251,2.053,-2.028,-999.0,-999.0,-999.0,44.251,2.347389,b
3,100003,143.905,81.417,80.943,0.414,-999.0,-999.0,-999.0,3.31,0.414,...,0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-0.0,5.446378,b
4,100004,175.864,16.915,134.805,16.405,-999.0,-999.0,-999.0,3.891,16.405,...,0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,0.0,6.245333,b


In [3]:
# Reemplazar valores inválidos
df.replace(-999.0, np.nan, inplace=True)

# Separar features y target
X = df.drop(columns=['EventId', 'Weight', 'Label'])
y = df['Label'].map({'s': 1, 'b': 0})  # Convertir a 1 y 0


In [4]:
# Imputar con la media
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)
X = pd.DataFrame(X_imputed, columns=X.columns)


In [5]:
# Normalizar entre 0 y 1
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)


In [6]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

print(f"Train shape: {X_train.shape}, Test shape: {X_test.shape}")


Train shape: (200000, 30), Test shape: (50000, 30)


In [13]:
import torch

# Convertir a tensores
train_inputs = torch.tensor(X_train, dtype=torch.float32)
test_inputs = torch.tensor(X_test, dtype=torch.float32)
train_labels = torch.tensor(y_train.to_numpy(), dtype=torch.long)
test_labels = torch.tensor(y_test.to_numpy(), dtype=torch.long)

# Crear dataset para el modelo
dataset = {
    'train_input': train_inputs,
    'train_label': train_labels,
    'test_input': test_inputs,
    'test_label': test_labels
}


In [14]:
from kan import KAN

model = KAN(width=[24, 12, 6, 2], grid=5, k=3, seed=0, device='cpu')

# Probar predicción antes de entrenar
model(dataset['train_input'])

# Entrenar
results = model.fit(
    dataset,
    opt="Adam",
    metrics=(), 
    loss_fn=torch.nn.CrossEntropyLoss(),
    steps=40,
    lamb=0.001,
    lamb_entropy=0.1
)


checkpoint directory created: ./model
saving model version 0.0




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

| train_loss: 1.25e+00 | test_loss: 9.36e-01 | reg: 1.39e+03 | : 100%|█| 40/40 [08:08<00:00, 12.21s/

saving model version 0.1





In [15]:
# Obtener predicciones en test
with torch.no_grad():
    logits = model(dataset['test_input'])
    y_pred_test = torch.argmax(logits, dim=1).numpy()
    y_true_test = dataset['test_label'].numpy()
    y_prob_test = torch.softmax(logits, dim=1)[:, 1].numpy()

# Calcular métricas
acc = accuracy_score(y_true_test, y_pred_test)
recall = recall_score(y_true_test, y_pred_test)
f1 = f1_score(y_true_test, y_pred_test)
auc = roc_auc_score(y_true_test, y_prob_test)

# Mostrar resultados
print("🔍 Métricas del modelo KAN:")
print(f"Accuracy:     {acc:.4f}")
print(f"Recall:       {recall:.4f}")
print(f"F1 Score:     {f1:.4f}")
print(f"AUC-ROC:      {auc:.4f}")


🔍 Métricas del modelo KAN:
Accuracy:     0.5624
Recall:       0.2990
F1 Score:     0.3189
AUC-ROC:      0.4857


In [17]:
def train_acc():
    return torch.mean((torch.argmax(model(dataset['train_input']), dim=1) == dataset['train_label']).float())

def test_acc():
    return torch.mean((torch.argmax(model(dataset['test_input']), dim=1) == dataset['test_label']).float())


In [18]:
model = KAN(
    width=[24, 20, 10, 2],  # arquitectura más expresiva
    grid=5,
    k=3,
    seed=0,
    device='cpu'  # o 'cuda' si estás en GPU
)

# Entrenamiento
results = model.fit(
    dataset,
    opt="Adam",
    metrics=(train_acc, test_acc),  # si quieres ver evolución
    loss_fn=torch.nn.CrossEntropyLoss(),
    steps=100,                      # más pasos de entrenamiento
    lamb=0.0001,                    # penalización más relajada
    lamb_entropy=0.01              # más libertad para ajustar los nodos
)


checkpoint directory created: ./model
saving model version 0.0




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

saving model version 0.1


In [19]:
with torch.no_grad():
    logits = model(dataset['test_input'])
    y_pred_test = torch.argmax(logits, dim=1).numpy()
    y_true_test = dataset['test_label'].numpy()
    y_prob_test = torch.softmax(logits, dim=1)[:, 1].numpy()

print("🔍 Métricas del modelo KAN:")
print("Accuracy:", round(accuracy_score(y_true_test, y_pred_test), 4))
print("Recall:", round(recall_score(y_true_test, y_pred_test), 4))
print("F1 Score:", round(f1_score(y_true_test, y_pred_test), 4))
print("AUC-ROC:", round(roc_auc_score(y_true_test, y_prob_test), 4))


🔍 Métricas del modelo KAN:
Accuracy: 0.7416
Recall: 0.5398
F1 Score: 0.5888
AUC-ROC: 0.8


In [20]:
model = KAN(
    width=[24, 20, 10, 4, 2],  # más profundidad
    grid=7,
    k=3,
    seed=0,
    device='cpu'
)

results = model.fit(
    dataset,
    opt="Adam",
    metrics=(train_acc, test_acc),
    loss_fn=torch.nn.CrossEntropyLoss(),
    steps=150,              # más entrenamiento
    lamb=0.0001,
    lamb_entropy=0.01
)
import os; os.system('say "KAN terminado"')

checkpoint directory created: ./model
saving model version 0.0




[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

saving model version 0.1


0

In [21]:
from sklearn.metrics import accuracy_score, recall_score, f1_score, roc_auc_score
import torch

# Obtener predicciones en test
with torch.no_grad():
    logits = model(dataset['test_input'])
    y_pred_test = torch.argmax(logits, dim=1).numpy()
    y_true_test = dataset['test_label'].numpy()
    y_prob_test = torch.softmax(logits, dim=1)[:, 1].numpy()

# Calcular métricas
acc = accuracy_score(y_true_test, y_pred_test)
recall = recall_score(y_true_test, y_pred_test)
f1 = f1_score(y_true_test, y_pred_test)
auc = roc_auc_score(y_true_test, y_prob_test)

# Mostrar resultados
print("🔍 Métricas finales del modelo KAN:")
print(f"Accuracy:  {acc:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1 Score:  {f1:.4f}")
print(f"AUC-ROC:   {auc:.4f}")


🔍 Métricas finales del modelo KAN:
Accuracy:  0.8077
Recall:    0.6549
F1 Score:  0.7000
AUC-ROC:   0.8713
