In [4]:
# =====================================================
# MODELO CON PESOS COMPARTIDOS
# =====================================================

import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Concatenate
from sklearn.metrics import roc_auc_score, confusion_matrix, classification_report


In [5]:
#-------------------------------------------------
# Cargar MNIST
# -------------------------------------------------

(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.astype("float32") / 255.
X_test = X_test.astype("float32") / 255.

X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [6]:
def crear_pares_balanceado(X, y, n_pairs=20000):
    pares = []
    etiquetas = []
    
    # Construyo índice por clase
    clases = {}
    for i in range(10):
        clases[i] = np.where(y == i)[0]

    mitad = n_pairs // 2

    # ---------------------------
    # Pares iguales (positivos)
    # ---------------------------
    for _ in range(mitad):
        c = np.random.randint(0, 10)
        i1, i2 = np.random.choice(clases[c], size=2, replace=False)
        pares.append([X[i1], X[i2]])
        etiquetas.append(1)

    # ---------------------------
    # Pares distintos (negativos)
    # ---------------------------
    for _ in range(mitad):
        c1, c2 = np.random.choice(range(10), size=2, replace=False)
        i1 = np.random.choice(clases[c1])
        i2 = np.random.choice(clases[c2])
        pares.append([X[i1], X[i2]])
        etiquetas.append(0)

    # Mezclar para evitar orden fijo
    pares = np.array(pares)
    etiquetas = np.array(etiquetas)

    idx = np.random.permutation(n_pairs)
    return pares[idx], etiquetas[idx]


In [7]:
# -------------------------------------------------
# Crear pares
# -------------------------------------------------

def crear_pares(X, y):
    pares = []
    etiquetas = []

    for _ in range(20000):  
        idx1 = np.random.randint(0, len(X))
        idx2 = np.random.randint(0, len(X))

        pares.append([X[idx1], X[idx2]])
        etiquetas.append(1 if y[idx1] == y[idx2] else 0)

    return np.array(pares), np.array(etiquetas)


In [8]:
train_pairs, train_labels = crear_pares_balanceado(X_train, y_train)
test_pairs, test_labels = crear_pares_balanceado(X_test, y_test)

A_train = train_pairs[:, 0]
B_train = train_pairs[:, 1]
A_test = test_pairs[:, 0]
B_test = test_pairs[:, 1]

print("A_train --> ", A_train.shape)
print("B_train --> ", B_train.shape)
print("A_test  --> ", B_test.shape)
print("B_test  --> ", B_test.shape)

print("Pares iguales en train :", np.sum(train_labels == 1))
print("Pares distintos en train:", np.sum(train_labels == 0))

print("Pares iguales en test  :", np.sum(test_labels == 1))
print("Pares distintos en test :", np.sum(test_labels == 0))

A_train -->  (20000, 784)
B_train -->  (20000, 784)
A_test  -->  (20000, 784)
B_test  -->  (20000, 784)
Pares iguales en train : 10000
Pares distintos en train: 10000
Pares iguales en test  : 10000
Pares distintos en test : 10000


In [9]:
# -------------------------------------------------
# Modelo siamés
# -------------------------------------------------

shared_dense = Dense(64, activation="tanh")

input_a = Input(shape=(784,))
input_b = Input(shape=(784,))

output_a = shared_dense(input_a)
output_b = shared_dense(input_b)

merged = Concatenate()([output_a, output_b])
final = Dense(1, activation="sigmoid")(merged)

model = Model([input_a, input_b], final)

model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy", "AUC"]   # <---- MÉTRICA EXTRA
)

model.summary()


In [10]:
# -------------------------------------------------
# Entrenamiento
# -------------------------------------------------

model.fit(
    [A_train, B_train],
    train_labels,
    validation_data=([A_test, B_test], test_labels),
    epochs=10,
    batch_size=32
)


Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - AUC: 0.5042 - accuracy: 0.5009 - loss: 0.7068 - val_AUC: 0.5028 - val_accuracy: 0.5014 - val_loss: 0.7065
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - AUC: 0.5391 - accuracy: 0.5294 - loss: 0.6924 - val_AUC: 0.5037 - val_accuracy: 0.5069 - val_loss: 0.6974
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - AUC: 0.5547 - accuracy: 0.5412 - loss: 0.6887 - val_AUC: 0.5018 - val_accuracy: 0.4982 - val_loss: 0.6987
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - AUC: 0.5658 - accuracy: 0.5464 - loss: 0.6858 - val_AUC: 0.5089 - val_accuracy: 0.5017 - val_loss: 0.6988
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - AUC: 0.5882 - accuracy: 0.5612 - loss: 0.6802 - val_AUC: 0.5038 - val_accuracy: 0.5044 - val_loss: 0.7058
Epoch 6/10
[1m625/625[0m [3

<keras.src.callbacks.history.History at 0x1be7a8ef770>

In [11]:
# -------------------------------------------------
# Evaluación final con métricas adicionales
# -------------------------------------------------

# Predicciones para AUC y matriz de confusión
y_pred_proba = model.predict([A_test, B_test])
y_pred = (y_pred_proba > 0.5).astype(int)

auc = roc_auc_score(test_labels, y_pred_proba)
cm = confusion_matrix(test_labels, y_pred)
cr = classification_report(test_labels, y_pred)

print("\n===== MÉTRICAS FINALES =====")
print(f"AUC FINAL: {auc:.4f}")
print("\nMatriz de Confusión:")
print(cm)
print("\nReporte de Clasificación:")
print(cr)


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 688us/step

===== MÉTRICAS FINALES =====
AUC FINAL: 0.5034

Matriz de Confusión:
[[5396 4604]
 [5338 4662]]

Reporte de Clasificación:
              precision    recall  f1-score   support

           0       0.50      0.54      0.52     10000
           1       0.50      0.47      0.48     10000

    accuracy                           0.50     20000
   macro avg       0.50      0.50      0.50     20000
weighted avg       0.50      0.50      0.50     20000

