In [9]:
# =====================================================
# MODELO CON PESOS COMPARTIDOS
# =====================================================

import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Concatenate
from sklearn.metrics import roc_auc_score, confusion_matrix, classification_report


In [10]:
#-------------------------------------------------
# Cargar MNIST
# -------------------------------------------------

(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.astype("float32") / 255.
X_test = X_test.astype("float32") / 255.

X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)


In [11]:
def crear_pares_balanceado(X, y, n_pairs=20000):
    pares = []
    etiquetas = []
    
    # Construyo índice por clase
    clases = {}
    for i in range(10):
        clases[i] = np.where(y == i)[0]

    mitad = n_pairs // 2

    # ---------------------------
    # Pares iguales (positivos)
    # ---------------------------
    for _ in range(mitad):
        c = np.random.randint(0, 10)
        i1, i2 = np.random.choice(clases[c], size=2, replace=False)
        pares.append([X[i1], X[i2]])
        etiquetas.append(1)

    # ---------------------------
    # Pares distintos (negativos)
    # ---------------------------
    for _ in range(mitad):
        c1, c2 = np.random.choice(range(10), size=2, replace=False)
        i1 = np.random.choice(clases[c1])
        i2 = np.random.choice(clases[c2])
        pares.append([X[i1], X[i2]])
        etiquetas.append(0)

    # Mezclar para evitar orden fijo
    pares = np.array(pares)
    etiquetas = np.array(etiquetas)

    idx = np.random.permutation(n_pairs)
    return pares[idx], etiquetas[idx]


In [12]:
# -------------------------------------------------
# Crear pares
# -------------------------------------------------

def crear_pares(X, y):
    pares = []
    etiquetas = []

    for _ in range(20000):  
        idx1 = np.random.randint(0, len(X))
        idx2 = np.random.randint(0, len(X))

        pares.append([X[idx1], X[idx2]])
        etiquetas.append(1 if y[idx1] == y[idx2] else 0)

    return np.array(pares), np.array(etiquetas)


In [13]:
train_pairs, train_labels = crear_pares_balanceado(X_train, y_train)
test_pairs, test_labels = crear_pares_balanceado(X_test, y_test)

A_train = train_pairs[:, 0]
B_train = train_pairs[:, 1]
A_test = test_pairs[:, 0]
B_test = test_pairs[:, 1]

print("A_train --> ", A_train.shape)
print("B_train --> ", B_train.shape)
print("A_test  --> ", B_test.shape)
print("B_test  --> ", B_test.shape)

print("Pares iguales en train :", np.sum(train_labels == 1))
print("Pares distintos en train:", np.sum(train_labels == 0))

print("Pares iguales en test  :", np.sum(test_labels == 1))
print("Pares distintos en test :", np.sum(test_labels == 0))

A_train -->  (20000, 784)
B_train -->  (20000, 784)
A_test  -->  (20000, 784)
B_test  -->  (20000, 784)
Pares iguales en train : 10000
Pares distintos en train: 10000
Pares iguales en test  : 10000
Pares distintos en test : 10000


In [14]:
# -------------------------------------------------
# Modelo siamés
# -------------------------------------------------

shared_dense = Dense(64, activation="tanh")

input_a = Input(shape=(784,))
input_b = Input(shape=(784,))

output_a = shared_dense(input_a)
output_b = shared_dense(input_b)

merged = Concatenate()([output_a, output_b])
final = Dense(1, activation="sigmoid")(merged)

model = Model([input_a, input_b], final)

model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy", "AUC"]   # <---- MÉTRICA EXTRA
)

model.summary()


Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_5 (InputLayer)           [(None, 784)]        0           []                               
                                                                                                  
 input_6 (InputLayer)           [(None, 784)]        0           []                               
                                                                                                  
 dense_4 (Dense)                (None, 64)           50240       ['input_5[0][0]',                
                                                                  'input_6[0][0]']                
                                                                                                  
 concatenate_2 (Concatenate)    (None, 128)          0           ['dense_4[0][0]',          

In [15]:
# -------------------------------------------------
# Entrenamiento
# -------------------------------------------------

model.fit(
    [A_train, B_train],
    train_labels,
    validation_data=([A_test, B_test], test_labels),
    epochs=10,
    batch_size=32
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1ee60d0d648>

In [16]:
# -------------------------------------------------
# Evaluación final con métricas adicionales
# -------------------------------------------------

# Predicciones para AUC y matriz de confusión
y_pred_proba = model.predict([A_test, B_test])
y_pred = (y_pred_proba > 0.5).astype(int)

auc = roc_auc_score(test_labels, y_pred_proba)
cm = confusion_matrix(test_labels, y_pred)
cr = classification_report(test_labels, y_pred)

print("\n===== MÉTRICAS FINALES =====")
print(f"AUC FINAL: {auc:.4f}")
print("\nMatriz de Confusión:")
print(cm)
print("\nReporte de Clasificación:")
print(cr)



===== MÉTRICAS FINALES =====
AUC FINAL: 0.5001

Matriz de Confusión:
[[5454 4546]
 [5453 4547]]

Reporte de Clasificación:
              precision    recall  f1-score   support

           0       0.50      0.55      0.52     10000
           1       0.50      0.45      0.48     10000

    accuracy                           0.50     20000
   macro avg       0.50      0.50      0.50     20000
weighted avg       0.50      0.50      0.50     20000

