In [2]:
# Importamos las librerías necesarias
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Locations of the training images folder and of the CSV file holding their labels
data_dir = 'captcha/train'
csv_path = 'captcha/train.csv'

# Read the label table (columns shown in the preview: Id, Label) into a DataFrame
labels_df = pd.read_csv(filepath_or_buffer=csv_path)

# Preview the first rows as a quick sanity check of what was loaded
print("Ejemplo de etiquetas:", labels_df.head(), sep="\n")

Ejemplo de etiquetas:
   Id  Label
0   0  24706
1   1  80344
2   2  76907
3   3  83941
4   4   9411


In [10]:
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Image geometry and label-encoding configuration shared by the cells below
img_height, img_width = 50, 100
num_channels = 1
# NOTE(review): the recorded training log shows a validation accuracy of 1.0
# for char_0 after the first epoch, which suggests position 0 is always the
# padding '0' (labels may never exceed 5 digits) - confirm whether this
# should be 5 instead of 6.
max_length = 6
characters = '0123456789'
num_classes = len(characters)
# Maps every allowed character to its class index (its position in `characters`)
char_to_index = dict(zip(characters, range(num_classes)))


def encode_label(label):
    """Turn a label into a list of class indices, one per character.

    The label is converted to a string and left-padded with '0' up to
    `max_length` characters, then each character is looked up in
    `char_to_index`.

    Args:
        label: The label value (e.g. an int read from the CSV, or a string).

    Returns:
        A list of int class indices. It has `max_length` entries unless the
        label's string form is already longer than `max_length`, in which
        case it has one entry per character.

    Raises:
        KeyError: If the padded label contains a character that is not in
            `characters`.
    """
    padded = str(label).zfill(max_length)
    indices = []
    for symbol in padded:
        indices.append(char_to_index[symbol])
    return indices

# Load every image listed in the CSV together with its encoded label.
# `images` and `labels` are kept strictly parallel: a sample is added to both
# lists only after BOTH its label and its image were processed successfully,
# so a failure can never leave X and y misaligned.
images = []
labels = []

for sample_id, raw_label in zip(labels_df['Id'], labels_df['Label']):
    # The image file name is the Id zero-padded to 5 digits
    filename = f"{int(sample_id):05d}.png"
    img_path = os.path.join(data_dir, filename)

    try:
        # Encode the label first; a label that does not fit in max_length
        # positions would otherwise produce a ragged row in `y`.
        encoded = encode_label(raw_label)
        if len(encoded) != max_length:
            raise ValueError(
                f"etiqueta {raw_label!r} no tiene {max_length} caracteres"
            )

        # Load as grayscale, resize to the network input size and scale the
        # pixel values by 1/255
        img = load_img(img_path, color_mode='grayscale', target_size=(img_height, img_width))
        img_array = img_to_array(img) / 255.0
    except Exception as e:
        # Best-effort loading: report the problematic sample and skip it entirely
        print(f"Error cargando {img_path}: {e}")
        continue

    images.append(img_array)
    labels.append(encoded)

# Both lists must describe the same samples in the same order
assert len(images) == len(labels), "images and labels are out of sync"

# Stack the per-sample lists into numpy arrays
X = np.array(images)
y = np.array(labels)

print("X shape:", X.shape)
print("y shape:", y.shape)

X shape: (10000, 50, 100, 1)
y shape: (10000, 6)


In [4]:
def crear_cnn():
    """Build the multi-output CNN used to read the CAPTCHA.

    The architecture is three Conv2D(3x3, relu, same padding) + 2x2 max-pool
    stages with 32, 64 and 128 filters, followed by Flatten, Dense(512, relu)
    and Dropout(0.5). On top of that shared trunk there is one softmax Dense
    head per character position.

    The input shape and the head configuration are read from the
    module-level names `img_height`, `img_width`, `num_channels`,
    `max_length` and `num_classes`.

    Returns:
        An uncompiled Keras model named 'captcha_cnn' with `max_length`
        outputs named 'char_0', 'char_1', ...
    """
    from tensorflow.keras import layers, models

    inputs = tf.keras.Input(shape=(img_height, img_width, num_channels))

    # Convolutional trunk: each stage halves the spatial resolution
    x = inputs
    for n_filters in (32, 64, 128):
        x = layers.Conv2D(n_filters, (3, 3), activation='relu', padding='same')(x)
        x = layers.MaxPooling2D(pool_size=(2, 2))(x)

    # Shared fully-connected representation
    x = layers.Flatten()(x)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.Dropout(0.5)(x)

    # One classification head per character position of the CAPTCHA
    outputs = [
        layers.Dense(num_classes, activation='softmax', name=f'char_{pos}')(x)
        for pos in range(max_length)
    ]

    return models.Model(inputs=inputs, outputs=outputs, name='captcha_cnn')


# Instantiate the network and print its layer-by-layer summary
model = crear_cnn()
model.summary()

In [11]:
# One-hot encode the integer labels separately for each character position,
# giving one target array per output head of the model
y_onehot = [
    tf.keras.utils.to_categorical(y[:, pos], num_classes=num_classes)
    for pos in range(max_length)
]

# Report the shape of every target array as a sanity check
for pos, target in enumerate(y_onehot):
    print(f"y_onehot[{pos}].shape = {target.shape}")

y_onehot[0].shape = (10000, 10)
y_onehot[1].shape = (10000, 10)
y_onehot[2].shape = (10000, 10)
y_onehot[3].shape = (10000, 10)
y_onehot[4].shape = (10000, 10)
y_onehot[5].shape = (10000, 10)


In [14]:
# Compile the model: every output head gets its own categorical
# cross-entropy loss and its own accuracy metric
per_output_losses = ['categorical_crossentropy' for _ in range(max_length)]
per_output_metrics = [['accuracy'] for _ in range(max_length)]

model.compile(
    optimizer='adam',
    loss=per_output_losses,
    metrics=per_output_metrics,
)

In [15]:
# Train for 10 epochs in batches of 64, holding out 10% of the samples for
# validation. `y_onehot` is expected to already be a list of one-hot target
# arrays, one per output head.
history = model.fit(
    x=X,
    y=y_onehot,
    validation_split=0.1,
    epochs=10,
    batch_size=64,
)

Epoch 1/10
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 259ms/step - char_0_accuracy: 0.9622 - char_0_loss: 0.1976 - char_1_accuracy: 0.1051 - char_1_loss: 2.3354 - char_2_accuracy: 0.0945 - char_2_loss: 2.3515 - char_3_accuracy: 0.0960 - char_3_loss: 2.3514 - char_4_accuracy: 0.0985 - char_4_loss: 2.3353 - char_5_accuracy: 0.0953 - char_5_loss: 2.3473 - loss: 11.9185 - val_char_0_accuracy: 1.0000 - val_char_0_loss: 0.0209 - val_char_1_accuracy: 0.1340 - val_char_1_loss: 2.3051 - val_char_2_accuracy: 0.0990 - val_char_2_loss: 2.3044 - val_char_3_accuracy: 0.0990 - val_char_3_loss: 2.3093 - val_char_4_accuracy: 0.1180 - val_char_4_loss: 2.3051 - val_char_5_accuracy: 0.1110 - val_char_5_loss: 2.3043 - val_loss: 11.5496
Epoch 2/10
[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 249ms/step - char_0_accuracy: 1.0000 - char_0_loss: 0.0178 - char_1_accuracy: 0.1351 - char_1_loss: 2.2737 - char_2_accuracy: 0.1235 - char_2_loss: 2.2774 - char_3_accuracy: