In [None]:
import cv2
import string
import numpy as np
import matplotlib.pyplot as plt
import os

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model

import kagglehub

# Fetch the latest version of the dataset; `path` is the local directory
# that kagglehub reports for the downloaded files.
path = kagglehub.dataset_download("fournierp/captcha-version-2-images")
print("Path to dataset files:", path)

# Symbol set used for labels: lowercase letters followed by the ten digits.
# A character's index in this string is its class index.
sym = string.ascii_lowercase + string.digits
num_sym = len(sym)

# Shape of one model input: 50 rows x 200 columns, single (grayscale) channel
img_shape = (50, 200, 1)

print("Number of symbols:", num_sym)

def create_model():
    """Build and compile the multi-output CNN used to solve the CAPTCHAs.

    Four Conv2D + MaxPooling2D stages (with batch normalization inside the
    third) produce a feature map that is flattened and shared by five
    independent classification heads, one per character position. Each head
    ends in a softmax over ``num_sym`` classes.

    Returns:
        The compiled Keras ``Model`` with one image input and five outputs.
    """
    # (number of filters, insert BatchNormalization after the convolution?)
    conv_stages = [(32, False), (64, False), (128, True), (128, False)]

    img = layers.Input(shape=img_shape)

    # Layers are instantiated in the order conv -> [batch norm] -> pool for
    # each stage, so Keras' auto-generated layer names stay stable.
    features = img
    for n_filters, with_batch_norm in conv_stages:
        features = layers.Conv2D(
            n_filters, (3, 3), padding='same', activation='relu'
        )(features)
        if with_batch_norm:
            features = layers.BatchNormalization()(features)
        features = layers.MaxPooling2D(padding='same')(features)

    flat = layers.Flatten()(features)

    def character_head(shared):
        # One classifier head: hidden layer, dropout, softmax over the symbols
        hidden = layers.Dense(128, activation='relu')(shared)
        hidden = layers.Dropout(0.5)(hidden)
        return layers.Dense(num_sym, activation='softmax')(hidden)

    # Five heads, one for each of the 5 characters
    outputs = [character_head(flat) for _ in range(5)]

    model = Model(img, outputs)
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        # One accuracy metric per output head
        metrics=['accuracy'] * 5,
    )
    return model

def preprocessing():
    """Load the CAPTCHA images and one-hot encode their labels.

    Every entry of ``<path>/samples/samples`` is read as a grayscale image.
    The label is the file name without its extension and must be exactly
    5 characters long. A file is skipped (with a warning where it indicates
    a problem) when it cannot be decoded, when its size differs from
    ``img_shape``, or when its label contains a character outside ``sym``.

    NOTE: samples are returned in ``os.listdir`` order, which is arbitrary;
    callers that split the arrays by position get a filesystem-dependent
    split.

    Returns:
        X: float array of shape (n_valid, height, width, 1), scaled to [0, 1].
        y: float array of shape (5, n_valid, num_sym); ``y[j, n]`` is the
           one-hot encoding of character ``j`` of sample ``n``.

    Raises:
        FileNotFoundError: if the samples directory does not exist.
    """
    num_chars = 5  # every CAPTCHA in the dataset has 5 characters
    height, width = img_shape[:2]

    data_path = os.path.join(path, "samples/samples")

    if not os.path.exists(data_path):
        raise FileNotFoundError(f"Data path does not exist: {data_path}")

    file_list = os.listdir(data_path)
    samp = len(file_list)

    # Allocate for the worst case (every file valid); trimmed before returning
    X = np.zeros((samp, height, width, 1))
    y = np.zeros((num_chars, samp, num_sym))

    valid_samples = 0
    for p in file_list:
        img_path = os.path.join(data_path, p)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

        if img is None:
            print(f"Warning: Could not read image {p}")
            continue

        # splitext handles any extension length; slicing off a fixed 4
        # characters would mislabel names such as "abcdefghi" or "abcde.jpeg"
        p_target = os.path.splitext(p)[0]
        if len(p_target) != num_chars:
            continue

        # reshape() would silently scramble an image that has the right
        # number of pixels but the wrong dimensions, so check explicitly
        if img.shape != (height, width):
            print(f"Warning: Skipping {p}: unexpected image size {img.shape}")
            continue

        indices = [sym.find(ch) for ch in p_target]
        if -1 in indices:
            # Keeping the sample would leave an all-zero target row for the
            # unknown character, which is not a valid one-hot label
            l = p_target[indices.index(-1)]
            print(f"Warning: Character '{l}' not found in symbol set; skipping {p}")
            continue

        X[valid_samples] = np.reshape(img / 255.0, (height, width, 1))
        y[np.arange(num_chars), valid_samples, indices] = 1
        valid_samples += 1

    # Trim arrays to actual valid samples
    X = X[:valid_samples]
    y = y[:, :valid_samples]

    return X, y

def prediction(file_path):
    """Predict the CAPTCHA text shown in an image file.

    Uses the module-level ``model``, which must already exist when this
    function is called.

    Args:
        file_path: Path of the image to read.

    Returns:
        The predicted string, one character per model output, or ``None``
        (after printing an error) if the file is missing, cannot be decoded,
        or does not have the height and width given by ``img_shape``.
    """
    # Check if file exists
    if not os.path.exists(file_path):
        print(f"Error: File not found - {file_path}")
        return None

    # Read image
    img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)

    if img is None:
        print("Error: Could not read image")
        return None

    # The network only accepts inputs of the training size; fail with a clear
    # message here instead of an opaque shape error from inside the model
    expected_hw = tuple(img_shape[:2])
    if img.shape != expected_hw:
        print(f"Error: Expected image of shape {expected_hw}, got {img.shape}")
        return None

    # Scale pixel values to [0, 1], as done for the training data
    img = img / 255.0

    # Add the batch and channel axes: (H, W) -> (1, H, W, 1)
    pred = model.predict(img[np.newaxis, :, :, np.newaxis])

    # Each output holds the class scores for one character position
    return ''.join(sym[np.argmax(char_scores)] for char_scores in pred)

# Main execution: load the data, split it, build the model, train, plot
print("Starting preprocessing...")
X, y = preprocessing()
print(f"Loaded {X.shape[0]} samples")

# The model has one output per character position, so Keras needs a list of
# 5 target arrays; y is indexed (position, sample, class)
y_list = [y[i] for i in range(5)]

# Split train/test data by position: first 80% for training, rest for testing
split_idx = int(0.8 * len(X))
X_train, y_train = X[:split_idx], [arr[:split_idx] for arr in y_list]
X_test, y_test = X[split_idx:], [arr[split_idx:] for arr in y_list]

print(f"Training samples: {X_train.shape[0]}")
print(f"Testing samples: {X_test.shape[0]}")

# Create model
print("Creating model...")
model = create_model()

# Display model summary
model.summary()

# Train the model; validation_split holds out part of X_train for validation
print("Starting training...")
history = model.fit(
    X_train, y_train,
    batch_size=64,
    epochs=50,
    verbose=1,
    validation_split=0.2
)

# Keras numbers auto-named layers with a counter that keeps growing within a
# session, so the first head is only called "dense_1" the first time
# create_model() runs. Look the name up instead of hard-coding it, otherwise
# re-running this cell in the same kernel raises KeyError.
first_output = model.output_names[0]
train_acc_key = f"{first_output}_accuracy"
val_acc_key = f"val_{first_output}_accuracy"

# Plot training curves
plt.figure(figsize=(12, 5))

# Loss
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Loss Curve')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Accuracy (for the first character output, pattern is similar for others)
plt.subplot(1, 2, 2)
plt.plot(history.history[train_acc_key], label='Train Acc (Char 1)')
plt.plot(history.history[val_acc_key], label='Val Acc (Char 1)')
plt.title('Accuracy Curve (Char 1)')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.show()

Downloading from https://www.kaggle.com/api/v1/datasets/download/fournierp/captcha-version-2-images?dataset_version_number=2...


100%|██████████| 17.4M/17.4M [00:01<00:00, 13.1MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/fournierp/captcha-version-2-images/versions/2
Number of symbols: 36
Starting preprocessing...
Loaded 1070 samples
Training samples: 856
Testing samples: 214
Creating model...


Starting training...
Epoch 1/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 2s/step - dense_1_accuracy: 0.0414 - dense_1_loss: 3.5770 - dense_3_accuracy: 0.0482 - dense_3_loss: 3.7549 - dense_5_accuracy: 0.0548 - dense_5_loss: 3.6357 - dense_7_accuracy: 0.0396 - dense_7_loss: 3.5888 - dense_9_accuracy: 0.0497 - dense_9_loss: 3.5999 - loss: 18.1574 - val_dense_1_accuracy: 0.1163 - val_dense_1_loss: 3.5647 - val_dense_3_accuracy: 0.0640 - val_dense_3_loss: 3.5627 - val_dense_5_accuracy: 0.0581 - val_dense_5_loss: 3.5679 - val_dense_7_accuracy: 0.0523 - val_dense_7_loss: 3.5643 - val_dense_9_accuracy: 0.1105 - val_dense_9_loss: 3.5654 - val_loss: 17.8246
Epoch 2/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2s/step - dense_1_accuracy: 0.0598 - dense_1_loss: 3.2912 - dense_3_accuracy: 0.0860 - dense_3_loss: 3.2543 - dense_5_accuracy: 0.0581 - dense_5_loss: 3.2952 - dense_7_accuracy: 0.0575 - dense_7_loss: 3.3007 - dense_9_accuracy: 0.0793 - dense_

In [None]:
# Score the held-out split with Keras' built-in evaluation; `sc` holds
# whatever model.evaluate returns for the compiled loss and metrics
print("Evaluating model...")
sc = model.evaluate(x=X_test, y=y_test, verbose=1)
print('Test Loss and accuracy:', sc)

# --- Add this function here ---
def evaluate_model(model, X_test, y_test):
    """Print and return per-character and whole-CAPTCHA accuracy.

    Args:
        model: Object whose ``predict(X_test)`` returns one score array per
            character position, each of shape (n_samples, n_classes).
        X_test: Input array; its first dimension is the number of samples.
        y_test: Sequence of one-hot label arrays, one per character position,
            in the same order as the outputs of ``model.predict``.

    Returns:
        ``(char_acc, full_acc)`` where ``char_acc`` is a list with the
        accuracy of each character position and ``full_acc`` is the fraction
        of samples for which every position is correct. Both are fractions
        in [0, 1]. An empty test set yields ``([], 0.0)``.

    Raises:
        ValueError: if the number of prediction outputs differs from the
            number of label arrays.
    """
    total_samples = X_test.shape[0]
    if total_samples == 0:
        # Avoids dividing by zero below; there is nothing to score
        print("No test samples to evaluate.")
        return [], 0.0

    preds = model.predict(X_test)
    if len(preds) != len(y_test):
        raise ValueError(
            f"Model produced {len(preds)} outputs but {len(y_test)} label arrays were given"
        )

    # Class indices arranged as (position, sample)
    pred_idx = np.stack([np.argmax(p, axis=1) for p in preds])
    true_idx = np.stack([np.argmax(t, axis=1) for t in y_test])
    hits = pred_idx == true_idx

    # Convert to plain floats so the printed list shows numbers, not
    # NumPy scalar reprs
    char_acc = [float(a) for a in hits.mean(axis=1)]

    # Full captcha accuracy: a sample counts only if all positions match
    correct_full = int(hits.all(axis=0).sum())
    full_acc = correct_full / total_samples

    print("Character-level accuracy:", [round(a*100, 2) for a in char_acc])
    print("Full CAPTCHA accuracy:", round(full_acc*100, 2), "%")

    return char_acc, full_acc

# Report per-character and whole-CAPTCHA accuracy on the held-out split
evaluate_model(model, X_test, y_test)

# Spot-check a handful of individual images from the dataset directory
print("\nTesting predictions...")
sample_files = ["8n5p3.png", "f2m8n.png", "dce8y.png", "3eny7.png", "npxb7.png"]

for filename in sample_files:
    file_path = os.path.join(path, "samples/samples", filename)
    pred_result = prediction(file_path)
    if not pred_result:
        # prediction() gave back None (or an empty string): nothing to show
        continue
    print(f"{filename}: {pred_result}")

In [None]:
# Show one CAPTCHA together with the model's prediction for it.
# Other samples to try: 8n5p3.png, f2m8n.png, dce8y.png, 3eny7.png
sample_img_path = os.path.join(path, "samples/samples/npxb7.png")
if os.path.exists(sample_img_path):
    img = cv2.imread(sample_img_path, cv2.IMREAD_GRAYSCALE)
    if img is not None:
        fig, ax = plt.subplots(figsize=(10, 3))
        ax.imshow(img, cmap='gray')
        ax.set_title(f"Sample CAPTCHA - Predicted: {prediction(sample_img_path)}")
        ax.axis('off')
        plt.show()

print("Training completed!")

In [None]:
# Write the trained model to disk
model_file = "captcha_solver.keras"
model.save(model_file)
print(f"Model saved as {model_file}")