<a href="https://colab.research.google.com/github/elango-ela/Captcha-Solving-Model/blob/main/Captcha_Solving_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install mltu

Installing collected packages: humanfriendly, coloredlogs, onnxruntime, mltu
Successfully installed coloredlogs-15.0.1 humanfriendly-10.0 mltu-1.1.8 onnxruntime-1.17.1


In [None]:
!pip install tf2onnx


Collecting tf2onnx
  Downloading tf2onnx-1.16.1-py3-none-any.whl (455 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/455.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━[0m [32m286.7/455.8 kB[0m [31m8.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m455.8/455.8 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
Collecting onnx>=1.4.1 (from tf2onnx)
  Downloading onnx-1.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.7/15.7 MB[0m [31m53.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: onnx, tf2onnx
Successfully installed onnx-1.15.0 tf2onnx-1.16.1


In [None]:
from tensorflow import keras

In [None]:
from keras import layers
from keras.models import Model
from mltu.tensorflow.model_utils import residual_block

In [None]:
def train_model(input_dim, output_dim, activation="leaky_relu", dropout=0.2):
    """Assemble the residual-CNN + bidirectional-LSTM recognition network.

    Args:
        input_dim: Shape handed to `layers.Input` for the image tensor.
        output_dim: Number of output classes before the extra unit is added;
            the final Dense layer has `output_dim + 1` units (presumably the
            extra unit is the CTC blank -- confirm against the loss in use).
        activation: Activation name forwarded to every `residual_block`.
        dropout: Dropout rate forwarded to every `residual_block` and applied
            once more after the bidirectional LSTM.

    Returns:
        An uncompiled `Model` from the layer named "input" to the softmax
        layer named "output".
    """
    inputs = layers.Input(shape=input_dim, name="input")

    # Scale pixel values inside the graph rather than in preprocessing.
    features = layers.Lambda(lambda x: x / 255)(inputs)

    # (filters, skip_conv, strides) of each residual block, in application order.
    block_specs = (
        (16, True, 1),
        (16, True, 2),
        (16, False, 1),
        (32, True, 2),
        (32, False, 1),
        (64, True, 2),
        (32, True, 1),
        (64, True, 2),
        (64, False, 1),
    )
    for filters, skip_conv, strides in block_specs:
        features = residual_block(
            features,
            filters,
            activation=activation,
            skip_conv=skip_conv,
            strides=strides,
            dropout=dropout,
        )

    # Merge the two spatial axes into one sequence axis for the recurrent layer.
    sequence_length = features.shape[-3] * features.shape[-2]
    sequence = layers.Reshape((sequence_length, features.shape[-1]))(features)

    recurrent = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(sequence)
    recurrent = layers.Dropout(dropout)(recurrent)

    output = layers.Dense(output_dim + 1, activation="softmax", name="output")(recurrent)

    return Model(inputs=inputs, outputs=output)

In [None]:
import tensorflow as tf
# Ask TensorFlow to grow GPU memory on demand. This stays best-effort: a
# failure (for example when the GPUs were already initialised) is reported
# instead of being silently swallowed, and execution continues.
try:
    for gpu in tf.config.experimental.list_physical_devices("GPU"):
        tf.config.experimental.set_memory_growth(gpu, True)
except Exception as exc:
    print(f"Could not enable GPU memory growth: {exc}")

from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from mltu.tensorflow.dataProvider import DataProvider
from mltu.tensorflow.losses import CTCloss
from mltu.tensorflow.callbacks import Model2onnx,TrainLogger
from mltu.tensorflow.metrics import CWERMetric

from mltu.preprocessors import ImageReader
from mltu.transformers import ImageResizer,LabelPadding,LabelIndexer

from mltu.augmentors import RandomBrightness, RandomRotate, RandomErodeDilate
from mltu.annotations.images import CVImage
from configs import ModelConfigs
import os
from urllib.request import urlopen
from io import BytesIO
from zipfile import ZipFile


In [None]:
def download_and_unzip(
    url="https://github.com/AakashKumarNain/CaptchaCracker/raw/master/captcha_images_v2.zip",
    extract_to='/content/Datasets',
):
    """Download a zip archive into memory and extract it to a directory.

    Args:
        url: Location of the zip archive; anything `urlopen` accepts.
            Defaults to the captcha image archive used by this notebook.
        extract_to: Directory the archive members are extracted into.

    The HTTP response and the archive are both closed deterministically, even
    when reading or extraction raises.
    """
    with urlopen(url) as http_response:
        payload = BytesIO(http_response.read())
    with ZipFile(payload) as archive:
        archive.extractall(path=extract_to)

# Fetch and unpack the captcha archive; this runs every time the cell is executed.
download_and_unzip()

In [None]:
# File extensions that count as captcha images; anything else in the tree
# (hidden files, notes, archives) is ignored instead of becoming a sample.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")
captcha_path = "/content/Datasets/captcha_images_v2"  # Path to the directory


def collect_samples(root_dir, extensions=IMAGE_EXTENSIONS):
    """Walk `root_dir` and build the labelled sample list from image file names.

    Each image's label is its file name without the extension.

    Args:
        root_dir: Directory scanned recursively. A missing directory yields
            empty results.
        extensions: Lower-case file extensions (with leading dot) to accept.

    Returns:
        A tuple `(samples, characters, longest)` where `samples` is a list of
        `[file_path, label]` pairs, `characters` is the set of characters seen
        in the labels, and `longest` is the length of the longest label.
    """
    samples, characters, longest = [], set(), 0
    for root, dirs, files in os.walk(root_dir):
        dirs.sort()  # visit sub-directories in a stable order
        for file_name in sorted(files):
            label, ext = os.path.splitext(file_name)
            if ext.lower() not in extensions:
                continue
            samples.append([os.path.join(root, file_name), label])
            characters.update(label)
            longest = max(longest, len(label))
    return samples, characters, longest


dataset, vocab, max_len = collect_samples(captcha_path)


In [None]:
# Create the configuration object from the local `configs` module; no arguments are passed.
configs = ModelConfigs()

In [None]:
# Sort the characters before joining: iterating a set of strings has no stable
# order across interpreter sessions, and the character order stored in the
# saved config defines the label indices, so it must be reproducible.
configs.vocab = "".join(sorted(vocab))
configs.max_text_length = max_len
configs.save()

In [None]:
# Show how many characters the vocabulary string holds.
len(configs.vocab)

19

In [None]:
# Samples are read with ImageReader(CVImage), then passed through the
# resizer, the label indexer and the label padder, in that order.
image_readers = [ImageReader(CVImage)]
sample_transformers = [
    ImageResizer(configs.width, configs.height),
    LabelIndexer(configs.vocab),
    # Labels are padded up to the longest label using index len(vocab).
    LabelPadding(max_word_length=configs.max_text_length, padding_value=len(configs.vocab)),
]

data_provider = DataProvider(
    dataset=dataset,
    skip_validation=True,
    batch_size=configs.batch_size,
    data_preprocessors=image_readers,
    transformers=sample_transformers,
)

INFO:DataProvider:Skipping Dataset validation...


In [None]:
# split(0.9) yields the training and validation providers; augmentation is
# attached to the training provider only.
train_data_provider, test_data_provider = data_provider.split(0.9)
train_data_provider.augmentors = [RandomBrightness(), RandomRotate(), RandomErodeDilate()]

# Derive the network dimensions from the config instead of hard-coding them,
# so the model always agrees with ImageResizer(configs.width, configs.height)
# and with the vocabulary size used for label padding.
model = train_model(
    input_dim=(configs.height, configs.width, 3),
    output_dim=len(configs.vocab),
)

In [None]:
# `metrics` is passed as a list, the form documented by Keras; newer Keras
# releases reject a bare metric object here.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=configs.learning_rate),
    loss=CTCloss(),
    metrics=[CWERMetric(padding_token=len(configs.vocab))],
    run_eagerly=False,
)

In [None]:
# Print the layer-by-layer summary, widened to 110 columns.
model.summary(line_length=110)

In [None]:
# The validation metric is logged as "val_CER" (upper case), as the training
# log shows. Monitoring "val_cer" refers to a metric that does not exist, so
# early stopping would never trigger.
early_stopper = EarlyStopping(monitor="val_CER", patience=50, verbose=1, mode="min")
checkpoint = ModelCheckpoint("/content/model/model.h5", monitor="val_CER", verbose=1, save_best_only=True, mode="min")
trainLogger = TrainLogger('/content/model')


In [None]:
model_dir = '/content/model'
best_model_path = os.path.join(model_dir, "model.h5")

# TensorBoard logs go to a "logs" sub-directory of the model directory.
tb_callback = TensorBoard(log_dir=os.path.join(model_dir, "logs"), update_freq=1)

# Keep only the weights with the lowest validation CER seen so far.
checkpoint = ModelCheckpoint(filepath=best_model_path, monitor="val_CER", verbose=1, save_best_only=True, mode="min")

# Multiply the learning rate by 0.9 after 20 epochs without val_CER improvement.
reduceLROnPlat = ReduceLROnPlateau(monitor="val_CER", factor=0.9, min_delta=1e-10, patience=20, verbose=1, mode="auto")

# ONNX export callback, pointed at the same .h5 path the checkpoint writes.
model2onnx = Model2onnx(best_model_path)

In [None]:
# Callbacks are handed to Keras in this exact order.
training_callbacks = [early_stopper, checkpoint, trainLogger, reduceLROnPlat, tb_callback, model2onnx]

# Train for at most 700 epochs, validating on the held-out provider.
model.fit(
    train_data_provider,
    validation_data=test_data_provider,
    epochs=700,
    callbacks=training_callbacks,
    workers=configs.train_workers,
)

Epoch 1/700
Epoch 1: val_CER improved from inf to 1.00000, saving model to /content/model/model.h5


  saving_api.save_model(


Epoch 2/700
Epoch 2: val_CER did not improve from 1.00000
Epoch 3/700
Epoch 3: val_CER did not improve from 1.00000
Epoch 4/700
Epoch 4: val_CER did not improve from 1.00000
Epoch 5/700
Epoch 5: val_CER did not improve from 1.00000
Epoch 6/700
Epoch 6: val_CER did not improve from 1.00000
Epoch 7/700
Epoch 7: val_CER did not improve from 1.00000
Epoch 8/700
Epoch 8: val_CER did not improve from 1.00000
Epoch 9/700
Epoch 9: val_CER did not improve from 1.00000
Epoch 10/700
Epoch 10: val_CER did not improve from 1.00000
Epoch 11/700
Epoch 11: val_CER did not improve from 1.00000
Epoch 12/700
Epoch 12: val_CER did not improve from 1.00000
Epoch 13/700
Epoch 13: val_CER did not improve from 1.00000
Epoch 14/700
Epoch 14: val_CER did not improve from 1.00000
Epoch 15/700
Epoch 15: val_CER did not improve from 1.00000
Epoch 16/700
Epoch 16: val_CER did not improve from 1.00000
Epoch 17/700
Epoch 17: val_CER did not improve from 1.00000
Epoch 18/700
Epoch 18: val_CER did not improve from 1.00

<keras.src.callbacks.History at 0x7910c81b7cd0>

In [None]:
!pip install stow

In [None]:
import stow
# Write the training and validation splits as CSV files under configs.model_path.
for provider, csv_name in ((train_data_provider, 'train.csv'), (test_data_provider, 'val.csv')):
    provider.to_csv(stow.join(configs.model_path, csv_name))


In [None]:
# inferenceModel.py
import cv2
import typing
import numpy as np

from mltu.inferenceModel import OnnxInferenceModel
from mltu.utils.text_utils import ctc_decoder, get_cer

class ImageToWordModel(OnnxInferenceModel):
    """ONNX inference wrapper that decodes one image into its text."""

    def __init__(self, char_list: typing.Union[str, list], *args, **kwargs):
        """Keep the character list; all other arguments go to OnnxInferenceModel."""
        super().__init__(*args, **kwargs)
        self.char_list = char_list

    def predict(self, image: np.ndarray):
        """Run the model on a single image and return the decoded text.

        Args:
            image: Image array; it is resized to the model's input size here.

        Returns:
            The first entry of `ctc_decoder`'s result for the model output.
        """
        # cv2.resize takes its size argument in the reverse order of the
        # first two input_shape entries.
        target_size = self.input_shape[:2][::-1]
        resized = cv2.resize(image, target_size)

        # Add a leading batch axis and cast to float32 before running the session.
        batch = np.expand_dims(resized, axis=0).astype(np.float32)
        outputs = self.model.run(None, {self.input_name: batch})

        return ctc_decoder(outputs[0], self.char_list)[0]

if __name__ == "__main__":
    import os

    import pandas as pd
    from tqdm import tqdm
    from mltu.configs import BaseModelConfigs

    # Single place to point at the training run whose config and validation
    # split are evaluated; the directory name is specific to one run and must
    # be updated after retraining.
    run_dir = "/content/Models/02_captcha_to_text/202403021543"

    configs = BaseModelConfigs.load(os.path.join(run_dir, "configs.yaml"))

    model = ImageToWordModel(model_path='/content/model/model.onnx', char_list=configs.vocab)

    # dtype=str keeps labels that consist only of digits as text.
    df = pd.read_csv(os.path.join(run_dir, "val.csv"), dtype=str).values.tolist()

    accum_cer = []
    for image_path, label in tqdm(df):
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for a missing or unreadable file;
            # skip it rather than crash inside cv2.resize.
            print(f"Skipping unreadable image: {image_path}")
            continue

        prediction_text = model.predict(image)

        cer = get_cer(prediction_text, label)
        print(f"Image: {image_path}, Label: {label}, Prediction: {prediction_text}, CER: {cer}")

        accum_cer.append(cer)

    if accum_cer:
        print(f"Average CER: {np.average(accum_cer)}")
    else:
        print("Average CER: n/a (no images were evaluated)")

 38%|███▊      | 39/104 [00:00<00:00, 193.00it/s]

Image: /content/Datasets/captcha_images_v2/xdcn4.png, Label: xdcn4, Prediction: xdcn4, CER: 0.0
Image: /content/Datasets/captcha_images_v2/2x7bm.png, Label: 2x7bm, Prediction: 2x7bm, CER: 0.0
Image: /content/Datasets/captcha_images_v2/8gf7n.png, Label: 8gf7n, Prediction: 8gf7n, CER: 0.0
Image: /content/Datasets/captcha_images_v2/d7en3.png, Label: d7en3, Prediction: d7en3, CER: 0.0
Image: /content/Datasets/captcha_images_v2/xdn65.png, Label: xdn65, Prediction: xdn65, CER: 0.0
Image: /content/Datasets/captcha_images_v2/xe8xm.png, Label: xe8xm, Prediction: xe8xm, CER: 0.0
Image: /content/Datasets/captcha_images_v2/ewcf5.png, Label: ewcf5, Prediction: ewcf5, CER: 0.0
Image: /content/Datasets/captcha_images_v2/w46ep.png, Label: w46ep, Prediction: w46ep, CER: 0.0
Image: /content/Datasets/captcha_images_v2/5p3mm.png, Label: 5p3mm, Prediction: 5p3mm, CER: 0.0
Image: /content/Datasets/captcha_images_v2/4f8yp.png, Label: 4f8yp, Prediction: 4f8yp, CER: 0.0
Image: /content/Datasets/captcha_images_

 77%|███████▋  | 80/104 [00:00<00:00, 199.55it/s]

Image: /content/Datasets/captcha_images_v2/nfbg8.png, Label: nfbg8, Prediction: nfbg8, CER: 0.0
Image: /content/Datasets/captcha_images_v2/bmxpe.png, Label: bmxpe, Prediction: bmxpe, CER: 0.0
Image: /content/Datasets/captcha_images_v2/dy3cx.png, Label: dy3cx, Prediction: dy3cx, CER: 0.0
Image: /content/Datasets/captcha_images_v2/n7ebx.png, Label: n7ebx, Prediction: n7ebx, CER: 0.0
Image: /content/Datasets/captcha_images_v2/nny5e.png, Label: nny5e, Prediction: nny5e, CER: 0.0
Image: /content/Datasets/captcha_images_v2/3ny45.png, Label: 3ny45, Prediction: 3ny45, CER: 0.0
Image: /content/Datasets/captcha_images_v2/72m6f.png, Label: 72m6f, Prediction: 72m6f, CER: 0.0
Image: /content/Datasets/captcha_images_v2/244e2.png, Label: 244e2, Prediction: 244e2, CER: 0.0
Image: /content/Datasets/captcha_images_v2/3p4nn.png, Label: 3p4nn, Prediction: 3p4nn, CER: 0.0
Image: /content/Datasets/captcha_images_v2/3pe4g.png, Label: 3pe4g, Prediction: 3pe4g, CER: 0.0
Image: /content/Datasets/captcha_images_

100%|██████████| 104/104 [00:00<00:00, 198.36it/s]

Image: /content/Datasets/captcha_images_v2/f75cx.png, Label: f75cx, Prediction: f75cx, CER: 0.0
Image: /content/Datasets/captcha_images_v2/n7enn.png, Label: n7enn, Prediction: n7enn, CER: 0.0
Image: /content/Datasets/captcha_images_v2/36w25.png, Label: 36w25, Prediction: 36w25, CER: 0.0
Image: /content/Datasets/captcha_images_v2/3ndxd.png, Label: 3ndxd, Prediction: 3ndxd, CER: 0.0
Image: /content/Datasets/captcha_images_v2/pmg55.png, Label: pmg55, Prediction: pmg55, CER: 0.0
Image: /content/Datasets/captcha_images_v2/c8fxy.png, Label: c8fxy, Prediction: c8fxy, CER: 0.0
Image: /content/Datasets/captcha_images_v2/gng6e.png, Label: gng6e, Prediction: gng6e, CER: 0.0
Image: /content/Datasets/captcha_images_v2/y5n6d.png, Label: y5n6d, Prediction: y5n6d, CER: 0.0
Image: /content/Datasets/captcha_images_v2/3b4we.png, Label: 3b4we, Prediction: 3b4we, CER: 0.0
Image: /content/Datasets/captcha_images_v2/d3c8y.png, Label: d3c8y, Prediction: d3c8y, CER: 0.0
Image: /content/Datasets/captcha_images_


