In [1]:
from google.colab import drive

# Mount Google Drive at /content/drive; the dataset and model paths used by the
# later cells all live under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install stow
!pip install keras
!pip install tf2onnx
!pip install mltu

Collecting stow
  Downloading stow-1.3.1-py3-none-any.whl (74 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/74.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m71.7/74.8 kB[0m [31m2.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m74.8/74.8 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: stow
Successfully installed stow-1.3.1
Collecting tf2onnx
  Downloading tf2onnx-1.16.1-py3-none-any.whl (455 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m455.8/455.8 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
Collecting onnx>=1.4.1 (from tf2onnx)
  Downloading onnx-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.9/15.9 MB[0m [31m55.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: onnx, tf2o

In [3]:
import tensorflow as tf
import keras
import tf2onnx
import mltu
import stow

# Report the installed version of every library this notebook relies on.
for _label, _module in (
    ("TensorFlow version:", tf),
    ("Keras version:", keras),
    ("tf2onnx version:", tf2onnx),
    ("MLTU version:", mltu),
    ("Stow", stow),
):
    print(_label, _module.__version__)


TensorFlow version: 2.15.0
Keras version: 2.15.0
tf2onnx version: 1.16.1
MLTU version: 1.2.4
Stow 1.3.0


In [5]:
import shutil

# Extract the zipped training images next to the archive on Google Drive.
shutil.unpack_archive(
    filename="/content/drive/MyDrive/captcha/datasets/train.zip",
    extract_dir="/content/drive/MyDrive/captcha/datasets",
)

In [15]:
"""
Created By: ishwor subedi
Date: 2024-03-27
"""
import stow
from datetime import datetime

from mltu.configs import BaseModelConfigs


class ModelConfigs(BaseModelConfigs):
    """
    Settings for one training run: storage locations, image geometry and
    optimisation hyper-parameters.
    """

    def __init__(self):
        super().__init__()

        # Every instance gets its own output folder, named after the minute it was created.
        run_stamp = datetime.now().strftime("%Y%m%d%H%M")
        self.model_path = stow.join('/content/drive/MyDrive/captcha/model', run_stamp)

        # Raw images and the root folder that receives the train/test split.
        self.dataset_path = '/content/drive/MyDrive/captcha/datasets/train'
        self.splitted_dataset_path = '/content/drive/MyDrive/captcha/final_datasets'

        # Placeholders; ModelTrainer.train() overwrites both once the labels have been scanned.
        self.vocab = ''

        # Size (in pixels) the images are resized to before entering the model.
        self.height = 50
        self.width = 200

        self.max_text_length = 0

        # Optimisation settings.
        self.batch_size = 200
        self.learning_rate = 0.01
        self.train_epochs = 40
        self.train_workers = 20


In [13]:
"""
Created By: ishwor subedi
Date: 2024-03-28
"""
from sklearn.model_selection import train_test_split
import os

from sklearn.model_selection import train_test_split
import os
import shutil
from datetime import datetime


def split_dataset_into_train_and_test(dataset_path, destination_path, test_size=0.2):
    """
    Splits the dataset into training and testing sets and copies them into the destination path.

    The files are copied (not moved) into
    ``<destination_path>/dataset_<YYYYMMDD>/train`` and
    ``<destination_path>/dataset_<YYYYMMDD>/test``, where the date is the day
    on which the split is executed.

    Parameters:
    - dataset_path: The path to the dataset (a flat directory of files).
    - destination_path: The path where the split datasets will be saved.
    - test_size: The proportion of the dataset to include in the test split (default is 0.2).

    Returns:
    - train_dir: The directory of the training set.
    - test_dir: The directory of the testing set.
    """
    # Collect regular files only: sub-directories (e.g. ".ipynb_checkpoints")
    # cannot be copied with shutil.copy and are not samples.
    # The listing is sorted because os.listdir returns entries in arbitrary
    # order, and random_state only reproduces a split for identical input order.
    candidate_paths = (os.path.join(dataset_path, name) for name in os.listdir(dataset_path))
    all_files = sorted(path for path in candidate_paths if os.path.isfile(path))

    # Split the files into training and testing sets
    train_files, test_files = train_test_split(all_files, test_size=test_size, random_state=42)

    # Create directories for the training and testing sets
    today_date = datetime.today().strftime('%Y%m%d')
    split_root = os.path.join(destination_path, f"dataset_{today_date}")
    train_dir = os.path.join(split_root, "train")
    test_dir = os.path.join(split_root, "test")

    for target_dir, files in ((train_dir, train_files), (test_dir, test_files)):
        os.makedirs(target_dir, exist_ok=True)
        for file in files:
            shutil.copy(file, target_dir)

    # Hand the locations back so callers do not have to rebuild the dated path.
    return train_dir, test_dir


if __name__ == '__main__':
    # Read the dataset locations from the run configuration and write the
    # train/test split (20% test) to disk.
    configs = ModelConfigs()
    split_dataset_into_train_and_test(
        dataset_path=configs.dataset_path,
        destination_path=configs.splitted_dataset_path,
        test_size=0.2,
    )


In [14]:
"""
Created By: ishwor subedi
Date: 2024-03-28
"""
from keras import layers
from keras.models import Model

from mltu.tensorflow.model_utils import residual_block


def train_model(input_dim, output_dim, activation="leaky_relu", dropout=0.2):
    """
    Assemble the (uncompiled) recognition network.

    The graph is: pixel scaling by 1/255 -> nine residual blocks -> reshape
    that merges the two spatial axes into a single sequence axis ->
    bidirectional LSTM -> dropout -> per-step softmax.

    :param input_dim: Shape passed to the Keras input layer (the caller in this
        notebook passes (height, width, 3)).
    :param output_dim: Number of characters in the vocabulary; the softmax has
        ``output_dim + 1`` units (the extra index is what the data pipeline in
        this notebook uses as padding value).
    :param activation: Activation name forwarded to every residual block.
    :param dropout: Dropout rate used inside the residual blocks and after the LSTM.
    :return: A Keras ``Model`` mapping the "input" tensor to the "output" tensor.
    """
    inputs = layers.Input(shape=input_dim, name="input")

    # Scaling happens inside the model, so raw 0-255 images can be fed directly.
    scaled = layers.Lambda(lambda x: x / 255)(inputs)

    # (filters, skip_conv, strides) for each residual block, in creation order.
    block_specs = (
        (16, True, 1),
        (16, True, 2),
        (16, False, 1),
        (32, True, 2),
        (32, False, 1),
        (64, True, 2),
        (32, True, 1),
        (64, True, 2),
        (64, False, 1),
    )
    features = scaled
    for filters, skip_conv, strides in block_specs:
        features = residual_block(
            features, filters, activation=activation, skip_conv=skip_conv, strides=strides, dropout=dropout
        )

    # Collapse the two spatial axes into one sequence axis; channels stay last.
    sequence_length = features.shape[-3] * features.shape[-2]
    channels = features.shape[-1]
    sequence = layers.Reshape((sequence_length, channels))(features)

    recurrent = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(sequence)
    recurrent = layers.Dropout(dropout)(recurrent)

    probabilities = layers.Dense(output_dim + 1, activation="softmax", name="output")(recurrent)

    return Model(inputs=inputs, outputs=probabilities)


In [16]:
"""
Created By: ishwor subedi
Date: 2024-03-28
 """

import tensorflow as tf

# Ask TensorFlow to enable memory growth on every visible GPU.
# This stays best-effort (a failure must not stop the notebook), but the
# failure is reported instead of being silently discarded, and
# KeyboardInterrupt/SystemExit are no longer swallowed.
try:
    for gpu in tf.config.experimental.list_physical_devices("GPU"):
        tf.config.experimental.set_memory_growth(gpu, True)
except Exception as error:
    print(f"Could not enable GPU memory growth: {error}")
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard

from mltu.tensorflow.dataProvider import DataProvider
from mltu.tensorflow.losses import CTCloss
from mltu.tensorflow.callbacks import Model2onnx, TrainLogger
from mltu.tensorflow.metrics import CWERMetric

from mltu.preprocessors import ImageReader
from mltu.transformers import ImageResizer, LabelIndexer, LabelPadding
from mltu.augmentors import RandomBrightness, RandomRotate, RandomErodeDilate
from mltu.annotations.images import CVImage

import os


class ModelTrainer:
    """
    Builds, trains and exports the captcha recognition model.

    Labels are taken from the image file names (name without extension), so the
    vocabulary and the maximum label length are collected while the train and
    test directories are listed.
    """

    def __init__(self, train_dir, test_dir):
        """
        :param train_dir: Directory holding the training images.
        :param test_dir: Directory holding the validation images.
        """
        self.train_dir = train_dir
        self.test_dir = test_dir
        self.configs = ModelConfigs()
        # Both are filled incrementally by prepare_data().
        self.vocab = set()
        self.max_len = 0

    def prepare_data(self, dataset_path):
        """
        Prepares the data for training and testing by listing the images and deriving their labels
        from the file names.

        Side effects: every character seen in a label is added to ``self.vocab`` and
        ``self.max_len`` is raised to the longest label seen so far.

        :param dataset_path: The path to the dataset.
        :return: A list of ``[image_path, label]`` pairs, ordered by file name.
        """
        dataset = []
        # Sorted so the sample order does not depend on the arbitrary order of os.listdir.
        for file in sorted(os.listdir(dataset_path)):
            file_path = os.path.join(dataset_path, file)
            # Skip sub-directories (e.g. ".ipynb_checkpoints"); only files are samples.
            if not os.path.isfile(file_path):
                continue
            label = os.path.splitext(file)[0]
            dataset.append([file_path, label])
            self.vocab.update(label)
            self.max_len = max(self.max_len, len(label))
        return dataset

    def _build_data_provider(self, dataset):
        """Create a DataProvider with the resize / index / pad pipeline shared by train and validation."""
        return DataProvider(
            dataset=dataset,
            skip_validation=True,
            batch_size=self.configs.batch_size,
            data_preprocessors=[ImageReader(CVImage)],
            transformers=[
                ImageResizer(self.configs.width, self.configs.height),
                LabelIndexer(self.configs.vocab),
                # len(vocab) is one past the last character index; the model's
                # output layer has output_dim + 1 units to account for it.
                LabelPadding(max_word_length=self.configs.max_text_length, padding_value=len(self.configs.vocab))
            ],
        )

    def train(self):
        """
        Trains the custom architecture and stores all artifacts under ``configs.model_path``:
        the saved configuration, the best ``model.h5`` checkpoint (by ``val_CER``), the training
        and TensorBoard logs, and ``train.csv`` / ``val.csv`` listing the samples that were used.
        The Model2onnx callback is attached for the ONNX export used for deployment.

        :return: None
        """
        train_data = self.prepare_data(self.train_dir)
        test_data = self.prepare_data(self.test_dir)

        # Save vocab and maximum text length to configs.
        # The set is sorted first: set iteration order is not stable between
        # interpreter runs, and the position of a character in this string is
        # its label index.
        self.configs.vocab = "".join(sorted(self.vocab))
        self.configs.max_text_length = self.max_len
        self.configs.save()

        # Create a data provider for the training and testing data
        train_data_provider = self._build_data_provider(train_data)
        test_data_provider = self._build_data_provider(test_data)

        # Augment training data with random brightness, rotation and erode/dilate
        train_data_provider.augmentors = [RandomBrightness(), RandomRotate(), RandomErodeDilate()]

        # Creating TensorFlow model architecture
        model = train_model(
            input_dim=(self.configs.height, self.configs.width, 3),
            output_dim=len(self.configs.vocab),
        )

        # Compile the model and print summary
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=self.configs.learning_rate),
            loss=CTCloss(),
            metrics=[CWERMetric(padding_token=len(self.configs.vocab))],
            run_eagerly=False
        )
        model.summary(line_length=110)

        # Define path to save the model
        os.makedirs(self.configs.model_path, exist_ok=True)

        # Define callbacks
        # NOTE(review): patience=50 is larger than ModelConfigs.train_epochs (40),
        # so early stopping cannot trigger with the current settings.
        earlystopper = EarlyStopping(monitor="val_CER", patience=50, verbose=1, mode="min")
        checkpoint = ModelCheckpoint(f"{self.configs.model_path}/model.h5", monitor="val_CER", verbose=1,
                                     save_best_only=True, mode="min")
        trainLogger = TrainLogger(self.configs.model_path)
        tb_callback = TensorBoard(f"{self.configs.model_path}/logs", update_freq=1)
        reduceLROnPlat = ReduceLROnPlateau(monitor="val_CER", factor=0.9, min_delta=1e-10, patience=20, verbose=1,
                                           mode="min")
        model2onnx = Model2onnx(f"{self.configs.model_path}/model.h5")

        # Train the model
        model.fit(
            train_data_provider,
            validation_data=test_data_provider,
            epochs=self.configs.train_epochs,
            callbacks=[earlystopper, checkpoint, trainLogger, reduceLROnPlat, tb_callback, model2onnx],
            workers=self.configs.train_workers
        )

        # Record which samples were used for training and validation.
        train_data_provider.to_csv(os.path.join(self.configs.model_path, "train.csv"))
        test_data_provider.to_csv(os.path.join(self.configs.model_path, "val.csv"))


In [18]:
if __name__ == "__main__":
    # NOTE: the dated folder name must match the one written by
    # split_dataset_into_train_and_test, which names it after the day the split ran.
    trainer = ModelTrainer(
        train_dir='/content/drive/MyDrive/captcha/final_datasets/dataset_20240328/train',
        test_dir='/content/drive/MyDrive/captcha/final_datasets/dataset_20240328/test',
    )
    trainer.train()


Model: "model_1"
______________________________________________________________________________________________________________
 Layer (type)                    Output Shape                     Param #    Connected to                     
 input (InputLayer)              [(None, 50, 200, 3)]             0          []                               
                                                                                                              
 lambda_1 (Lambda)               (None, 50, 200, 3)               0          ['input[0][0]']                  
                                                                                                              
 conv2d_24 (Conv2D)              (None, 50, 200, 16)              448        ['lambda_1[0][0]']               
                                                                                                              
 batch_normalization_18 (BatchN  (None, 50, 200, 16)              64         ['conv2d_24[0][0]'

  saving_api.save_model(


Epoch 2/40
Epoch 2: val_CER did not improve from 1.00000
Epoch 3/40
Epoch 3: val_CER did not improve from 1.00000
Epoch 4/40
Epoch 4: val_CER did not improve from 1.00000
Epoch 5/40
Epoch 5: val_CER did not improve from 1.00000
Epoch 6/40
Epoch 6: val_CER did not improve from 1.00000
Epoch 7/40
Epoch 7: val_CER did not improve from 1.00000
Epoch 8/40
Epoch 8: val_CER did not improve from 1.00000
Epoch 9/40
Epoch 9: val_CER did not improve from 1.00000
Epoch 10/40
Epoch 10: val_CER did not improve from 1.00000
Epoch 11/40
Epoch 11: val_CER did not improve from 1.00000
Epoch 12/40
Epoch 12: val_CER improved from 1.00000 to 0.84843, saving model to /content/drive/MyDrive/captcha/model/202403281555/model.h5
Epoch 13/40
Epoch 13: val_CER improved from 0.84843 to 0.73131, saving model to /content/drive/MyDrive/captcha/model/202403281555/model.h5
Epoch 14/40
Epoch 14: val_CER improved from 0.73131 to 0.56190, saving model to /content/drive/MyDrive/captcha/model/202403281555/model.h5
Epoch 15/