In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K

In [None]:
pip install kaggle




In [None]:
# kagglehub is only needed for fetching the dataset below.
import kagglehub


# Fetch the dataset identified by the handle and keep the directory path that
# kagglehub returns; later cells build every file path from `path`.
path = kagglehub.dataset_download("anggadwisunarto/text-deblurring-dataset-with-psf-for-ocr")

# Echo the resolved location so the reader can see where the files live.
print("Path to dataset files:", path)

Using Colab cache for faster access to the 'text-deblurring-dataset-with-psf-for-ocr' dataset.
Path to dataset files: /kaggle/input/text-deblurring-dataset-with-psf-for-ocr


In [None]:
# Vocabulary of recognisable symbols: lowercase letters followed by digits.
characters = "abcdefghijklmnopqrstuvwxyz0123456789"

# Character -> integer code. Codes start at 1, so 0 is never assigned to a
# character (the batch generator pads label rows with pad_sequences, whose
# default fill value is 0).
char_to_num = {symbol: code for code, symbol in enumerate(characters, start=1)}

# Inverse lookup: integer code -> character.
num_to_char = dict(enumerate(characters, start=1))

# Number of real characters only (36): it counts neither code 0 nor any CTC
# blank. NOTE(review): confirm the model's output layer is sized accordingly.
num_classes = len(characters)


In [None]:
def preprocess_image(img_path):
    """Load an image file and convert it into a binarised (32, 128, 1) array.

    Pipeline: read as grayscale -> resize to 128x32 (width x height) ->
    5x5 Gaussian blur -> adaptive Gaussian threshold -> scale to [0, 1].

    Args:
        img_path: Path of the image file to load.

    Returns:
        A float NumPy array of shape (32, 128, 1). Because the image is
        thresholded to 0/255 before scaling, values are 0.0 or 1.0.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file (it is missing,
            unreadable, or not a decodable image).
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # without this check the error would surface later as an opaque
        # assertion inside cv2.resize.
        raise FileNotFoundError(f"Could not read image: {img_path!r}")

    # cv2.resize takes the target size as (width, height), so the resulting
    # array has 32 rows and 128 columns.
    img = cv2.resize(img, (128, 32))
    img = cv2.GaussianBlur(img, (5, 5), 0)
    img = cv2.adaptiveThreshold(
        img, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY, 11, 2
    )
    img = img / 255.0
    # Append a trailing channel axis: (height, width) -> (height, width, 1).
    return img.reshape(32, 128, 1)


In [None]:
def encode_text(text):
    """Convert a label string into a list of integer character codes.

    Each character is lower-cased before lookup because the vocabulary in
    `char_to_num` contains lowercase letters only; without this, uppercase
    letters would be silently removed from the label (e.g. "Hello" would be
    encoded as "ello"). Characters that are still not in the vocabulary
    (spaces, punctuation, ...) are skipped.

    Args:
        text: The label to encode; a string (or any iterable of
            single-character strings).

    Returns:
        A list of ints, one per character that is present in `char_to_num`.
        Empty if no character of `text` is in the vocabulary.
    """
    return [
        char_to_num[symbol]
        for symbol in map(str.lower, text)
        if symbol in char_to_num
    ]


In [None]:
# Locations inside the downloaded dataset: presumably the folder of input
# images and the CSV holding their labels.
# NOTE(review): the directory listing printed a few cells below shows only
# 'BMVC_image_quality_test_data', 'BMVC_OCR_test_data' and 'BMVC_image_data'
# at the dataset root, so "blurred" and "labels.csv" may not exist - confirm.
IMAGE_DIR, LABEL_FILE = (
    os.path.join(path, "blurred"),
    os.path.join(path, "labels.csv"),
)

In [None]:
# Show the top-level entries of the downloaded dataset directory.
print(os.listdir(path))

['BMVC_image_quality_test_data', 'BMVC_OCR_test_data', 'BMVC_image_data']


In [None]:
class OCRGenerator(tf.keras.utils.Sequence):
    """Keras `Sequence` that yields batches for training a CTC-based OCR model.

    The label CSV must provide a `filename` column (image file name relative
    to `img_dir`) and a `text` column (the transcription).

    Args:
        csv_path: Path of the CSV file with the `filename` / `text` columns.
        img_dir: Directory that contains the image files.
        batch_size: Number of samples per batch; must be positive.
        time_steps: Value reported as the prediction length of every sample.
            NOTE(review): this has to match the number of time steps the
            model emits per image; the model is not visible here - confirm.
        **kwargs: Forwarded to the Keras base-class constructor.

    Raises:
        ValueError: If `batch_size` is not positive.
    """

    def __init__(self, csv_path, img_dir, batch_size=16, time_steps=32, **kwargs):
        # Newer Keras versions expect subclasses to run the base constructor.
        super().__init__(**kwargs)
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        self.data = pd.read_csv(csv_path)
        self.img_dir = img_dir
        self.batch_size = batch_size
        self.time_steps = time_steps

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        # Ceiling division: floor division would drop the last samples and
        # report zero batches for a dataset smaller than one batch.
        return (len(self.data) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, idx):
        """Build batch number `idx`.

        Returns:
            A tuple `(inputs, dummy_targets)`. `inputs` is a dict with keys
            "image" (stacked preprocessed images), "label" (integer codes,
            zero-padded on the right to the longest label in the batch),
            "input_length" and "label_length" (both of shape (batch, 1)).
            `dummy_targets` is a zero vector with one entry per sample.
        """
        start = idx * self.batch_size
        batch = self.data.iloc[start:start + self.batch_size]

        images, labels = [], []
        for filename, text in zip(batch["filename"], batch["text"]):
            images.append(preprocess_image(os.path.join(self.img_dir, filename)))
            labels.append(encode_text(text))

        batch_count = len(images)
        # ctc_batch_cost documents both length tensors as shape (samples, 1),
        # so they are emitted as column vectors rather than flat arrays.
        input_len = np.full((batch_count, 1), self.time_steps, dtype=np.int64)
        label_len = np.array(
            [len(label) for label in labels], dtype=np.int64
        ).reshape(-1, 1)

        return {
            "image": np.array(images),
            "label": tf.keras.preprocessing.sequence.pad_sequences(labels, padding="post"),
            "input_length": input_len,
            "label_length": label_len
        }, np.zeros(batch_count)


In [None]:
# Show the entries of the dataset's 'BMVC_OCR_test_data' sub-directory.
print(os.listdir(os.path.join(path, 'BMVC_OCR_test_data')))

['n_08', 'n_01', 'n_07', 'n_06', 'psf', 'n_00', 'orig', 'n_09', 'n_03', 'n_02', 'n_04', 'n_05', 'n_10']


In [None]:
class CTCLayer(tf.keras.layers.Layer):
    """Pass-through layer that registers the CTC loss on the model."""

    def call(self, inputs):
        """Record the CTC loss and return the predictions untouched.

        Args:
            inputs: A 4-element sequence, in this order: predictions,
                labels, prediction lengths, label lengths.

        Returns:
            The predictions (first element of `inputs`), unchanged.
        """
        predictions, targets, prediction_lengths, target_lengths = inputs
        # K.ctc_batch_cost takes the labels first and the predictions second.
        self.add_loss(
            K.ctc_batch_cost(
                targets, predictions, prediction_lengths, target_lengths
            )
        )
        return predictions
