In [1]:
# Dataset: IAM-line (Teklia), hosted on the Hugging Face Hub
# https://huggingface.co/datasets/Teklia/IAM-line

import pandas as pd
from PIL import Image
import numpy as np
import io
import pyarrow

import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.applications import VGG19
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Concatenate
from tensorflow.keras.models import Model

In [2]:
# Locations of the pre-processed IAM-line splits (one parquet file per split).
TRAIN_PARQUET = r"D:\Jupyter\Handwriting\IAM-line\data\train_processed.parquet"
TEST_PARQUET = r"D:\Jupyter\Handwriting\IAM-line\data\test_processed.parquet"
VAL_PARQUET = r"D:\Jupyter\Handwriting\IAM-line\data\validation_processed.parquet"

# Read the splits in train / test / validation order, one DataFrame each.
train_df, test_df, val_df = (
    pd.read_parquet(parquet_path)
    for parquet_path in (TRAIN_PARQUET, TEST_PARQUET, VAL_PARQUET)
)

In [3]:
def convert_image_to_numpy(image_bytes):
    """Decode encoded image bytes into a pixel array divided by 255.

    Parameters
    ----------
    image_bytes : bytes-like
        Encoded image data, wrapped in an in-memory buffer and opened with PIL.

    Returns
    -------
    numpy.ndarray or None
        The decoded pixel array divided by 255.0 (i.e. values in [0, 1] for
        8-bit images), or None if anything goes wrong; in that case the error
        is printed instead of being raised.
    """
    try:
        buffer = io.BytesIO(image_bytes)
        pixels = np.array(Image.open(buffer))
        scaled = pixels / 255.0
    except Exception as e:
        # Best-effort: report the failure and let the caller deal with None.
        print(f"Error converting image: {e}")
        return None
    return scaled

# Decode the 'image' column of every split, replacing the encoded bytes with
# the arrays returned by convert_image_to_numpy.
#
# Values that are already numpy arrays are left untouched, which makes this
# cell safe to re-run: without the guard a second run would hand decoded pixels
# back to the decoder, which fails on them and overwrites the data with None.
# Series.map also replaces the row-by-row iterrows()/.at writes.
for df in [train_df, test_df, val_df]:
    df['image'] = df['image'].map(
        lambda value: value
        if isinstance(value, np.ndarray)
        else convert_image_to_numpy(value)
    )

In [4]:
# Preview the first five rows to check the text and decoded image columns.
train_df.head(n=5)

Unnamed: 0,text,image
0,put down a resolution on the subject,"[[0.9882352941176471, 0.9764705882352941, 0.97..."
1,and he is to be backed by Mr. Will,"[[0.9647058823529412, 0.9333333333333333, 0.93..."
2,nominating any more Labour life Peers,"[[0.984313725490196, 0.984313725490196, 0.9843..."
3,M Ps tomorrow. Mr. Michael Foot has,"[[0.9529411764705882, 0.8666666666666667, 0.89..."
4,"Griffiths, M P for Manchester Exchange .","[[0.984313725490196, 0.984313725490196, 0.9843..."


In [7]:
# Collect the decoded line images into one (N, 128, 2000, 1) array.
#
# The images were divided by 255.0 when decoded, so the buffer has to be a
# float type: writing those values into a uint8 array truncates everything
# below 1.0 to 0, leaving the model with almost entirely blank inputs.
# float32 is used (rather than float64) to keep the array as small as possible.
train_images = np.empty((len(train_df), 128, 2000), dtype=np.float32)
for i, image in enumerate(train_df['image'].values):
    if image is None:
        # A failed decode leaves None in the column; in a float buffer that
        # would silently become NaN, so fail loudly instead.
        raise ValueError(f"train_df row {i} has no decoded image")
    # Each image is expected to be 128 x 2000 to fit its slot in the buffer.
    train_images[i] = image
# Append a trailing channel axis for the Conv2D input.
train_images = np.expand_dims(train_images, axis=-1)

In [8]:
# Vocabulary: lowercase letters, uppercase letters, digits and the space
# character. Anything else (e.g. punctuation) has no entry in the mapping.
char_set = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 '
# Map each character to its position in char_set.
char_to_num = dict(zip(char_set, range(len(char_set))))

In [9]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences

def encode_text(text, max_len=32):
    """Encode a transcription as a fixed-length vector of character indices.

    Each character is looked up in ``char_to_num``; characters without an
    entry fall back to the last index (``len(char_to_num) - 1``). Short text is
    right-padded with zeros. Text longer than ``max_len`` keeps its first
    ``max_len`` characters (``truncating='post'``), so the retained labels stay
    aligned with the start of the line, consistent with the right-side
    padding. The ``pad_sequences`` default would drop the beginning instead.

    NOTE(review): the fallback index is also the index of the last character
    in ``char_set``, and the pad value 0 is also the index of its first
    character, so unknown characters and padding cannot be told apart from
    those two real characters in the encoded output.

    Parameters
    ----------
    text : str
        Transcription to encode.
    max_len : int, optional
        Length of the returned vector.

    Returns
    -------
    numpy.ndarray
        1-D array of ``max_len`` integer indices.
    """
    fallback = len(char_to_num) - 1
    encoded = [char_to_num.get(char, fallback) for char in text]
    padded = pad_sequences(
        [encoded], maxlen=max_len, padding='post', truncating='post'
    )
    return padded[0]

In [10]:
# Fixed label length (in characters) used for every transcription.
max_len = 80
# Encode each transcription and stack the results into one
# (num_samples, max_len) integer array.
train_texts = np.stack(
    [encode_text(line, max_len) for line in train_df['text']]
)

In [11]:
# One class per vocabulary entry.
num_classes = len(char_to_num)
# One-hot encode the index labels, adding a trailing axis of size num_classes.
# Run this cell only once per kernel: it overwrites train_texts with its own
# one-hot version.
train_texts = tf.keras.utils.to_categorical(
    train_texts, num_classes=num_classes
)

In [12]:
# Check the shapes of the model inputs and targets.
(train_images.shape, train_texts.shape)

((6482, 128, 2000, 1), (6482, 80, 63))

In [13]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Dense, LSTM, Bidirectional, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam

def create_crnn_model(input_shape, num_classes, seq_len=80):
    """Build a CNN + bidirectional-LSTM model with a per-step softmax output.

    Three Conv2D(3x3, 'same') -> MaxPooling2D(2x2) -> BatchNormalization
    stages (32, 64 and 128 filters) halve height and width three times. The
    resulting feature map is laid out width-first and cut into ``seq_len``
    equal chunks, which are the time steps of two bidirectional LSTM layers
    (128 and 64 units). A Dense softmax then classifies every time step.

    Parameters
    ----------
    input_shape : tuple of int
        (height, width, channels) of the input image. All three must be fully
        specified, because the reshape size is computed from them. For
        (128, 2000, 1) the convolutional stack outputs (16, 250, 128).
    num_classes : int
        Size of the softmax output at every time step.
    seq_len : int, optional
        Number of time steps, i.e. the length of the predicted sequence.
        Defaults to 80, the value previously hard-coded in the reshape.

    Returns
    -------
    tensorflow.keras.Model
        Uncompiled model mapping (batch, *input_shape) to
        (batch, seq_len, num_classes).

    Raises
    ------
    ValueError
        If the flattened feature map cannot be split evenly into ``seq_len``
        time steps.
    """
    input_img = Input(shape=input_shape, name='image_input')

    # Convolutional feature extractor: each stage halves height and width.
    x = input_img
    for filters in (32, 64, 128):
        x = Conv2D(filters, (3, 3), padding='same', activation='relu')(x)
        x = MaxPooling2D((2, 2), strides=2)(x)
        x = BatchNormalization()(x)

    # Static (height, width, channels) of the feature map, batch axis dropped.
    _, height, width, channels = tf.keras.backend.int_shape(x)
    total_features = height * width * channels
    if total_features % seq_len != 0:
        raise ValueError(
            f"Feature map of shape {(height, width, channels)} cannot be split "
            f"evenly into {seq_len} time steps"
        )

    # Move the width axis in front of height before flattening. Reshape walks
    # the tensor in row-major order, so without this each time step would be a
    # fragment of one horizontal strip of the image; with width first, the
    # time steps are consecutive left-to-right slices of the text line.
    x = tf.keras.layers.Permute((2, 1, 3))(x)
    x = Reshape(target_shape=(seq_len, total_features // seq_len))(x)

    # Recurrent layers for sequence prediction.
    for units in (128, 64):
        x = Bidirectional(LSTM(units, return_sequences=True))(x)
        x = Dropout(0.25)(x)
        x = BatchNormalization()(x)

    # Per-time-step character classification.
    x = Dense(num_classes, activation='softmax')(x)

    return Model(inputs=input_img, outputs=x)

# Number of output classes: one per entry in the character vocabulary.
# Derived from char_to_num instead of being hard-coded, so the model's output
# size cannot drift from the label encoding if char_set is edited.
num_classes = len(char_to_num)

# Create the model for 128 x 2000 single-channel line images.
input_shape = (128, 2000, 1)
crnn_model = create_crnn_model(input_shape, num_classes)

# Compile with categorical cross-entropy against the one-hot labels, then
# print the layer summary.
crnn_model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
crnn_model.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 image_input (InputLayer)    [(None, 128, 2000, 1)]    0         
                                                                 
 conv2d (Conv2D)             (None, 128, 2000, 32)     320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 64, 1000, 32)     0         
 )                                                               
                                                                 
 batch_normalization (BatchN  (None, 64, 1000, 32)     128       
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)           (None, 64, 1000, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 32, 500, 64)      0     

In [14]:
# Train for two epochs in batches of 32, with execution pinned to the CPU.
# The returned History object is kept for later inspection.
with tf.device('/CPU:0'):
    history = crnn_model.fit(
        x=train_images,
        y=train_texts,
        batch_size=32,
        epochs=2,
    )


Epoch 1/2
Epoch 2/2


In [16]:
# Persist the trained model under the relative path "trial".
crnn_model.save(filepath="trial")



INFO:tensorflow:Assets written to: trial\assets


INFO:tensorflow:Assets written to: trial\assets
