# How to train keras_OCR with ISIS ionograms

## 1. Prepare dataset
I selected randomly 60 ionograms of ISIS. I than created a csv file with annotations. Here's how it looks.

In [45]:
import pandas as pd

ann = pd.read_csv('L:/DATA/ISIS/keras_ocr/annotations/global_annotations.csv')
ann.head()

Unnamed: 0,image_path,x_min,y_min,x_max,y_max,text_content
0,1.png,316,361,601,402,36 00 69 111 0354 35
1,2.png,457,363,746,405,36 00 69 111 0245 04
2,3.png,481,369,767,403,36 00 69 101 0245 31
3,4.png,510,366,793,402,36 00 69 111 0245 58
4,5.png,492,368,774,402,36 00 69 111 0246 25


The columns (x_min, y_min) and (x_max, y_max) define the top left corner and the bottom right corner of a box around the metadata. I found those values manually in paint. It took me around 1h30 to do this.

## 2. Split the dataset
I need to create three subsets : training, validation and test sets. Since I only have 60 images, I'll use the 70-15-15 ratio to create the subsets. The first step is to shuffle the dataset randomly.

In [46]:
ann = ann.sample(frac = 1)
ann.reset_index(drop=True, inplace=True)
ann.head()

Unnamed: 0,image_path,x_min,y_min,x_max,y_max,text_content
0,26.png,160,374,561,404,41 16 71 364 1949 34
1,9.png,421,367,709,400,36 00 69 111 0248 13
2,18.png,232,465,727,502,45 19 72 065 0205 01
3,4.png,510,366,793,402,36 00 69 111 0245 58
4,29.png,241,374,642,404,41 16 71 364 1950 18


In [47]:
# Create the subsets and save them
train = ann[0:42]
train.to_csv('L:/DATA/ISIS/keras_ocr/train/annotations/train_annotations.csv',index=False)

val = ann[42:51]
val.to_csv('L:/DATA/ISIS/keras_ocr/val/annotations/val_annotations.csv',index=False)

test = ann[51:60]
test.to_csv('L:/DATA/ISIS/keras_ocr/test/annotations/test_annotations.csv',index=False)     

In [48]:
import shutil
import os

# Initialise the directories for training, validation and testing (remove current images)
train_dir = 'L:/DATA/ISIS/keras_ocr/train/'
val_dir = 'L:/DATA/ISIS/keras_ocr/val/'
test_dir = 'L:/DATA/ISIS/keras_ocr/test/'

for path in [train_dir, val_dir, test_dir]:
    for f in os.listdir(path+'images/'):
        os.remove(os.path.join(path+'images/', f))

all_images = os.listdir('L:/DATA/ISIS/keras_ocr/all images')

# copy images for all subsets
for image in all_images :
    full_name = os.path.join('L:/DATA/ISIS/keras_ocr/all images/', image)
    if image in train['image_path'].to_list():
        shutil.copy(full_name, train_dir+'images/'+image)
    elif image in val['image_path'].to_list():
        shutil.copy(full_name, val_dir+'images/'+image)
    elif image in test['image_path'].to_list():
        shutil.copy(full_name, test_dir+'images/'+image)

## 3. Create data generators

To create our data generators, we'll code our own class.

In [49]:
import numpy as np
import cv2
from tensorflow.keras.utils import Sequence

In [91]:
class OCRDataGenerator(Sequence):
    def __init__(self, annotations_file, image_dir, batch_size):
        self.annotations = pd.read_csv(annotations_file)
        self.image_dir = image_dir
        self.batch_size = batch_size
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.annotations) / self.batch_size))

    def __getitem__(self, idx):
        batch_indices = self.indices[idx * self.batch_size : (idx + 1) * self.batch_size]
        batch_annotations = self.annotations.iloc[batch_indices]
        images, targets = self._load_batch(batch_annotations)
        return images, targets

    def _load_batch(self, batch_annotations):
        images = []
        targets = []

        for _, row in batch_annotations.iterrows():
            image_path = os.path.join(self.image_dir, row['image_path'])
            image = cv2.imread(image_path)
            
            if image is None:
                print(f"Warning: Image not found or cannot be loaded - {image_path}")
                continue
                
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            images.append(image)

            # Process the annotations to create the targets for the detection model
            # The format for the detection target should be a list of dictionaries,
            # each containing the bounding box coordinates and text content.
            target = {
                'boxes': np.array([[row['x_min'], row['y_min'], row['x_max'], row['y_max']]]),
                'text': [row['text_content']]
            }
            targets.append(target)

        images = np.array(images,dtype='object')
        targets = np.array(targets,dtype='object')
        return images, targets

    def on_epoch_end(self):
        self.indices = np.arange(len(self.annotations))
        np.random.shuffle(self.indices)

We then create a generator for training, validation and testing.

In [92]:
train_generator = OCRDataGenerator(annotations_file=train_dir+'annotations/train_annotations.csv',
                                   image_dir=train_dir+'images/',
                                   batch_size=7)

val_generator = OCRDataGenerator(annotations_file=val_dir+'annotations/val_annotations.csv',
                                 image_dir=val_dir+'images/',
                                 batch_size=7)

test_generator = OCRDataGenerator(annotations_file=test_dir+'annotations/test_annotations.csv',
                                  image_dir=test_dir+'images/',
                                  batch_size=7)


In [98]:
from keras_ocr.detection import Detector
from keras_ocr.recognition import Recognizer
from keras_ocr.pipeline import Pipeline
from keras_ocr.datasets import get_detector_image_generator

In [103]:
detector_generator = get_detector_image_generator(train_dir+'images/'+train['image_path'],width=400,height=1100)
detector_generator

<generator object get_detector_image_generator at 0x00000268028B43C0>

In [101]:
detector = Detector()
recognizer = Recognizer()

Looking for C:\Users\mfortier\.keras-ocr\craft_mlt_25k.h5
Looking for C:\Users\mfortier\.keras-ocr\crnn_kurapan.h5


In [104]:
detector.model.fit(detector_generator, batch_size=7, epochs=42)

ValueError: too many values to unpack (expected 3)