In [1]:
# Select which GPU this notebook may use. The value below exposes only
# physical GPU #4 to the process (the original note mentioned GPU #7 merely
# as an example of the syntax).
# NOTE(review): this runs in the first cell, before torch is imported further
# down -- presumably so CUDA only ever sees this one device; confirm.
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '4'

In [2]:
# Experiment name: used as the output directory and as the first component of
# every checkpoint / plot path built later in this notebook.
model_name = 'ctc-ftex-rnn'

In [3]:
# Backbone identifier handed to CTCFtEx as `feature_extractor` (presumably a
# timm model name -- confirm in models.py). It is also embedded in the
# checkpoint and plot file names.
feature_extractor = 'efficientnet_b0'

In [4]:
import importlib
import pegon_utils
importlib.reload(pegon_utils)
from pegon_utils import PEGON_CHARS, CHAR_MAP

In [5]:
# Every key of CHAR_MAP must be exactly one character long.
# All offending keys are collected first so a single run reports every
# problem, and the check is an explicit `if` rather than an `assert` so it is
# not silently stripped when Python runs with -O. The exception type stays
# AssertionError, matching what the previous assert-based check raised.
invalid_keys = [key for key in CHAR_MAP if len(key) != 1]
if invalid_keys:
    for key in invalid_keys:
        print(key)
    raise AssertionError(
        f'CHAR_MAP keys must be single characters, got: {invalid_keys!r}')

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.utils as utils
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import transforms

from PIL import Image

import json
import os
import glob
import re
import datetime
import shutil
import pickle
import unicodedata

from functools import partial

from tqdm import tqdm
import matplotlib.pyplot as plt

import numpy as np
import random

import matplotlib.pyplot as plt

import timm

# Pin every random number generator used in this notebook (PyTorch, NumPy and
# the standard-library `random` module) to one shared seed so that runs are
# repeatable.
seed = 2023
for _seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    _seed_rng(seed)

In [7]:
# Make sure the output directory named after the experiment exists; a
# directory left over from an earlier run is reused (exist_ok=True) instead of
# raising.
os.makedirs(model_name, exist_ok=True)

In [8]:
importlib.reload(pegon_utils)
from pegon_utils import OCRDataset, QuranAnnotatedDataset
from torch.utils.data import random_split, ConcatDataset

# Synthetic Pegon dataset, loaded from its metadata file and forced to use the
# shared character map so labels agree with the model's vocabulary.
pegon_synth_dataset = OCRDataset().load('/workspace/Dataset/Synthesized-split/metadata.json')
pegon_synth_dataset.char_map = CHAR_MAP

# Shared image pipeline: resize to the synthetic dataset's average image size,
# always mirror horizontally (p=1), then convert to a tensor.
# NOTE(review): the unconditional flip is presumably there because the script
# is written right-to-left -- confirm. The demo cells flip images back before
# displaying them.
dataset_transforms = transforms.Compose([
    transforms.Resize((pegon_synth_dataset.avg_img_h,
                       pegon_synth_dataset.avg_img_w)),
    transforms.RandomHorizontalFlip(p=1),
    transforms.ToTensor(),
])
pegon_synth_dataset.transform = dataset_transforms

# 70/30 train/validation split of the synthetic data.
# The validation size is computed as the remainder instead of a second
# independent round(): two separate roundings can add up to len(dataset) +/- 1,
# in which case random_split raises ValueError. Whenever the old computation
# was valid, these lengths are identical to it.
num_synth = len(pegon_synth_dataset)
num_train_synth = round(num_synth * 0.7)
train_synth_dataset, val_synth_dataset = random_split(
    pegon_synth_dataset,
    lengths=[num_train_synth, num_synth - num_train_synth])

# Annotated Quran data, using the same image pipeline as the synthetic data.
quran_train_dataset = QuranAnnotatedDataset('/workspace/Dataset/Quran data set/dicriticText/traning',
                                            image_transform=dataset_transforms)
quran_test_dataset = QuranAnnotatedDataset('/workspace/Dataset/Quran data set/dicriticText/test',
                                           image_transform=dataset_transforms)

# Final training / validation sets: synthetic split + matching Quran split.
train_dataset = ConcatDataset((train_synth_dataset, quran_train_dataset))
val_dataset = ConcatDataset((val_synth_dataset, quran_test_dataset))

In [9]:
# Sanity check: the synthetic dataset must still be using the shared CHAR_MAP
# that was assigned to it when the datasets were built.
assert pegon_synth_dataset.char_map == CHAR_MAP

In [10]:
import models
importlib.reload(models)
from models import CTCFtEx

In [11]:
importlib.reload(pegon_utils)
from pegon_utils import model_length

# model_length(b=2, c=100) returns a callable; applying it to the synthetic
# dataset's max_seq_len yields the output sequence length requested from the
# model. (The exact formula lives in pegon_utils.)
output_len_for = model_length(b=2, c=100)
model_output_len = output_len_for(pegon_synth_dataset.max_seq_len)

# CTC model with a trainable (non-frozen) feature extractor, sized to the
# average image dimensions of the synthetic dataset.
model = CTCFtEx(
    ids_to_chars=PEGON_CHARS,
    image_width=pegon_synth_dataset.avg_img_w,
    image_height=pegon_synth_dataset.avg_img_h,
    model_output_len=model_output_len,
    feature_extractor=feature_extractor,
    freeze_extractor=False,
)

In [12]:
importlib.reload(pegon_utils)
from pegon_utils import CTCTrainer, FocalCTCLoss

# Build the trainer (training itself is started in a later cell).
# No `criterion` is passed, so CTCTrainer uses whatever default it defines.
# Candidate alternatives kept for reference:
#   criterion=FocalCTCLoss(zero_infinity=True, alpha=0.25, gamma=0.75)
#   criterion=FocalCTCLoss(zero_infinity=True, alpha=0.5, gamma=2)
optimizer = optim.AdamW(model.parameters(), lr=1e-4)
trainer = CTCTrainer(
    model=model,
    max_norm=None,
    optimizer=optimizer,
    batch_size=4,
    num_workers=2,
    dataset=train_dataset,
)

In [13]:
# Smoke test: push one randomly chosen training image through the model and
# verify the output before spending time on training.
sample_image = random.choice(train_dataset)[0]

# This is only a shape / NaN check, so run it without recording an autograd
# graph.
with torch.no_grad():
    test = trainer.model(sample_image.unsqueeze(0).to(trainer.device))

# The last output dimension must have one entry per CHAR_MAP symbol.
assert test.shape[-1] == len(CHAR_MAP), (
    f'model emits {test.shape[-1]} classes but CHAR_MAP has {len(CHAR_MAP)} entries')
assert not test.isnan().any(), 'model output contains NaN values'

In [14]:
# Timestamp identifying this run; it is interpolated into every checkpoint and
# plot file name built in the cells below.
# NOTE(review): str(datetime) contains a space and colons (see the printed
# value), and those characters end up in the file names -- fine on Linux, but
# worth confirming for any other target filesystem or transfer tool.
timestamp = datetime.datetime.now()
print(timestamp)

2023-06-08 15:36:13.245745


In [15]:
import importlib
import pegon_utils
importlib.reload(pegon_utils)
from pegon_utils import FilenameOCRDataset, PegonAnnotatedDataset, evaluate
from pegon_utils import CTCDecoder, BestPathDecoder, evaluate

# Hand-annotated Pegon data, run through the same image pipeline as the
# training data.
annotated_dataset = PegonAnnotatedDataset(
    '/workspace/Dataset/pegon-annotated-dataset-split-v3',
    image_transform=dataset_transforms,
)

# Loader used for validation during training: fixed order, and the same batch
# size / worker count / collate function as the trainer.
annotated_dataloader = DataLoader(
    annotated_dataset,
    batch_size=trainer.batch_size,
    num_workers=trainer.num_workers,
    collate_fn=trainer.collate_fn,
    shuffle=False,
)

def eval_routine(model, dataloader):
    """Return the mean character error rate of `model` over `dataloader`.

    The model is wrapped in a best-path decoder that uses PEGON_CHARS[0] as
    the blank symbol, and `evaluate` is run on the loader that was passed in.
    Previously the `dataloader` argument was ignored and the global
    `annotated_dataloader` was always evaluated instead.

    Args:
        model: the model to decode with.
        dataloader: iterable of batches to evaluate on.

    Returns:
        The mean of the CER values returned by `evaluate`; the WER values are
        computed by `evaluate` but not used here.
    """
    decoder = BestPathDecoder(model, CHAR_MAP, blank_char=PEGON_CHARS[0])
    cers, _wers = evaluate(decoder, dataloader)
    return np.mean(cers)

In [None]:
# Checkpoint path for this run: <model_name>/<timestamp>.<feature_extractor>.pt
model_path=f'{model_name}/{timestamp}.{feature_extractor}.pt'
# Train for 10 epochs. The annotated dataloader and eval_routine are handed to
# the trainer for validation, and model_path is passed as the save path.
trainer.train(num_epochs=10, save_path=model_path,
              val_dataloader=annotated_dataloader, eval_routine=eval_routine)

  0%|          | 0/15072 [00:00<?, ?it/s]

  0%|          | 0/1028 [00:00<?, ?it/s]

In [22]:
# Display the checkpoint path that was passed to trainer.train as save_path.
model_path

'ctc-ftex-rnn/2023-06-08 15:36:13.245745.efficientnet_b0.pt'

In [30]:
# Inspect the validation history recorded by the trainer.
# NOTE(review): presumably one eval_routine result (mean CER) per epoch --
# confirm in CTCTrainer.
trainer.loss_history.val_history

[0.49439365367885735,
 0.4649521003369305,
 0.46942597726362206,
 0.45580632281850225,
 0.5584507738855983,
 0.4687453746051691,
 0.4889435504205428,
 0.4728074257923089,
 0.45737004785032037,
 0.4888991735716073]

In [None]:
# Plot the training history (the `path` argument is presumably where the
# figure is written -- confirm in CTCTrainer.plot_history), then save the
# trainer's model to the checkpoint path.
trainer.plot_history(path=f'{model_name}/{timestamp}.{feature_extractor}.train.png')
trainer.save(model_path)

In [None]:
importlib.reload(pegon_utils)
from pegon_utils import CTCDecoder, BestPathDecoder, evaluate, plot_cer_wer

In [None]:
# Loader over the validation set (synthetic hold-out + Quran test split),
# configured like the trainer's own loaders.
# Evaluation does not benefit from shuffling, so the order is kept fixed: this
# matches the annotated validation loader used during training and makes the
# order of the resulting CER/WER values reproducible between runs.
dataloader = DataLoader(val_dataset,
                        batch_size=trainer.batch_size,
                        shuffle=False,
                        num_workers=trainer.num_workers,
                        collate_fn=trainer.collate_fn)

In [None]:
# Same checkpoint path expression that was passed to trainer.train as
# save_path; recomputed here for the evaluation cells that follow.
model_path = f'{model_name}/{timestamp}.{feature_extractor}.pt'

In [None]:
# Build a best-path decoder from the saved checkpoint, using PEGON_CHARS[0] as
# the blank symbol, then compute CER and WER values over `dataloader`.
decoder = BestPathDecoder.from_path(model_path, CHAR_MAP, blank_char=PEGON_CHARS[0])
cers, wers = evaluate(decoder, dataloader)

In [None]:
# Plot the CER/WER results of the validation-set evaluation; `path` is
# presumably where the figure is saved -- confirm in pegon_utils.plot_cer_wer.
plot_cer_wer(cers, wers, path=f'{model_name}/{timestamp}.{feature_extractor}.wer-cer.png')

In [None]:
# Demo: decode one randomly chosen validation sample and show the image with
# the predicted text as its title.

import arabic_reshaper
from bidi.algorithm import get_display


def to_arabic_display(text):
    """Reshape Arabic-script text and apply the bidi algorithm for display."""
    return get_display(arabic_reshaper.reshape(text))


def tensor_to_display(x):
    """Mirror an image tensor horizontally and convert it to a PIL image.

    The dataset transform always flips images, so flipping once more restores
    the original orientation for viewing.
    """
    return transforms.ToPILImage()(transforms.RandomHorizontalFlip(p=1)(x))


# Collate a single random sample into a batch of one.
img, label, _ = dataloader.collate_fn([random.choice(val_dataset)])

predicted = decoder.infer(img.cuda())[0]
print(predicted)

plt.imshow(tensor_to_display(img[0]))
plt.title(f'predicted = {to_arabic_display(predicted)}')
plt.show()

In [None]:
import importlib
import pegon_utils
importlib.reload(pegon_utils)
from pegon_utils import FilenameOCRDataset, PegonAnnotatedDataset

# Hand-annotated Pegon data, run through the same image pipeline as the
# training data.
annotated_dataset = PegonAnnotatedDataset('/workspace/Dataset/pegon-annotated-dataset-split-v3',
                                          image_transform=dataset_transforms)

# Evaluation loader: order is kept fixed (shuffle=False), matching the
# annotated loader used for validation during training, so the order of the
# resulting CER/WER values is reproducible between runs.
annotated_dataloader = DataLoader(annotated_dataset, shuffle=False,
                                  batch_size=trainer.batch_size,
                                  num_workers=trainer.num_workers,
                                  collate_fn=trainer.collate_fn)

# CER / WER values of the loaded decoder on the annotated data.
cers, wers = evaluate(decoder, annotated_dataloader)

In [None]:
# Plot the CER/WER results of the annotated-dataset evaluation; `path` is
# presumably where the figure is saved -- confirm in pegon_utils.plot_cer_wer.
plot_cer_wer(cers, wers, path=f'{model_name}/{timestamp}.{feature_extractor}.eval.wer-cer.png')

In [None]:
# Demo: decode one randomly chosen sample from the annotated dataset and show
# the image with the predicted text as its title.

import arabic_reshaper
from bidi.algorithm import get_display


def to_arabic_display(text):
    """Reshape Arabic-script text and apply the bidi algorithm for display."""
    return get_display(arabic_reshaper.reshape(text))


def tensor_to_display(x):
    """Mirror an image tensor horizontally and convert it to a PIL image.

    The dataset transform always flips images, so flipping once more restores
    the original orientation for viewing.
    """
    return transforms.ToPILImage()(transforms.RandomHorizontalFlip(p=1)(x))


# Collate a single random sample into a batch of one. The collate function is
# taken from the annotated loader that belongs to this section rather than
# from the unrelated validation `dataloader`; both were built with
# trainer.collate_fn, so the result is the same, but this cell no longer
# depends on the validation cell having been run.
img, label, _ = annotated_dataloader.collate_fn([random.choice(annotated_dataset)])

predicted = decoder.infer(img.cuda())[0]
print(predicted)

plt.imshow(tensor_to_display(img[0]))
plt.title(f'predicted = {to_arabic_display(predicted)}')
plt.show()