In [4]:
from pathlib import Path

import torch
from torch.utils.data.dataloader import DataLoader

from handwriting_recognition.dataset import HandWritingDataset, train_augmentations
from handwriting_recognition.label_converter import LabelConverter
from handwriting_recognition.modelling_utils import get_image_model_and_processor, get_optimizer
from handwriting_recognition.utils import TrainingConfig, get_dataset_folder_path

In [8]:
# The default training config is a JSON file in the package's `configs`
# directory, located relative to the parent of the dataset folder.
config_path = (
    Path(get_dataset_folder_path()).parent
    / "handwriting_recognition"
    / "configs"
    / "default_config.json"
)
config = TrainingConfig.from_path(config_path=config_path)

In [9]:
# Load the image model together with its pre-processor; both are looked up
# by the names stored in the feature-extractor section of the training config.
feature_extractor_config = config.feature_extractor_config
image_model, image_processor = get_image_model_and_processor(
    model_name=feature_extractor_config.hf_model_name,
    processor_name=feature_extractor_config.hf_pre_processor_name,
)

Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
# Training split: samples are listed in the pre-processed train.csv, images go
# through the model's processor, and the training augmentations are applied.
train_csv_path = get_dataset_folder_path() / "pre_processed" / "train.csv"
data_train = HandWritingDataset(
    data_path=train_csv_path,
    image_processor=image_processor,
    augmentations=train_augmentations(),
)



In [15]:
# Seed a dedicated generator so the shuffled sample order -- and therefore the
# batch inspected in the cells below -- is the same after Restart & Run All.
# This only fixes the DataLoader's shuffling; randomness elsewhere (e.g. inside
# train_augmentations, if it uses any) is not covered by this generator.
SHUFFLE_SEED = 0
train_loader = DataLoader(
    data_train,
    batch_size=config.batch_size,
    pin_memory=False,
    shuffle=True,
    drop_last=True,  # discard a trailing batch smaller than batch_size
    generator=torch.Generator().manual_seed(SHUFFLE_SEED),
)

In [16]:
# The converter is built from the character set exposed by the training dataset.
character_set = data_train.char_set
converter = LabelConverter(character_set=character_set)

In [20]:
# Pull a single (images, labels) batch from the loader for inspection.
batch_iterator = iter(train_loader)
images, labels = next(batch_iterator)

In [22]:
# Display the dimensions of the image batch fetched above.
images.shape

torch.Size([16, 3, 224, 224])

In [24]:
# Display the raw text labels that accompany the image batch.
labels

('eleonore',
 'elodie',
 'villemonteix',
 'monange',
 'gasjine',
 'noel',
 'naomi',
 'roux',
 'bieliczky',
 'yassine',
 'nicolas',
 'chibane',
 'lepere',
 'marinella',
 'rogiers',
 'romain')

In [32]:
# Encode the batch's text labels with the converter. Iterating over the result
# (next cell) yields two tensors, of shape [16, 14] and [16] for this batch.
# NOTE(review): presumably these are the encoded label indices and the
# per-label lengths -- confirm against LabelConverter.encode.
a = converter.encode(labels)

In [35]:
# Print the shape of every tensor returned by the label encoder, one per line.
for encoded_tensor in a:
    print(encoded_tensor.shape)

torch.Size([16, 14])
torch.Size([16])
