In [4]:
import albumentations as albu
import numpy as np
import pandas as pd
import torch
from omegaconf import DictConfig
from torchvision.transforms import ToPILImage

from src.dataset.dataset import BarcodeDataset
from src.lightning_module import BarcodeRunner
from src.dataset.augmentations import get_transforms
from src.settings.config import Config
from src.utils.predict_utils import matrix_to_string

In [5]:
# Device used for the model and its inputs. Prefer the first GPU, but fall back
# to the CPU so the notebook still runs on machines without CUDA.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
# Characters passed to matrix_to_string when decoding predictions.
VOCAB = "0123456789"

In [11]:
# Location of the YAML file the project configuration is read from.
CONFIG_PATH = "../configs/config.yaml"

config: DictConfig = Config.from_yaml(CONFIG_PATH)
# Switch the augmentations flag off before building the transforms
# (presumably so evaluation images are not randomly augmented - confirm in get_transforms).
config.transforms_settings.augmentations = False
transforms = get_transforms(config.transforms_settings)

In [21]:
# Read the val_data.csv listing and wrap it in the project dataset together
# with the image root folder and the `transforms` object.
df = pd.read_csv("../data/val_data.csv")
dataset = BarcodeDataset(dataframe=df, image_folder="../data/", transforms=transforms)
# Last expression of the cell: displays the number of samples.
len(dataset)

54

In [15]:
# NOTE(review): absolute, user-specific path - this only resolves on the original
# author's machine; point it at a checkpoint that exists in your environment.
CHECKPOINT_PATH = "/home/asgordeev/recnet/experiments/first_run/epoch_epoch=99-val_CTCLoss=2.330.ckpt"

# map_location remaps the stored tensors onto DEVICE while loading, so a
# checkpoint saved on a different device layout can still be restored.
module = BarcodeRunner.load_from_checkpoint(CHECKPOINT_PATH, map_location=DEVICE)
# Both .to() and .eval() return the module itself; re-assigning keeps the cell
# from echoing the module repr and leaves the model on DEVICE in eval mode.
module = module.to(DEVICE).eval()

In [None]:
def get_pil_img(images, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Undo mean/std normalization and convert one image tensor to a PIL image.

    Args:
        images: Either a single image tensor laid out as (C, H, W) or a batch
            laid out as (N, C, H, W). For a batch only the first image is
            converted.
        mean: Per-channel mean to add back. Defaults to the values that were
            previously hard-coded here (the widely used ImageNet statistics);
            assumes the dataset transforms normalized with them - confirm
            against get_transforms.
        std: Per-channel standard deviation to multiply back; same default
            provenance as ``mean``.

    Returns:
        The PIL image produced by torchvision's ``ToPILImage``.

    Raises:
        ValueError: If ``images`` is neither 3- nor 4-dimensional.
    """
    if images.ndim == 4:
        image = images[0]
    elif images.ndim == 3:
        # A single un-batched image: indexing [0] here would select one channel
        # plane and break the channel handling below.
        image = images
    else:
        raise ValueError(
            f"expected a (C, H, W) or (N, C, H, W) tensor, got {images.ndim} dimensions"
        )

    image = image.detach().cpu().float()
    channel_mean = torch.tensor(mean, dtype=image.dtype).view(-1, 1, 1)
    channel_std = torch.tensor(std, dtype=image.dtype).view(-1, 1, 1)

    # Inverse of the forward normalization (x - mean) / std.
    restored = image * channel_std + channel_mean
    # Keep values inside [0, 1] so rounding noise cannot overflow when
    # ToPILImage scales the float tensor to 8-bit.
    restored = restored.clamp(0.0, 1.0)
    return ToPILImage()(restored)

In [38]:
pr_texts = []
images = []

# Gradients are never needed for prediction; no_grad stops PyTorch from
# recording an autograd graph for every forward pass.
with torch.no_grad():
    for i in range(len(dataset)):
        # Each dataset item unpacks into three values; only the image is used here.
        image, _, _ = dataset[i]
        # Add a leading batch axis: the model is called on a batch of one.
        batch = image[None]
        predict = module(batch.to(DEVICE)).cpu()
        string_pred, _ = matrix_to_string(predict, VOCAB)
        pr_texts.append(string_pred[0])
        # Pass the batched tensor: get_pil_img converts the first image of a batch.
        images.append(get_pil_img(batch))

In [None]:
# Render every image inline in the notebook, followed by its predicted string.
# `display` is provided by the IPython kernel; unlike Image.show() it does not
# spawn an external viewer per image and also works on a remote kernel.
for img, pred in zip(images, pr_texts):
    display(img)
    print(pred, "\n")