In [1]:
import logging

import dvc.api
import mlflow
from PIL import Image
from src.data.image_preprocessing import crop_image
from src.features.dataset import get_dataset
from src.features.dataset_generator import ImageDatasetType
from src.features.postprocessing import post_process_plate
from src.models.metrics import lev_dist
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from src.models.fetch_model import fetch_model
import numpy as np

2023-01-20 18:25:22.406558: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-01-20 18:25:22.409664: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-01-20 18:25:22.409678: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


2023-01-20 18:25:23,481 - root - INFO - Loaded environment variables


In [2]:
# Name and version of the registered model under evaluation; both are passed
# to fetch_model below and reused to build the MLflow run name.
model_name = "test"
model_version = "1"
# Checkpoint identifiers handed to TrOCRProcessor.from_pretrained and
# VisionEncoderDecoderModel.from_pretrained inside evaluate_ocr.
tr_ocr_processor = "microsoft/trocr-small-printed"
tr_ocr_model = "microsoft/trocr-small-printed"

# Module-level model object read by both evaluate_bbox_detection and evaluate_ocr.
model = fetch_model(model_name=model_name, model_version=model_version)

def evaluate_bbox_detection():
    """Evaluate the bounding-box model on the test set and log the result to MLflow.

    Reads the module-level ``model`` and ``test_set_bbox``. The two values
    returned by ``model.evaluate`` are logged under the metric names
    ``loss`` and ``root_mean_squared_error``.
    """
    # Unpacking enforces that evaluate() yields exactly two values.
    test_loss, test_rmse = model.evaluate(test_set_bbox)

    bbox_metrics = {
        "loss": test_loss,
        "root_mean_squared_error": test_rmse,
    }
    mlflow.log_metrics(bbox_metrics)


def evaluate_ocr():
    """Evaluate plate reading (bbox prediction + TrOCR) on the test set and log to MLflow.

    Reads the module-level ``model``, ``test_set_bbox``, ``test_set_plates``,
    ``tr_ocr_processor`` and ``tr_ocr_model``. For every (predicted bbox,
    plate sample) pair the image is cropped with ``crop_image``, transcribed
    with the TrOCR model, and compared with the ground-truth plate string,
    both before and after ``post_process_plate``.

    Logged metrics (each averaged over the samples actually scored):
        accuracy, accuracy_post_processed: fraction of exact string matches.
        lev_dist, lev_dist_post_processed: mean value of ``lev_dist``.

    Raises:
        ValueError: if no (bbox, sample) pair was available, so no average
            can be computed.
    """
    transformer_processor = TrOCRProcessor.from_pretrained(tr_ocr_processor)
    transformer_model = VisionEncoderDecoderModel.from_pretrained(tr_ocr_model)
    bboxes = model.predict(test_set_bbox, batch_size=16)

    n_samples = 0
    exact_raw, exact_post = 0, 0
    dist_raw, dist_post = 0, 0

    for bbox, sample in zip(bboxes, test_set_plates):
        # Each sample is indexed as (images, plates); element 0 of each is used.
        # NOTE(review): relies on the plates dataset being built with batch_size=1 — confirm.
        image = sample[0][0].numpy().astype(np.uint8)
        plate = sample[1][0].numpy().decode()

        cropped_image = Image.fromarray(crop_image(image, bbox))
        pixel_values = transformer_processor(
            cropped_image, return_tensors="pt").pixel_values
        generated_ids = transformer_model.generate(pixel_values)
        raw_text = transformer_processor.batch_decode(
            generated_ids, skip_special_tokens=True)[0]

        exact_raw += int(raw_text == plate)
        dist_raw += lev_dist(raw_text, plate)

        # Keep the raw and post-processed predictions in separate names so the
        # two metric families cannot be mixed up.
        processed_text = post_process_plate(raw_text)

        exact_post += int(processed_text == plate)
        dist_post += lev_dist(processed_text, plate)

        n_samples += 1

    # zip() stops at the shorter input, so the denominator must be the number
    # of pairs actually scored rather than len(bboxes).
    if n_samples == 0:
        raise ValueError(
            "evaluate_ocr: no samples were evaluated; the test datasets are empty")

    mlflow.log_metrics({
        "accuracy": exact_raw / n_samples,
        "accuracy_post_processed": exact_post / n_samples,
        "lev_dist": dist_raw / n_samples,
        "lev_dist_post_processed": dist_post / n_samples
    })

# Two views of the same "test" split: one consumed by the bbox model, one
# read for the plate images and ground-truth strings. Both use batch_size=1
# and shuffle=False so evaluate_ocr can pair them element by element.
test_set_bbox = get_dataset(
    "test",
    dataset_generator_type=ImageDatasetType.BboxImagesDatasetGenerator,
    batch_size=1,
    shuffle=False,
)
test_set_plates = get_dataset(
    "test",
    dataset_generator_type=ImageDatasetType.PlateImagesDatasetGenerator,
    batch_size=1,
    shuffle=False,
)

# Log both evaluations under a single MLflow run named after the model.
run_name = f"test_{model_name}_v{model_version}"
with mlflow.start_run(run_name=run_name):
    evaluate_bbox_detection()
    evaluate_ocr()


2023-01-20 18:25:25,283 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): dagshub.com:443
2023-01-20 18:25:25,592 - urllib3.connectionpool - DEBUG - https://dagshub.com:443 "GET /gianfrancodemarco/plate-recognition.mlflow/api/2.0/mlflow/model-versions/get-download-uri?name=test&version=1 HTTP/1.1" 200 None
2023-01-20 18:25:25,825 - urllib3.connectionpool - DEBUG - https://dagshub.com:443 "GET /gianfrancodemarco/plate-recognition.mlflow/api/2.0/mlflow-artifacts/artifacts?path=3bc5e8d24d35422db879e9f1cda81180%2Fc0b4f19f5ebe454582a85345b3cc2328%2Fartifacts%2Fmodel HTTP/1.1" 200 None
2023-01-20 18:25:26,105 - urllib3.connectionpool - DEBUG - https://dagshub.com:443 "GET /gianfrancodemarco/plate-recognition.mlflow/api/2.0/mlflow-artifacts/artifacts?path=3bc5e8d24d35422db879e9f1cda81180%2Fc0b4f19f5ebe454582a85345b3cc2328%2Fartifacts%2Fmodel HTTP/1.1" 200 None
2023-01-20 18:25:26,119 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (2): dagshub.com:443
2023

2023-01-20 18:25:32.605286: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/gianfranco/.local/lib/python3.8/site-packages/cv2/../../lib64:
2023-01-20 18:25:32.605331: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2023-01-20 18:25:32.605373: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (gianfranco-HP-EliteBook-850-G8-Notebook-PC): /proc/driver/nvidia/version does not exist
2023-01-20 18:25:32.605775: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the a

2023-01-20 18:25:33,156 - root - INFO - Loaded registered model version from URI: 'models:/test/1'
2023-01-20 18:25:33,234 - git.cmd - DEBUG - Popen(['git', 'version'], cwd=/home/gianfranco/Desktop/uni/plate-recognition/notebooks/prototyping, universal_newlines=False, shell=None, istream=None)
2023-01-20 18:25:33,258 - git.cmd - DEBUG - Popen(['git', 'version'], cwd=/home/gianfranco/Desktop/uni/plate-recognition/notebooks/prototyping, universal_newlines=False, shell=None, istream=None)
2023-01-20 18:25:33,990 - urllib3.connectionpool - DEBUG - https://dagshub.com:443 "POST /gianfrancodemarco/plate-recognition.mlflow/api/2.0/mlflow/runs/create HTTP/1.1" 200 None
2023-01-20 18:25:35,037 - urllib3.connectionpool - DEBUG - https://dagshub.com:443 "POST /gianfrancodemarco/plate-recognition.mlflow/api/2.0/mlflow/runs/log-batch HTTP/1.1" 200 None
2023-01-20 18:25:35,043 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
2023-01-20 18:25:35,832 - urllib3.c

Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


2023-01-20 18:25:35,839 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
2023-01-20 18:25:36,651 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /microsoft/trocr-small-printed/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
2023-01-20 18:25:36,852 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
2023-01-20 18:25:37,675 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /microsoft/trocr-small-printed/resolve/main/config.json HTTP/1.1" 200 0


Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-small-printed and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

2023-01-20 18:26:08,733 - urllib3.connectionpool - DEBUG - https://dagshub.com:443 "POST /gianfrancodemarco/plate-recognition.mlflow/api/2.0/mlflow/runs/log-batch HTTP/1.1" 200 None
2023-01-20 18:26:09,002 - urllib3.connectionpool - DEBUG - https://dagshub.com:443 "POST /gianfrancodemarco/plate-recognition.mlflow/api/2.0/mlflow/runs/update HTTP/1.1" 200 None
