In [1]:
import os
import shutil

import cv2
import numpy
from PIL import Image
from optimum.intel import OVModelForVision2Seq, OVWeightQuantizationConfig
from roboflow import Roboflow
from transformers import ImageToTextPipeline, TrOCRProcessor
from ultralytics import YOLO

import wandb
from taiwan_license_plate_recognition.helper import get_num_of_workers

In [2]:
# Load the python-dotenv IPython extension and read the .env file into the environment;
# later cells read PROJECT_ROOT and ROBOFLOW_API_KEY from os.environ.
%load_ext dotenv
%dotenv
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [3]:
# Root directory used to build the dataset and OpenVINO cache paths in later cells.
project_root: str = os.environ.get("PROJECT_ROOT", "")
if not project_root:
	# With an empty root, f"{project_root}/datasets/..." would silently resolve to a path
	# directly under "/", so stop here with an actionable message instead.
	raise RuntimeError(
		"PROJECT_ROOT is not set; define it in the environment or in .env before running this notebook."
	)

# Worker count later handed to the OCR pipeline; computed by the project helper.
num_workers: int = get_num_of_workers()

In [4]:
# Open the Weights & Biases run; the detection model artifact is fetched through it later.
run = wandb.init(
	project="taiwan-license-plate-recognition",
	group="combination",
	job_type="other",
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mhermeschen1116[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
# Roboflow client; the API key is read from the environment rather than hard-coded.
roboflow_agent = Roboflow(api_key=os.getenv("ROBOFLOW_API_KEY"))

In [6]:
# Resolve the workspace and project first, then download version 6 of the dataset
# in the "yolov8-obb" export format under the project's datasets directory.
roboflow_project = roboflow_agent.workspace("work-c9x8f").project("license-plate-detection-mdsot")
dataset = roboflow_project.version(6).download(
	"yolov8-obb",
	location=f"{project_root}/datasets/roboflow",
)

loading Roboflow workspace...
loading Roboflow project...


In [7]:
# Download the latest logged detection model through the active W&B run;
# the returned value is the local path of the downloaded artifact.
model_path: str = run.use_model(name="license-plate-detection:latest")

[34m[1mwandb[0m:   3 of 3 files downloaded.  


In [8]:
# Give the downloaded artifact the "_openvino_model" suffix that the next cell loads it under.
openvino_model_dir = f"{model_path}_openvino_model"
if os.path.exists(model_path):
	if os.path.isdir(openvino_model_dir):
		# A previous run already moved an artifact here. os.replace cannot overwrite a
		# non-empty directory, so drop the stale copy to keep Restart & Run All working.
		shutil.rmtree(openvino_model_dir)
	os.replace(model_path, openvino_model_dir)
elif not os.path.exists(openvino_model_dir):
	# Neither the fresh download nor an earlier renamed copy is available.
	raise FileNotFoundError(f"Model artifact not found at {model_path} or {openvino_model_dir}")

In [9]:
# Load the renamed OpenVINO export as an oriented-bounding-box (OBB) detector.
model = YOLO(model=model_path + "_openvino_model", task="obb")

In [10]:
# Single sample image from the downloaded dataset, used to smoke-test the whole pipeline.
test_image_path: str = (
	f"{project_root}/datasets/roboflow/train/images/"
	"000001_jpg.rf.27e1551f828338908b6c02b147c4d366.jpg"
)

In [11]:
# Detect license plates in the sample image on the CPU.
results = model.predict(source=test_image_path, device="cpu")

Loading /home/hermeschen/Repo/taiwan-license-plate-recognition/src/scripts/artifacts/license-plate-detection:v0_openvino_model for OpenVINO inference...

image 1/1 /home/hermeschen/Repo/taiwan-license-plate-recognition/datasets/roboflow/train/images/000001_jpg.rf.27e1551f828338908b6c02b147c4d366.jpg: 640x640 39.4ms
Speed: 10.6ms preprocess, 39.4ms inference, 18.0ms postprocess per image at shape (1, 3, 640, 640)


In [12]:
# Accumulator for the masked plate images that are later fed to the OCR pipeline.
crop_image = list()

In [13]:
# Mask every detected plate out of its source frame and collect the results as RGB PIL images.
# The list is emptied first so that re-running this cell does not append duplicates.
crop_image.clear()
for result in results:
	# Ultralytics keeps `orig_img` in OpenCV's BGR channel order, whereas PIL treats a
	# 3-channel array as RGB; convert once per frame so the OCR sees the true colours.
	frame_rgb = cv2.cvtColor(result.orig_img, cv2.COLOR_BGR2RGB)
	for obb in result.obb.xyxyxyxy:
		# Corner points of the oriented box, reshaped to (-1, 1, 2) and cast to the
		# 32-bit integer type that cv2.fillPoly works with.
		points = obb.cpu().numpy().reshape((-1, 1, 2)).astype(numpy.int32)
		# White polygon on a black canvas; AND-ing keeps only the pixels inside the box.
		mask = cv2.fillPoly(numpy.zeros_like(frame_rgb), [points], (255, 255, 255))
		crop_image.append(Image.fromarray(cv2.bitwise_and(frame_rgb, mask)))

In [14]:
# TrOCR processor from the base printed-text checkpoint; its tokenizer and image
# preprocessing are handed to the recognition pipeline below.
processor = TrOCRProcessor.from_pretrained(
	"microsoft/trocr-base-printed",
	clean_up_tokenization_spaces=True,
)

In [15]:
# Weight-only quantization with the library defaults (the export log reports int8_asym).
quantization_config = OVWeightQuantizationConfig()

# OpenVINO runtime options: favour latency and cache compiled models under the project root.
ov_config = {
	"PERFORMANCE_HINT": "LATENCY",
	"CACHE_DIR": f"{project_root}/.ov_cache",
}

In [16]:
# Load the fine-tuned license-plate TrOCR checkpoint, export it to OpenVINO on the fly
# and apply the weight quantization configured above.
# NOTE(review): this rebinds `model`, which previously held the YOLO detector; re-running
# the detection cell after this one would call `predict` on the OCR model instead.
model = OVModelForVision2Seq.from_pretrained(
	"DunnBC22/trocr-base-printed_license_plates_ocr",
	device="cpu",
	export=True,
	quantization_config=quantization_config,
	ov_config=ov_config,
)

Non-default generation parameters: {'max_length': 64, 'early_stopping': True, 'num_beams': 4, 'length_penalty': 2.0, 'no_repeat_ngram_size': 3}
Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data fl

INFO:nncf:Statistics of the bitwidth distribution:
┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑
│ Weight compression mode   │ % all parameters (layers)   │ % ratio-defining parameters (layers)   │
┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥
│ int8_asym                 │ 100% (73 / 73)              │ 100% (73 / 73)                         │
┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙


Output()

INFO:nncf:Statistics of the bitwidth distribution:
┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑
│ Weight compression mode   │ % all parameters (layers)   │ % ratio-defining parameters (layers)   │
┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥
│ int8_asym                 │ 100% (122 / 122)            │ 100% (122 / 122)                       │
┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙


Output()

INFO:nncf:Statistics of the bitwidth distribution:
┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑
│ Weight compression mode   │ % all parameters (layers)   │ % ratio-defining parameters (layers)   │
┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥
│ int8_asym                 │ 100% (98 / 98)              │ 100% (98 / 98)                         │
┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙


Output()

In [17]:
# Image-to-text pipeline wrapping the OpenVINO TrOCR model.
# The pipeline's `image_processor` argument takes the image-preprocessing component,
# so pass `processor.image_processor` rather than the whole TrOCRProcessor wrapper
# (which bundles the tokenizer as well).
recognizer = ImageToTextPipeline(
	model=model,
	tokenizer=processor.tokenizer,
	image_processor=processor.image_processor,
	framework="pt",
	task="image-to-text",
	num_workers=num_workers,
	device="cpu",
	torch_dtype="auto",
)

In [18]:
# Run OCR over every masked plate image; the pipeline's return value is shown as the cell output.
recognizer(crop_image)

[[{'generated_text': 'J69302'}]]