In [27]:
import os
import shutil

import cv2
import numpy
from PIL import Image
from optimum.intel import OVModelForVision2Seq, OVWeightQuantizationConfig
from roboflow import Roboflow
from transformers import ImageToTextPipeline, TrOCRProcessor
from ultralytics import YOLO

import wandb
from taiwan_license_plate_recognition.helper import get_num_of_workers

In [28]:
%load_ext dotenv
%dotenv
%matplotlib inline

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [29]:
# Root directory used as the prefix for dataset and cache paths in later cells.
project_root: str = os.environ.get("PROJECT_ROOT", "")
if not project_root:
	# An empty root would turn f"{project_root}/datasets/..." into a path directly
	# under "/", so stop here with a clear message instead of failing later.
	raise RuntimeError("PROJECT_ROOT is not set; define it in the environment or in the .env file")

# Worker count from the project helper; passed to the OCR pipeline further down.
num_workers: int = get_num_of_workers()

In [30]:
# Open a Weights & Biases run; `run` is used later to fetch the detection model artifact.
run = wandb.init(
	project="taiwan-license-plate-recognition",
	group="combination",
	job_type="other",
)

VBox(children=(Label(value='0.047 MB of 0.047 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

In [31]:
# The API key comes from the environment rather than being hard-coded;
# os.environ.get yields None when ROBOFLOW_API_KEY is absent.
roboflow_agent = Roboflow(
	api_key=os.environ.get("ROBOFLOW_API_KEY"),
)

In [32]:
# Resolve workspace -> project -> version one step at a time, then download
# version 6 in the "yolov8-obb" export format under the project's datasets folder.
roboflow_project = roboflow_agent.workspace("work-c9x8f").project("license-plate-detection-mdsot")
dataset_version = roboflow_project.version(6)
dataset = dataset_version.download("yolov8-obb", location=f"{project_root}/datasets/roboflow")

loading Roboflow workspace...
loading Roboflow project...


In [33]:
# Pull the latest "license-plate-detection" model artifact through the W&B run;
# the returned string is the local path of the downloaded files.
detection_artifact_name = "license-plate-detection:latest"
model_path: str = run.use_model(detection_artifact_name)

[34m[1mwandb[0m:   3 of 3 files downloaded.  


In [34]:
# Move the downloaded artifact to a path ending in "_openvino_model"
# (presumably so Ultralytics recognises it as an OpenVINO export — TODO confirm).
openvino_model_path: str = f"{model_path}_openvino_model"

if os.path.exists(model_path):
	# os.replace cannot overwrite a non-empty directory, so drop the copy left
	# behind by a previous run before moving the fresh download into place.
	if os.path.isdir(openvino_model_path):
		shutil.rmtree(openvino_model_path)
	os.replace(model_path, openvino_model_path)
elif not os.path.exists(openvino_model_path):
	# Neither the download nor an earlier renamed copy is present.
	raise FileNotFoundError(f"Model artifact not found at {model_path} or {openvino_model_path}")
# Otherwise the artifact was already moved by an earlier execution of this cell.

In [35]:
# Load the renamed artifact directory as an oriented-bounding-box ("obb") YOLO model.
model = YOLO(
	f"{model_path}_openvino_model",
	task="obb",
)

In [36]:
# Single training image from the downloaded Roboflow dataset, used as a smoke test.
test_image_path: str = (
	f"{project_root}/datasets/roboflow/train/images/"
	"000001_jpg.rf.27e1551f828338908b6c02b147c4d366.jpg"
)

In [37]:
# Run plate detection on the sample image using the CPU device.
results = model.predict(source=test_image_path, device="cpu")

Loading /home/hermeschen/Repo/taiwan-license-plate-recognition/src/scripts/artifacts/license-plate-detection:v0_openvino_model for OpenVINO inference...

image 1/1 /home/hermeschen/Repo/taiwan-license-plate-recognition/datasets/roboflow/train/images/000001_jpg.rf.27e1551f828338908b6c02b147c4d366.jpg: 640x640 67.0ms
Speed: 16.1ms preprocess, 67.0ms inference, 37.5ms postprocess per image at shape (1, 3, 640, 640)


In [38]:
# Accumulator for the masked plate images produced from the detection results.
crop_image: list = list()

In [39]:
# Empty the accumulator first so that re-running this cell does not append
# duplicate crops on top of the ones from a previous execution.
crop_image.clear()

for result in results:
	for obb in result.obb.xyxyxyxy:
		# Corner coordinates reshaped to (-1, 1, 2) and cast to integers for cv2.fillPoly.
		points = obb.cpu().numpy().reshape((-1, 1, 2)).astype(int)
		# White-filled polygon drawn on a black canvas shaped like the source image.
		mask = cv2.fillPoly(numpy.zeros_like(result.orig_img), [points], (255, 255, 255))
		# Keep only the pixels inside the polygon; everything outside becomes black.
		cropped_img = cv2.bitwise_and(result.orig_img, mask)
		# Convert BGR -> RGB before wrapping the array as a PIL image.
		crop_image.append(Image.fromarray(cv2.cvtColor(cropped_img, cv2.COLOR_BGR2RGB)))

In [40]:
# TrOCR processor; its tokenizer is handed to the recognition pipeline further down.
processor = TrOCRProcessor.from_pretrained(
	"microsoft/trocr-base-printed",
	clean_up_tokenization_spaces=True,
)

In [41]:
# OpenVINO runtime options: latency performance hint, plus a cache directory
# kept under the project root.
ov_config = dict(PERFORMANCE_HINT="LATENCY", CACHE_DIR=f"{project_root}/.ov_cache")

# Weight quantization with the library defaults (no arguments overridden).
quantization_config = OVWeightQuantizationConfig()

In [42]:
# Export the fine-tuned TrOCR checkpoint to OpenVINO (export=True) with the
# quantization and runtime settings defined earlier.
# Note: this rebinds `model`, which until now referred to the YOLO detector.
ocr_checkpoint = "DunnBC22/trocr-base-printed_license_plates_ocr"
model = OVModelForVision2Seq.from_pretrained(
	ocr_checkpoint,
	device="cpu",
	export=True,
	quantization_config=quantization_config,
	ov_config=ov_config,
)

Non-default generation parameters: {'max_length': 64, 'early_stopping': True, 'num_beams': 4, 'length_penalty': 2.0, 'no_repeat_ngram_size': 3}
Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data fl

INFO:nncf:Statistics of the bitwidth distribution:
┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑
│ Weight compression mode   │ % all parameters (layers)   │ % ratio-defining parameters (layers)   │
┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥
│ int8_asym                 │ 100% (73 / 73)              │ 100% (73 / 73)                         │
┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙


Output()

INFO:nncf:Statistics of the bitwidth distribution:
┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑
│ Weight compression mode   │ % all parameters (layers)   │ % ratio-defining parameters (layers)   │
┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥
│ int8_asym                 │ 100% (122 / 122)            │ 100% (122 / 122)                       │
┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙


Output()

INFO:nncf:Statistics of the bitwidth distribution:
┍━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑
│ Weight compression mode   │ % all parameters (layers)   │ % ratio-defining parameters (layers)   │
┝━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥
│ int8_asym                 │ 100% (98 / 98)              │ 100% (98 / 98)                         │
┕━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙


Output()

In [43]:
# Image-to-text pipeline around the OpenVINO OCR model.
# The tokenizer and the image processor are both taken from the TrOCR processor's
# sub-components, instead of passing the whole processor object as `image_processor`.
recognizer = ImageToTextPipeline(
	model=model,
	tokenizer=processor.tokenizer,
	image_processor=processor.image_processor,
	framework="pt",
	task="image-to-text",
	num_workers=num_workers,
	device="cpu",
	torch_dtype="auto",
)

In [44]:
# Run OCR on every masked plate image; kept as the bare last expression so the
# notebook displays the returned list of {'generated_text': ...} results.
recognizer(crop_image)

[[{'generated_text': 'J69302'}]]