In [None]:
# Mount Google Drive at /content/drive; later cells read archives and model
# files from paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import zipfile

# Unpack the detection dataset archive from Drive into the directory that
# detection.yaml uses as its dataset root.
zip_path = "/content/drive/MyDrive/detection.zip"
extract_path = "/content/dataset/detection"

with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path=extract_path)

In [None]:
# Dataset description consumed by the YOLO train/val calls: dataset root,
# the three image splits (relative to the root) and the single class name.
detection_yaml = """path: /content/dataset/detection
train: images/train
val: images/val
test: images/test

names:
  0: "license_plate"
"""

# Write the config to the same absolute path that every later cell passes as
# `data=` (/content/detection.yaml). A relative path only lines up with those
# readers when the working directory happens to be /content.
with open("/content/detection.yaml", "w") as f:
    f.write(detection_yaml)

In [None]:
# Install the Ultralytics package that provides the YOLO class used below.
# NOTE(review): version is not pinned, so results may differ between runs.
!pip install ultralytics

In [None]:
from ultralytics import YOLO

# Start from the pretrained yolo11n.pt checkpoint.
model = YOLO("yolo11n.pt")

# Training settings, gathered in one place so they are easy to scan and tweak.
train_settings = dict(
    data="/content/detection.yaml",  # dataset YAML written by the earlier cell
    epochs=300,                      # upper bound on training epochs
    imgsz=640,                       # training image size
    batch=128,                       # batch size
    device="cuda",                   # train on the GPU
    pretrained=True,                 # keep the pretrained weights (transfer learning)
    patience=50,                     # presumably early-stopping patience in epochs -- confirm in Ultralytics docs
)

# Fine-tune the detector on the license-plate dataset.
model.train(**train_settings)

In [None]:
from ultralytics import YOLO

# Load detector weights from Google Drive. This rebinds `model`, so the cells
# below (validation, export, end-to-end evaluation) use these weights rather
# than the object left behind by the training cell.
model = YOLO("/content/drive/MyDrive/detection.pt")  # presumably the fine-tuned checkpoint saved from training -- confirm

In [None]:
# Evaluate the loaded detector on the `test` split declared in detection.yaml.
model.val(data='/content/detection.yaml', split='test')

In [None]:
# Export the current detector to ONNX format.
# export() returns the path of the artifact it wrote (next to the source
# weights), so reuse that instead of a hard-coded location that is only correct
# when `model` was loaded from /content/drive/MyDrive/detection.pt.
onnx_path = model.export(format="onnx")

# Load the exported ONNX model
onnx_model = YOLO(onnx_path)

# Evaluate the ONNX model on the test split
onnx_model.val(data='/content/detection.yaml', split='test')

In [None]:
# Export the current detector to NCNN format.
# export() returns the path of the generated `*_ncnn_model` directory, so reuse
# it instead of a hard-coded location that is only correct when `model` was
# loaded from /content/drive/MyDrive/detection.pt.
ncnn_path = model.export(format="ncnn")

# Load the exported NCNN model
ncnn_model = YOLO(ncnn_path)

# Evaluate the NCNN model on the test split
ncnn_model.val(data='/content/detection.yaml', split='test')

In [None]:
# Export the current detector to a TensorRT engine.
# export() returns the path of the generated `.engine` file, so reuse it
# instead of a hard-coded location that is only correct when `model` was loaded
# from /content/drive/MyDrive/detection.pt.
engine_path = model.export(format="engine")

# Load the exported TensorRT model
tensorrt_model = YOLO(engine_path)

# Evaluate the TensorRT model on the test split
tensorrt_model.val(data='/content/detection.yaml', split='test')

In [None]:
import zipfile

# Unpack the OCR dataset archive from Drive under /content/dataset.
zip_path = "/content/drive/MyDrive/ocr.zip"
extract_path = "/content/dataset"

with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path=extract_path)

In [None]:
# Clone the PaddleOCR repository; its tools/train.py and tools/export_model.py
# scripts are invoked by later cells.
# NOTE(review): no tag/commit is checked out, so the scripts track the default branch.
!git clone https://github.com/PaddlePaddle/PaddleOCR.git

In [None]:
# Install PaddlePaddle (GPU build) plus extra packages, presumably required by
# the cloned PaddleOCR training scripts -- confirm against its requirements.txt.
# NOTE(review): versions are not pinned.
!pip install paddlepaddle-gpu
!pip install pyclipper
!pip install lmdb
!pip install rapidfuzz

In [None]:
# Download the English PP-OCRv3 recognition training checkpoint archive; it is
# used as the starting point for fine-tuning.
!wget https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_train.tar

In [None]:
# Extract the checkpoint archive into the current directory and delete the
# tarball once extraction succeeds.
!tar -xf /content/en_PP-OCRv3_rec_train.tar && rm /content/en_PP-OCRv3_rec_train.tar

In [None]:
# Alphabet the recognizer is allowed to emit: digits, upper-case letters and '&'.
characters = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ&'

# Location of the dictionary file referenced by the OCR config and by PaddleOCR.
output_file = '/content/character_dict.txt'

# Emit one symbol per line, each line terminated by a newline.
with open(output_file, 'w') as file:
    file.writelines(symbol + '\n' for symbol in characters)

In [None]:
# PaddleOCR recognition config, kept as a string and written to disk by the
# next statement. As configured below it describes an SVTR_LCNet recognizer
# (MobileNetV1Enhance backbone, SequenceEncoder neck, CTC head/loss) trained on
# /content/dataset/ocr with 3x48x320 inputs, a custom character dictionary and
# a maximum text length of 8.
# NOTE(review): `use_space_char` is true although character_dict.txt contains
# no space entry -- confirm the labels are expected to contain spaces.
# NOTE(review): `save_model_dir` is relative (./output/...), while the export
# cell reads /content/output/... -- this lines up only when cwd is /content.
ocr_yaml = """Global:
  debug: false
  use_gpu: true
  epoch_num: 500
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/v3_en_mobile
  save_epoch_step: 3
  eval_batch_step: [0, 117]
  cal_metric_during_train: true
  pretrained_model:
  checkpoints:
  save_inference_dir:
  use_visualdl: false
  infer_img: doc/imgs_words/ch/word_1.jpg
  character_dict_path: /content/character_dict.txt
  max_text_length: &max_text_length 8
  infer_mode: false
  use_space_char: true
  distributed: true
  save_res_path: ./output/rec/predicts_ppocrv3_en.txt


Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Cosine
    learning_rate: 0.0005
    warmup_epoch: 10
  regularizer:
    name: L2
    factor: 1.0e-03


Architecture:
  model_type: rec
  algorithm: SVTR_LCNet
  Transform:
  Backbone:
    name: MobileNetV1Enhance
    scale: 0.5
    last_conv_stride: [1, 2]
    last_pool_type: avg
  Neck:
    name: SequenceEncoder
    encoder_type: svtr
    dims: 64
    depth: 2
    hidden_dims: 80
    use_guide: False
  Head:
    name: CTCHead
    fc_decay: 0.001
Loss:
  name: CTCLoss

PostProcess:
  name: CTCLabelDecode

Metric:
  name: RecMetric
  main_indicator: acc
  ignore_space: False

Train:
  dataset:
    name: SimpleDataSet
    data_dir: /content/dataset/ocr
    ext_op_transform_idx: 1
    label_file_list:
    - /content/dataset/ocr/train_list.txt
    transforms:
    - DecodeImage:
        img_mode: BGR
        channel_first: false
    - SVTRRecResizeImg:
        image_shape: [3, 48, 320]
    - RecConAug:
        prob: 0.5
        ext_data_num: 2
        image_shape: [3, 48, 320]
        max_text_length: *max_text_length
    - CTCLabelEncode:
    - KeepKeys:
        keep_keys:
        - image
        - label
        - length
  loader:
    shuffle: true
    batch_size_per_card: 64
    drop_last: true
    num_workers: 4
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: /content/dataset/ocr
    label_file_list:
    - /content/dataset/ocr/test_list.txt
    transforms:
    - DecodeImage:
        img_mode: BGR
        channel_first: false
    - CTCLabelEncode:
    - SVTRRecResizeImg:
        image_shape: [3, 48, 320]
    - KeepKeys:
        keep_keys:
        - image
        - label
        - length
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 128
    num_workers: 4
"""

# Write the recognition config to the same absolute path that the train and
# export commands pass via `-c` (/content/ocr.yaml). A relative path only lines
# up with those readers when the working directory happens to be /content.
with open("/content/ocr.yaml", "w") as f:
    f.write(ocr_yaml)

In [None]:
# Fine-tune the recognizer with PaddleOCR's training script, using ocr.yaml and
# overriding Global.pretrained_model with the downloaded PP-OCRv3 checkpoint.
!python3 /content/PaddleOCR/tools/train.py -c /content/ocr.yaml -o Global.pretrained_model=/content/en_PP-OCRv3_rec_train/best_accuracy

In [None]:
# Export the best fine-tuned checkpoint as an inference model into
# /content/ocr_model.
# NOTE(review): the evaluation cells load the recognizer from Google Drive
# (/content/drive/MyDrive/ocr and .../ocr_model), not from this directory --
# presumably the export is copied there manually; confirm.
!python3 /content/PaddleOCR/tools/export_model.py -c /content/ocr.yaml -o Global.pretrained_model=/content/output/v3_en_mobile/best_accuracy Global.save_inference_dir=/content/ocr_model

In [None]:
# Install the paddleocr inference package used by this cell.
# NOTE(review): version is not pinned; the `ocr.ocr(..., cls=False, det=False)`
# call below presumably targets the PaddleOCR 2.x API -- confirm and pin.
!pip install paddleocr
import os
from paddleocr import PaddleOCR
from PIL import Image
import cv2
import numpy as np
import time

# Recognition-only evaluation of the fine-tuned OCR model on pre-cropped
# plate images: run OCR on every test image, save the raw predictions to
# /content/ocr_res.txt, then score them against test_list.txt.
# NOTE(review): rec_model_dir here is '.../MyDrive/ocr' while the end-to-end
# cell uses '.../MyDrive/ocr_model' -- confirm both point at the same export.
ocr = PaddleOCR(
    det_model_dir=None,
    cls_model_dir=None,
    rec_model_dir='/content/drive/MyDrive/ocr',
    rec_algorithm='SVTR_LCNet',
    rec_char_dict_path='/content/character_dict.txt',
    use_angle_cls=True,
    lang='en'
)

# Directories
image_dir = '/content/dataset/ocr/test'

# Ground truth: one "<image path>\t<label>" pair per line, keyed by file stem.
true_labels = {}
with open('/content/dataset/ocr/test_list.txt', 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue  # tolerate blank / trailing lines in the list file
        image_path, true_label = line.split('\t')
        img_id = os.path.splitext(os.path.basename(image_path))[0]
        true_labels[img_id] = true_label

total = len(true_labels)
latency = 0      # accumulated OCR wall time in seconds
num_timed = 0    # number of images that were actually run through OCR

# Open the output file in write mode
with open('/content/ocr_res.txt', 'w') as f:
    # Process each image (sorted so the result file is deterministic)
    for image_name in sorted(os.listdir(image_dir)):
        # Case-insensitive match, consistent with the end-to-end evaluation
        if not image_name.lower().endswith(('.jpg', '.png')):
            continue
        image_path = os.path.join(image_dir, image_name)

        # Perform OCR; perf_counter is monotonic and intended for intervals
        start_time = time.perf_counter()
        result = ocr.ocr(image_path, cls=False, det=False)
        latency += time.perf_counter() - start_time
        num_timed += 1

        if result:
            # Extract the recognized text (the confidence score is dropped)
            recognized_text = ''.join([line[-1][0] for line in result])

            # Write the result to the file
            f.write(f"{image_path}\t{recognized_text}\n")

num_correct = 0
incorrect_samples = []  # (image path, normalized prediction, expected label)

with open('/content/ocr_res.txt', 'r') as f:
    for line in f:
        fields = line.split('\t')
        img_id = os.path.splitext(os.path.basename(fields[0]))[0]
        # Normalize the prediction: upper-case, alphanumerics only (this also
        # strips the trailing newline).
        # NOTE(review): '&' is in the character dictionary but is removed here,
        # so a label containing '&' can never match -- confirm this is intended.
        pred = "".join(ch.upper() for ch in fields[1] if ch.isalnum())

        expected = true_labels.get(img_id)
        if expected is None:
            # Image present in the test directory but absent from test_list.txt
            print(f'Skipping {fields[0]}: no ground-truth label found')
            continue

        if expected == pred:
            num_correct += 1
        else:
            incorrect_samples.append((fields[0], pred, expected))

# Images with no OCR output are never written to the result file, so they
# count as incorrect through the `total` denominator.
if total:
    print('The final accuracy is %.2f%%' % ((num_correct / total) * 100))
else:
    print('No labelled test samples found; accuracy is undefined')

# Average over the images that were actually timed, not over the label count.
if num_timed:
    print(f'The average latency is {(latency / num_timed) * 1000:.2f} ms')
else:
    print('No test images found; latency is undefined')

In [None]:
import zipfile

# Unpack the end-to-end evaluation archive from Drive under /content/dataset.
zip_path = "/content/drive/MyDrive/endtoend.zip"
extract_path = "/content/dataset"

with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path=extract_path)

In [None]:
# Install the paddleocr inference package used by this cell.
# NOTE(review): version is not pinned; the `ocr.ocr(..., cls=False, det=False)`
# call below presumably targets the PaddleOCR 2.x API -- confirm and pin.
!pip install paddleocr
import os
from paddleocr import PaddleOCR
from PIL import Image
import cv2
import numpy as np
import time

# End-to-end evaluation: detect plates with the YOLO detector, crop each
# detection, read it with the fine-tuned recognizer and compare with the label.
# Relies on `model` (the YOLO detector) created in an earlier cell.
ocr = PaddleOCR(
    det_model_dir=None,
    cls_model_dir=None,
    rec_model_dir='/content/drive/MyDrive/ocr_model',
    rec_algorithm='SVTR_LCNet',
    rec_char_dict_path='/content/character_dict.txt',
    use_angle_cls=True,
    lang='en'
)

# Directories
image_dir = '/content/dataset/endtoend/test'

# Ground truth: one "<image path>\t<label>" pair per line, keyed by file stem.
true_labels = {}
with open('/content/dataset/endtoend/test_list.txt', 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue  # tolerate blank / trailing lines in the list file
        image_path, true_label = line.split('\t')
        img_id = os.path.splitext(os.path.basename(image_path))[0]
        true_labels[img_id] = true_label

total = len(true_labels)

num_correct = 0
false_positives = 0
false_negatives = 0

for img_name in os.listdir(image_dir):
    if not img_name.lower().endswith(('.jpg', '.png')):
        continue

    img_id = os.path.splitext(img_name)[0]
    if img_id not in true_labels:
        # Cannot score an image that has no ground-truth entry
        print(f'Skipping {img_name}: no ground-truth label found')
        continue

    img = cv2.imread(os.path.join(image_dir, img_name))
    if img is None:
        # cv2.imread signals an unreadable file by returning None; nothing can
        # be detected in it, so the labelled plate is counted as missed.
        print(f'Could not read {img_name}; counting it as a false negative')
        false_negatives += 1
        continue

    boxes = model(img)[0].boxes

    # Try every detection box until the first valid-length OCR result
    for box in boxes:
        x1, y1, x2, y2 = map(int, box.xyxy[0])
        crop = img[y1:y2, x1:x2]

        # Shape check: skip if height >= width
        if crop.shape[0] >= crop.shape[1]:
            continue

        # Run OCR
        ocr_res = ocr.ocr(crop, cls=False, det=False) or []
        if not ocr_res:
            continue

        # Build the text string and length check
        # NOTE(review): unlike the OCR-only evaluation, the prediction is
        # compared raw (no upper-casing / non-alphanumeric stripping) -- confirm.
        text = ''.join([line[0][0] for line in ocr_res])
        if not (2 <= len(text) <= 8):
            continue

        # We have a "valid" OCR result -- decide TP vs FP
        if text == true_labels[img_id]:
            num_correct += 1
        else:
            false_positives += 1

        break  # stop after the first valid-length OCR result
    else:
        # The loop ended without `break`, i.e. no box produced a valid-length
        # reading (or there were no boxes): that is a false negative.
        false_negatives += 1

if total:
    print(f"Accuracy    : {num_correct/total*100:.2f}%")
    print(f"False neg.  : {false_negatives/total*100:.2f}%")
    print(f"False pos.  : {false_positives/total*100:.2f}%")
else:
    print('No labelled test samples found; metrics are undefined')