In [41]:
# 📒 Jupyter Notebook phiên bản của train.py (YOLOv8 huấn luyện hộ chiếu)

from ultralytics import YOLO
from pathlib import Path
from datetime import datetime
import matplotlib.pyplot as plt
import cv2
import os

In [42]:
# ✅ Cấu hình
# current_file_path = os.getcwd()
# print(f'Đường dẫn thư mục hiện tại: {current_file_path}')
path_project = Path()
model_path = '../' + 'train/runs/passport_20250423_140913/weights/best.pt'  # đường dẫn đến model đã train xong
data_path = '../' + 'train/data.yaml'  # đường dẫn đến dữ liệu
test_images_dir = '../' + 'tests/images'  # thư mục chứa ảnh test
save_pred_dir = '../' + 'runs/test_predict'  # thư mục lưu ảnh dự đoán
epochs = 50
imgsz = 640
batch = 16
lr0 = 0.001
run_name = f'passport_{datetime.now().strftime("%Y%m%d_%H%M%S")}'
save_path = Path('runs')
run_path = save_path / run_name
save_path.mkdir(exist_ok=True)

In [43]:
# ✅ Load model đã huấn luyện
if os.path.exists(model_path):
    print(f"Loading model from {model_path}")
    model = YOLO(model_path)
else:
  # Nếu không tìm thấy model, thông báo lỗi và thoát
  print(f"Model not found at {model_path}, please check the path.")
  exit(1)


Loading model from ../train/runs/passport_20250423_140913/weights/best.pt


In [44]:
# ✅ Dự đoán trên thư mục ảnh test
results = model.predict(source=test_images_dir, save=True, project='runs', name='test_predict', conf=0.5)


image 1/2 c:\repos\vs-code\baauf-do\repos-py-example\project\wcode-iai\easia-green\notebooks\..\tests\images\visa-1.jpg: 448x640 (no detections), 66.3ms
image 2/2 c:\repos\vs-code\baauf-do\repos-py-example\project\wcode-iai\easia-green\notebooks\..\tests\images\visa-2.jpg: 448x640 (no detections), 62.3ms
Speed: 2.1ms preprocess, 64.3ms inference, 0.5ms postprocess per image at shape (1, 3, 448, 640)
Results saved to [1mruns\test_predict2[0m


In [48]:
from paddleocr import PaddleOCR
import json

ocr = PaddleOCR(use_angle_cls=True, lang='en')  # Nếu dùng tiếng Việt thì lang='vi'
output_json_dir = Path("runs/json_output")
output_json_dir.mkdir(exist_ok=True)

for result in results:
    img = cv2.imread(result.path)
    h, w = img.shape[:2]
    info = {}
    img_name = Path(result.path).stem

    for box in result.boxes:
        cls_id = int(box.cls[0])
        label = model.names[cls_id]
        x_center, y_center, bw, bh = [float(v) for v in box.xywh[0]]

        # Tọa độ pixel
        x1 = max(int((x_center - bw / 2)), 0)
        y1 = max(int((y_center - bh / 2)), 0)
        x2 = min(int((x_center + bw / 2)), w)
        y2 = min(int((y_center + bh / 2)), h)

        cropped = img[y1:y2, x1:x2]
        result_ocr = ocr.ocr(cropped, cls=True)

        text = ""
        if result_ocr and len(result_ocr[0]) > 0:
            text = result_ocr[0][0][1][0].strip()
        info[label] = text

    json_path = output_json_dir / f"{img_name}.json"
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(info, f, ensure_ascii=False, indent=2)

    print(f"✅ Trích xong {img_name}.json")
# ✅ Vẽ ảnh dự đoán và lưu vào thư mục


[2025/04/23 14:55:30] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\doman/.paddleocr/whl\\det\\en\\en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\doman/.paddleocr/whl\\rec\\en\\en_PP-OCRv3_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_text_length=25, re

In [40]:
# ✅ In kết quả ra từng ảnh
for result in results:
    print(f"📸 Ảnh: {result.path}")
    for box in result.boxes:
        cls_id = int(box.cls[0])
        conf = float(box.conf[0])
        label = model.names[cls_id]
        print(f"🔹 {label} ({conf:.2f})")
    print("-----")

📸 Ảnh: c:\repos\vs-code\baauf-do\repos-py-example\project\wcode-iai\easia-green\notebooks\..\tests\images\visa-1.jpg
-----
📸 Ảnh: c:\repos\vs-code\baauf-do\repos-py-example\project\wcode-iai\easia-green\notebooks\..\tests\images\visa-2.jpg
-----


In [None]:
# ✅ Đánh giá lại mô hình trên tập validation
metrics = model.val(data=data_path)
print(f"📊 Đánh giá mô hình: {metrics}")

Ultralytics 8.3.114  Python-3.10.10 torch-2.0.1+cu118 CPU (12th Gen Intel Core(TM) i5-12400)
[34m[1mval: [0mFast image access  (ping: 0.10.0 ms, read: 1663.8877.6 MB/s, size: 572.3 KB)


[34m[1mval: [0mScanning C:\repos\vs-code\baauf-do\repos-py-example\project\wcode-iai\easia-green\train\datasets\labels\val.cache... 7 images, 0 backgrounds, 0 corrupt: 100%|██████████| 7/7 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


                   all          7         84      0.776      0.592      0.734      0.517
              fullname          7          7        0.8      0.714      0.797      0.574
                   dob          7          7       0.41        0.4       0.36      0.181
                   sex          7          7      0.751      0.714      0.732      0.445
            passportid          7          7          1      0.786      0.886       0.64
           passportid2          7          7          1      0.714      0.933      0.607
           nationality          7          7          1          0      0.559       0.34
                   pob          7          7      0.593      0.429      0.553       0.39
                  cmnd          7          7      0.531      0.571       0.75      0.444
         date_of_issue          7          7      0.569      0.202      0.495      0.355
        date_of_expiry          7          7      0.809      0.571      0.758      0.549
        place_of_issu

TypeError: 'list' object is not callable

In [None]:
# ✅ Hàm vẽ biểu đồ đơn giản
keys = ['precision', 'recall', 'map50', 'map']
print(metrics.keys())
values = [metrics[k] for k in keys]


plt.figure(figsize=(8, 5))
plt.bar(keys, values)
plt.ylabel("Score")
plt.title("📊 Evaluation Metrics")
for i, v in enumerate(values):
    plt.text(i, v + 0.01, f"{v:.2f}", ha='center')
plt.tight_layout()
plt.show()

TypeError: 'list' object is not callable

In [None]:
# ✅ Ghi log (tuỳ chọn)
log_file = run_path / 'train_log.txt'
with open(log_file, 'w') as f:
    f.write(f'Model: {model_path}\n')
    f.write(f'Dataset: {data_path}\n')
    f.write(f'Epochs: {epochs}\n')
    f.write(f'Image size: {imgsz}\n')
    f.write(f'Batch size: {batch}\n')
    f.write(f'Learning rate: {lr0}\n')
    f.write('\n[Evaluation metrics]\n')
    for k in keys:
        f.write(f'{k}: {getattr(metrics, k):.4f}\n')

print(f"✅ Kết quả huấn luyện tại: {run_path}")
print(f"✅ Kết quả dự đoán ảnh test lưu tại: {save_pred_dir}")