In [1]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
import xml.etree.ElementTree as ET
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pytesseract

2025-09-30 23:45:07.125678: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
def load_data(image_dir, xml_dir, img_size=(128, 128)):
    """Load images and their bounding-box annotations as training arrays.

    Every ``.jpg``/``.png`` file in ``image_dir`` is paired with the XML file of
    the same base name in ``xml_dir``. Each ``<object>`` element that has both a
    ``<name>`` and a ``<bndbox>`` child contributes one sample: the image
    resized to ``img_size`` plus its box divided by the original image
    width/height.

    Args:
        image_dir: Directory containing the images.
        xml_dir: Directory containing the XML annotation files.
        img_size: Target size passed to ``cv2.resize`` (width, height).

    Returns:
        Tuple ``(images, bboxes)``. ``images`` holds the resized images divided
        by 255; ``bboxes`` holds ``[xmin, ymin, xmax, ymax]`` rows clipped to
        ``[0, 1]``. Both are empty arrays when nothing could be loaded.
    """
    images = []
    bboxes = []  # [xmin, ymin, xmax, ymax] normalized

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split done by the caller) reproducible.
    img_files = sorted(
        f for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.png'))
    )

    if len(img_files) == 0:
        print("No images found in", image_dir)
        return np.array([]), np.array([])

    for img_file in img_files:
        img_path = os.path.join(image_dir, img_file)
        xml_path = os.path.join(xml_dir, img_file.rsplit('.', 1)[0] + '.xml')

        if not os.path.exists(xml_path):
            print(f"Warning: XML not found for {img_file}")
            continue

        img = cv2.imread(img_path)
        if img is None:
            print(f"Warning: Failed to read {img_file}")
            continue
        H, W = img.shape[:2]

        try:
            root = ET.parse(xml_path).getroot()
        except Exception as e:
            print(f"Error parsing XML {xml_path}: {e}")
            continue

        # Resize once per image; every box of this image shares the result.
        img_resized = cv2.resize(img, img_size)

        found_bbox = False
        for obj in root.findall('object'):
            # Objects without a <name> or <bndbox> child are ignored.
            if obj.find('name') is None:
                continue
            bbox = obj.find('bndbox')
            if bbox is None:
                continue

            try:
                # findtext returns None for a missing tag (-> TypeError) and
                # float() also accepts coordinates written as "123.0".
                xmin, ymin, xmax, ymax = (
                    float(bbox.findtext(tag))
                    for tag in ('xmin', 'ymin', 'xmax', 'ymax')
                )
            except (TypeError, ValueError):
                print(f"Warning: Malformed bndbox in {xml_path}")
                continue

            # Keep targets inside [0, 1] even if an annotation spills slightly
            # past the image border.
            box = np.clip([xmin / W, ymin / H, xmax / W, ymax / H], 0.0, 1.0)

            images.append(img_resized)
            bboxes.append(box.tolist())
            found_bbox = True

        if not found_bbox:
            print(f"Warning: No 'plate' object found in {xml_path}")

    images = np.array(images) / 255.0
    bboxes = np.array(bboxes)
    print(f"Loaded {len(images)} images and {len(bboxes)} bounding boxes.")
    return images, bboxes

In [6]:
def build_cnn_model(input_shape=(128,128,3)):
    """Build and compile the bounding-box regression CNN.

    Three Conv2D (3x3, ReLU) + MaxPooling2D stages with 32, 64 and 128 filters
    are flattened into a Dense(128) -> Dense(64) head. The last layer is a
    4-unit sigmoid producing xmin, ymin, xmax, ymax normalized. The model is
    compiled with the Adam optimizer and mean-squared-error loss.

    Args:
        input_shape: Shape of one input image, passed to the first Conv2D layer.

    Returns:
        The compiled Sequential model.
    """
    feature_extractor = [
        layers.Conv2D(32, (3,3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D(2,2),
        layers.Conv2D(64, (3,3), activation='relu'),
        layers.MaxPooling2D(2,2),
        layers.Conv2D(128, (3,3), activation='relu'),
        layers.MaxPooling2D(2,2),
    ]
    regression_head = [
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(4, activation='sigmoid'),  # xmin, ymin, xmax, ymax normalized
    ]

    model = models.Sequential(feature_extractor + regression_head)
    model.compile(optimizer='adam', loss='mse')
    return model

In [7]:
image_dir = './newdata/imgs'  # Path to car images
xml_dir = './newdata/xml'  # Path to XML annotations
images, bboxes = load_data(image_dir, xml_dir)

# Fail fast with a readable message: load_data returns empty arrays when nothing
# could be loaded, and the split below would otherwise fail less obviously.
assert len(images) > 0, f"No training samples loaded from {image_dir} / {xml_dir}"
assert len(images) == len(bboxes), "images and bboxes must be aligned one-to-one"

# Hold out 25% of the samples for evaluation; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(images, bboxes, test_size=0.25, random_state=42)

Loaded 265 images and 265 bounding boxes.


In [None]:
# Seed Python, NumPy and TensorFlow before the model is built so that weight
# initialisation and batch shuffling are repeatable across Restart & Run All
# (as far as the backend allows).
tf.keras.utils.set_random_seed(42)

model = build_cnn_model()

# 10% of the training split is held back by Keras as validation data.
history = model.fit(X_train, y_train, epochs=50, batch_size=16, validation_split=0.1)
model.save('./model/model1.h5')

Epoch 1/50
Epoch 2/50


In [None]:
def predict_and_crop(image, model, img_size=(128,128)):
    """Predict a bounding box for ``image`` and return the cropped region.

    Args:
        image: Image array indexed as (row, column, ...), e.g. from cv2.imread.
        model: Object whose ``predict`` returns, per sample, four values
            [xmin, ymin, xmax, ymax] expressed as fractions of width/height.
        img_size: Size passed to ``cv2.resize`` before prediction.

    Returns:
        Tuple ``(cropped, (xmin, ymin, xmax, ymax))``: an independent copy of
        the cropped region and the box in pixel coordinates of ``image``. The
        box is ordered and clamped so the crop always has at least one pixel.
    """
    h_orig, w_orig = image.shape[:2]
    img_resized = cv2.resize(image, img_size)/255.0
    bbox_pred = model.predict(np.expand_dims(img_resized, axis=0))[0]

    # Convert back to original scale
    x_a = int(bbox_pred[0]*w_orig)
    y_a = int(bbox_pred[1]*h_orig)
    x_b = int(bbox_pred[2]*w_orig)
    y_b = int(bbox_pred[3]*h_orig)

    # Nothing forces the regressor to emit min < max, so order the corners.
    xmin, xmax = sorted((x_a, x_b))
    ymin, ymax = sorted((y_a, y_b))

    # Clamp into the image and keep the box at least one pixel wide/high so
    # the crop is never empty.
    xmin = min(max(xmin, 0), w_orig - 1)
    ymin = min(max(ymin, 0), h_orig - 1)
    xmax = min(max(xmax, xmin + 1), w_orig)
    ymax = min(max(ymax, ymin + 1), h_orig)

    # Slicing yields a view; copy so later drawing on `image` cannot alter
    # the crop.
    cropped = image[ymin:ymax, xmin:xmax].copy()
    return cropped, (xmin, ymin, xmax, ymax)

In [None]:
test_img_path = './test/2.jpg'
img = cv2.imread(test_img_path)
# cv2.imread reports an unreadable/missing file by returning None, which would
# otherwise surface later as an unrelated attribute error.
if img is None:
    raise FileNotFoundError(f"Could not read test image: {test_img_path}")
cropped_plate, bbox = predict_and_crop(img, model)

In [None]:
# Draw on a copy: cropped_plate may be a view into img, so drawing on img
# itself could paint the box edge into the crop that is later sent to OCR.
annotated = img.copy()
cv2.rectangle(annotated, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0,255,0), 2)

# Render inline with matplotlib instead of cv2.imshow, whose windows are only
# painted while cv2.waitKey runs. OpenCV images are BGR; matplotlib expects RGB.
fig, (ax_full, ax_crop) = plt.subplots(1, 2, figsize=(12, 5))
ax_full.imshow(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB))
ax_full.set_title("Detected Plate")
ax_full.axis('off')
if cropped_plate.size:  # skip drawing when the crop is empty
    ax_crop.imshow(cv2.cvtColor(cropped_plate, cv2.COLOR_BGR2RGB))
ax_crop.set_title("Cropped Plate")
ax_crop.axis('off')
fig.tight_layout()
plt.show()

In [None]:
# An empty crop cannot be OCR'd; fail with a clear message instead of an
# opaque error from the imaging backend.
if cropped_plate.size == 0:
    raise ValueError("Predicted plate region is empty; nothing to OCR")

# pytesseract assumes RGB channel order for NumPy arrays, while images read
# with cv2.imread are BGR.
plate_rgb = cv2.cvtColor(cropped_plate, cv2.COLOR_BGR2RGB)

# --psm 7: treat the image as a single line of text.
plate_text = pytesseract.image_to_string(plate_rgb, config='--psm 7')
print("Detected License Plate:", plate_text.strip())

In [None]:
# from sklearn.metrics import precision_recall_curve

# precision, recall, _ = precision_recall_curve(y_true, y_scores)
# plt.plot(recall, precision)
# plt.xlabel('Recall')
# plt.ylabel('Precision')
# plt.title('Precision-Recall Curve')
# plt.show()

In [None]:
# Plot the per-epoch training and validation loss recorded by model.fit.
plt.figure(figsize=(8, 6))
for history_key, curve_label in (('loss', 'Training Loss'),
                                 ('val_loss', 'Validation Loss')):
    plt.plot(history.history[history_key], label=curve_label)

# Labels and cosmetics
plt.title('Training vs. Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss (MSE)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
def iou(y_true, y_pred):
    """
    Intersection over Union of two axis-aligned boxes.

    Both arguments are indexable as [xmin, ymin, xmax, ymax] (pixel
    coordinates). A small epsilon (1e-6) is added to the union so the
    division never hits zero.
    """
    # Corners of the overlap rectangle
    left = max(y_true[0], y_pred[0])
    top = max(y_true[1], y_pred[1])
    right = min(y_true[2], y_pred[2])
    bottom = min(y_true[3], y_pred[3])

    # Negative extents mean the boxes do not overlap along that axis
    overlap_w = max(0, right - left)
    overlap_h = max(0, bottom - top)
    overlap = overlap_w * overlap_h

    area_true = (y_true[2] - y_true[0]) * (y_true[3] - y_true[1])
    area_pred = (y_pred[2] - y_pred[0]) * (y_pred[3] - y_pred[1])
    union = area_true + area_pred - overlap

    return overlap / float(union + 1e-6)

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

threshold = 0.5  # minimum IoU for a prediction to count as a detection
y_true_labels = []
y_pred_labels = []

# One batched forward pass instead of a separate model.predict call per sample.
y_pred_all = model.predict(X_test) if len(X_test) else np.empty((0, 4))

for i in range(len(X_test)):
    y_pred = y_pred_all[i]
    h, w, _ = X_test[i].shape

    # Scale the normalized boxes to pixel coordinates of the network input
    # (X_test[i] is the resized image fed to the model, not the original photo).
    y_pred_scaled = [int(y_pred[0]*w), int(y_pred[1]*h), int(y_pred[2]*w), int(y_pred[3]*h)]
    y_true_scaled = [int(y_test[i][0]*w), int(y_test[i][1]*h), int(y_test[i][2]*w), int(y_test[i][3]*h)]

    iou_score = iou(y_true_scaled, y_pred_scaled)

    # Every test sample has a ground-truth box, so the true label is always 1;
    # the predicted label is 1 only when the boxes overlap enough.
    # NOTE: with no negative samples, precision is 1.0 whenever any prediction
    # passes the threshold, and recall equals accuracy.
    y_true_labels.append(1)
    y_pred_labels.append(1 if iou_score >= threshold else 0)

In [None]:
# zero_division=0: when no prediction reaches the IoU threshold, precision and
# F1 are undefined; report 0 explicitly instead of emitting a warning.
precision = precision_score(y_true_labels, y_pred_labels, zero_division=0)
recall = recall_score(y_true_labels, y_pred_labels, zero_division=0)
f1 = f1_score(y_true_labels, y_pred_labels, zero_division=0)

# Fraction of samples whose predicted label matches the true label.
accuracy = np.mean(np.array(y_true_labels) == np.array(y_pred_labels))

print(f"Accuracy: {accuracy:.3f}")
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")
print(f"F1-score: {f1:.3f}")