In [2]:
import torch
import cv2
from pathlib import Path
from PIL import Image, ImageDraw, ImageEnhance, ImageOps, ImageFont
import pytesseract
import os
import numpy as np
from collections import defaultdict
import torchvision.transforms as transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torchvision
from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights

# Point pytesseract at the Tesseract executable (hard-coded Windows install path).
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Model definition
def get_model_instance_segmentation(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN) detector with a fresh box head.

    The detector is created with torchvision's default weights, then its
    box predictor is replaced by a new ``FastRCNNPredictor`` that outputs
    ``num_classes`` classes.

    :param num_classes: number of output classes for the new box predictor
    :return: the modified detection model
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT
    )
    heads = detector.roi_heads
    # The new predictor must accept the same feature width as the old one.
    heads.box_predictor = FastRCNNPredictor(
        heads.box_predictor.cls_score.in_features,
        num_classes,
    )
    return detector

# Check whether the trained checkpoint file exists
model_path = "C:/Users/ediso/Desktop/ML_project3/best_model.pth"
if not os.path.exists(model_path):
    print(f"Model file not found at {model_path}")
else:
    print(f"Model file found at {model_path}")

# Choose the device first so the checkpoint can be remapped onto it while
# loading; without map_location a checkpoint saved on CUDA cannot be loaded
# on a CPU-only machine.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the trained Faster R-CNN weights and switch to inference mode
model = get_model_instance_segmentation(num_classes=2)
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

# Input video folder and output locations for results and extracted frames
video_folder = "C:/Users/ediso/Desktop/AVI"
results_folder = 'C:/Users/ediso/Desktop/ML_project3/OCR-video-results2'
frames_folder = "C:/Users/ediso/Desktop/ML_project3/OCR-frames2"
os.makedirs(results_folder, exist_ok=True)
os.makedirs(frames_folder, exist_ok=True)

# Ground-truth container numbers, taken from the file names in this folder
correct_numbers_folder = "C:/Users/ediso/Desktop/pictest"
# Strip surrounding whitespace from each stem: a file named e.g.
# "MAGU5605323 .jpg" would otherwise yield a key/value with a trailing space
# that never matches a video stem or a recognized number.
_stems = (Path(file_name).stem.strip() for file_name in os.listdir(correct_numbers_folder))
correct_numbers = {stem: stem for stem in _stems}

# Debug output: list the ground-truth container numbers
print("Correct numbers dictionary:")
for k, v in correct_numbers.items():
    print(f"{k}: {v}")

# Font used for the text drawn onto annotated frames
font_path = "arial.ttf"  # make sure this font exists on the system, or choose another one
font_size = 35  # adjust as needed
font = ImageFont.truetype(font_path, font_size)

def is_valid_container_number(number):
    """Return True if the first 11 characters of *number* pass the check-digit test.

    The first 10 characters are passed to ``calculate_check_digit`` and the
    result is compared with the 11th character. Anything after the 11th
    character is ignored.

    :param number: candidate string (e.g. cleaned OCR output)
    :return: True when the 11th character equals the computed check digit,
             False otherwise (including strings shorter than 11 characters)
    """
    if len(number) < 11:
        return False
    check_char = number[10]
    # str.isdigit() is also true for characters such as '²' that int() cannot
    # parse, so accept only plain ASCII digits to avoid a ValueError below.
    if check_char not in "0123456789":
        return False
    return calculate_check_digit(number[:10]) == int(check_char)

def calculate_check_digit(code):
    """Compute the check digit for the first 10 characters of *code*.

    The first four characters are mapped to numbers through the letter
    table below, the next six are read as digits; each value is multiplied
    by ``2 ** position`` and the total is reduced modulo 11, then modulo 10.

    :param code: string whose first 4 characters are uppercase letters and
                 next 6 are digits; characters beyond the 10th are ignored
    :return: the check digit (0-9), or -1 if *code* is shorter than 10
             characters or contains a character that cannot be converted
    """
    values = {'A': 10, 'B': 12, 'C': 13, 'D': 14, 'E': 15, 'F': 16, 'G': 17, 'H': 18, 'I': 19, 'J': 20,
              'K': 21, 'L': 23, 'M': 24, 'N': 25, 'O': 26, 'P': 27, 'Q': 28, 'R': 29, 'S': 30, 'T': 31,
              'U': 32, 'V': 34, 'W': 35, 'X': 36, 'Y': 37, 'Z': 38}
    # Too-short input used to escape as an IndexError; report it like any
    # other invalid code instead.
    if len(code) < 10:
        return -1
    try:
        letter_sum = sum(values[ch] * (2 ** pos) for pos, ch in enumerate(code[:4]))
        digit_sum = sum(int(ch) * (2 ** pos) for pos, ch in enumerate(code[4:10], start=4))
    except (KeyError, ValueError):
        return -1  # invalid character or failed conversion
    return (letter_sum + digit_sum) % 11 % 10

def adjust_box(x1, y1, x2, y2, scale=1.2):
    """
    Resize a box by ``scale`` while keeping its centre fixed.

    :param x1: x coordinate of the top-left corner
    :param y1: y coordinate of the top-left corner
    :param x2: x coordinate of the bottom-right corner
    :param y2: y coordinate of the bottom-right corner
    :param scale: resize factor (default 1.2)
    :return: tuple ``(x1, y1, x2, y2)`` of the resized box
    """
    box_w, box_h = x2 - x1, y2 - y1

    # Half of the size change on each axis is applied to each side.
    pad_x = (box_w * scale - box_w) / 2
    pad_y = (box_h * scale - box_h) / 2

    return x1 - pad_x, y1 - pad_y, x2 + pad_x, y2 + pad_y

def preprocess_image_and_save(img, save_path, *, contrast=2, threshold=128):
    """Binarize an image for OCR, save the result, and return it.

    Steps: boost contrast, convert to grayscale, then apply a fixed global
    threshold (this is not adaptive thresholding) to get a 1-bit image.

    :param img: PIL image to preprocess
    :param save_path: where the binarized image is written for inspection
    :param contrast: factor passed to ``ImageEnhance.Contrast.enhance``
                     (default 2)
    :param threshold: gray levels below this become 0, the rest 255
                      (default 128)
    :return: the binarized PIL image (mode '1')
    """
    boosted = ImageEnhance.Contrast(img).enhance(contrast)
    gray = ImageOps.grayscale(boosted)

    # Fixed global threshold; the output mode '1' makes the image 1-bit.
    binary = gray.point(lambda level: 0 if level < threshold else 255, '1')

    # Save the binarized image so it can be inspected later.
    binary.save(save_path)

    return binary

# Run detection + OCR on every video in video_folder, write an annotated copy
# of each video and of each frame, and log per-video and overall accuracy.
results_log = os.path.join(results_folder, "results_log.txt")

# Loop-invariant objects, built once instead of once per frame/detection.
transform = transforms.Compose([transforms.ToTensor()])
custom_config = r'--oem 3 --psm 6'  # Tesseract parameters

with open(results_log, "w", encoding="utf-8") as log_file:
    overall_correct_count = 0
    overall_total_count = 0

    for video_file in os.listdir(video_folder):
        # Case-insensitive match so files such as "CLIP.AVI" are not skipped.
        if not video_file.lower().endswith((".mp4", ".avi")):
            continue

        video_stem = Path(video_file).stem

        # Sub-folder that receives this video's frame and crop images
        video_frames_folder = os.path.join(frames_folder, video_stem)
        os.makedirs(video_frames_folder, exist_ok=True)

        # Ground-truth number for this video, looked up by file stem. The
        # stem is tried as-is and then stripped, and the value is stripped,
        # so stray whitespace in file names cannot break the comparison.
        correct_number = (
            correct_numbers.get(video_stem)
            or correct_numbers.get(video_stem.strip(), "")
        ).strip()
        # Recognized numbers are always cut to 11 characters, so compare
        # against the same prefix everywhere.
        expected_number = correct_number[:11]

        # Debug output: confirm the ground-truth number found for this video
        print(f"Video: {video_file}, Extracted stem: {video_stem}, Correct number: {correct_number}")

        frame_results = defaultdict(int)
        correct_count = 0
        total_count = 0
        frame_idx = 0

        video_path = os.path.join(video_folder, video_file)
        video_capture = cv2.VideoCapture(video_path)

        frame_width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        output_path = os.path.join(results_folder, f"{video_stem}_processed.avi")
        out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'XVID'), 10, (frame_width, frame_height))

        try:
            while video_capture.isOpened():
                ret, frame = video_capture.read()
                if not ret:
                    break

                # Keep an untouched copy of the frame for detection and OCR
                # crops; annotations go on a separate copy so that drawn
                # rectangles/text never end up in the OCR input.
                source_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                image = source_image.copy()

                # Object detection with the Faster R-CNN model
                img = transform(source_image).to(device)
                with torch.no_grad():
                    prediction = model([img])

                detected_objects = prediction[0]['boxes'].cpu().numpy()
                scores = prediction[0]['scores'].cpu().numpy()

                draw = ImageDraw.Draw(image)
                detection_idx = 0
                for obj, score in zip(detected_objects, scores):
                    if not (score > 0.5):  # confidence threshold
                        continue

                    x1, y1, x2, y2 = adjust_box(*obj)
                    draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
                    cropped_img = source_image.crop((x1, y1, x2, y2))

                    # The first detection keeps the original file name; later
                    # detections in the same frame get a numeric suffix so
                    # they no longer overwrite each other.
                    suffix = "" if detection_idx == 0 else f"_{detection_idx}"
                    detection_idx += 1

                    # Save the crop for debugging
                    cropped_output_path = os.path.join(
                        video_frames_folder, f"{video_stem}_frame_{frame_idx}{suffix}_cropped.jpg")
                    cropped_img.save(cropped_output_path)

                    processed_img = preprocess_image_and_save(
                        cropped_img,
                        os.path.join(video_frames_folder, f"{video_stem}_frame_{frame_idx}{suffix}_processed.jpg"))
                    recognized_text = pytesseract.image_to_string(processed_img, config=custom_config)
                    recognized_text = ''.join(filter(str.isalnum, recognized_text))

                    if is_valid_container_number(recognized_text):
                        candidate = recognized_text[:11]
                        frame_results[candidate] += 1
                        draw.text((x1, y1 - 10), candidate, fill="red", font=font)

                        # Count valid reads and how many match the ground truth
                        total_count += 1
                        if candidate == expected_number:
                            correct_count += 1

                # Save the annotated frame as an image
                frame_output_path = os.path.join(video_frames_folder, f"{video_stem}_frame_{frame_idx}.jpg")
                image.save(frame_output_path)
                frame_idx += 1

                processed_frame = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
                out.write(processed_frame)
        finally:
            # Release the reader and writer even if a frame fails to process.
            video_capture.release()
            out.release()

        # Majority vote over all valid reads decides the final number
        if frame_results:
            final_number = max(frame_results, key=frame_results.get)
            log_file.write(f"Video: {video_file}, Final Container Number: {final_number}\n")

            if final_number == expected_number:
                log_file.write(f"Video: {video_file}, Recognized correctly: {final_number}\n")
                overall_correct_count += 1
            else:
                log_file.write(f"Video: {video_file}, Recognized incorrectly: {final_number} (correct: {correct_number})\n")
        else:
            log_file.write(f"Video: {video_file}, No valid container number detected\n")

        overall_total_count += 1

        # Per-video OCR accuracy: matching reads / valid reads
        accuracy = correct_count / total_count if total_count > 0 else 0
        log_file.write(f"Video: {video_file}, OCR Accuracy: {accuracy:.2f}\n")

    # Overall accuracy: videos whose majority vote was correct / videos processed
    overall_accuracy = overall_correct_count / overall_total_count if overall_total_count > 0 else 0
    log_file.write(f"Overall OCR Accuracy: {overall_accuracy:.2f}\n")

print("所有結果已記錄在 results_log.txt 中。")
print(f"Overall OCR Accuracy: {overall_accuracy:.2f}")


  video_folder = "C:/Users/ediso\Desktop/AVI"


Model file found at C:/Users/ediso/Desktop/ML_project3/best_model.pth
Correct numbers dictionary:
FFAU2895947: FFAU2895947
MAGU5605323 : MAGU5605323 
SEKU5875349: SEKU5875349
SEKU5877491: SEKU5877491
SEKU6026686: SEKU6026686
TCNU6246126: TCNU6246126
TLLU4080736: TLLU4080736
TRHU8927462: TRHU8927462
TSSU5017340: TSSU5017340
TSSU5029819: TSSU5029819
TSSU5042071: TSSU5042071
TSSU5061615: TSSU5061615
TSSU5099400: TSSU5099400
TSSU5142300: TSSU5142300
TSSU5160351: TSSU5160351
WHLU5591798: WHLU5591798
WHLU5842825: WHLU5842825
WHSU2483178: WHSU2483178
WHSU2615314: WHSU2615314
WHSU2864765: WHSU2864765
WHSU5223791: WHSU5223791
WHSU5295430: WHSU5295430
WHSU5368199: WHSU5368199
WHSU5563298: WHSU5563298
WHSU5610492: WHSU5610492
WHSU5628589: WHSU5628589
WHSU5744465: WHSU5744465
WHSU5927851: WHSU5927851
WHSU5991104: WHSU5991104
WHSU5998393: WHSU5998393
WHSU6010260: WHSU6010260
WHSU6040178: WHSU6040178
WHSU6052306: WHSU6052306
WHSU6167120: WHSU6167120
WHSU6557387: WHSU6557387
WHSU6651665: WHSU6651665
