# Phát hiện người sử dụng Fine-tuned Faster R-CNN

# Import thư viện

In [None]:
# Import thư viện
import os
import numpy as np
import torch
from torch.utils.data import DataLoader
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from torchvision.datasets import CocoDetection
from torchvision import transforms
import torchvision
import matplotlib.pyplot as plt
from pycocotools.cocoeval import COCOeval
from pycocotools.coco import COCO
import json
from PIL import Image, ImageDraw, ImageFont
import optuna
from pycocotools.coco import COCO
from tqdm import tqdm
from torch.utils.data import Dataset
import torchvision.transforms as T
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import matplotlib.pyplot as plt
import albumentations as A
import cv2
from pathlib import Path
import shutil
import requests
from io import BytesIO
from torch.optim.lr_scheduler import ReduceLROnPlateau
import tempfile

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Định nghĩa class Dataset và khởi tạo các loader

In [None]:
class CocoDataset(CocoDetection):
    """
    COCO-format detection dataset that yields Faster R-CNN style samples.

    Each item is ``(image, target)``. ``target`` is a dict holding ``boxes``
    (float32, shape ``[N, 4]``, ``[xmin, ymin, xmax, ymax]``), ``labels``
    (int64, shape ``[N]``) and ``image_id`` (the COCO image id).

    Args:
        img_folder (str): Directory containing the images.
        ann_file (str): Path to the COCO annotation file (JSON).
        augmentation (callable, optional): Albumentations-style pipeline,
            called as ``augmentation(image=..., bboxes=..., class_labels=...)``.
            When set, the augmented array is converted to a CHW float tensor
            and divided by 255.
        transforms (callable, optional): Image-only transform applied to the
            PIL image when no augmentation is set. Defaults to ``ToTensor``.
    """
    def __init__(self, img_folder, ann_file, augmentation=None, transforms=None):
        super().__init__(img_folder, ann_file)
        # Stored under a private name so it does not collide with the
        # ``transforms`` attribute managed by the torchvision base class.
        self._transforms = transforms
        self._augmentation = augmentation
        self._to_tensor = T.ToTensor()

    def __getitem__(self, idx):
        img, annotations = super().__getitem__(idx)

        # Convert COCO [x, y, width, height] boxes to [xmin, ymin, xmax, ymax].
        boxes = []
        labels = []
        for ann in annotations:
            xmin, ymin, width, height = ann['bbox']
            boxes.append([xmin, ymin, xmin + width, ymin + height])
            labels.append(ann['category_id'])

        if self._augmentation:
            # Albumentations works on numpy arrays, not PIL images.
            augmented = self._augmentation(
                image=np.array(img), bboxes=boxes, class_labels=labels
            )
            boxes = augmented['bboxes']
            labels = augmented['class_labels']
            # HWC uint8 array -> CHW float tensor in [0, 1].
            img = torch.tensor(
                augmented['image'].transpose(2, 0, 1), dtype=torch.float32
            ) / 255.0
        elif self._transforms is not None:
            img = self._transforms(img)
        else:
            img = self._to_tensor(img)

        # reshape keeps the [N, 4] layout even when the image has no
        # annotations (an empty list would otherwise become shape [0]).
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        # Drop boxes without positive width and height (possible after
        # augmentation); torchvision detection models reject them in training.
        keep = (boxes[:, 2] > boxes[:, 0]) & (boxes[:, 3] > boxes[:, 1])
        boxes = boxes[keep]
        labels = labels[keep]

        target = {
            "boxes": boxes,
            "labels": labels,
            # Use the real COCO image id (not the dataset index) so that
            # predictions can be matched against the annotation file.
            "image_id": torch.tensor([self.ids[idx]])
        }

        return img, target

In [None]:
def _collate_detection_batch(batch):
    """Regroup a list of (image, target) samples into (images, targets) tuples."""
    return tuple(zip(*batch))


# Augmentation pipeline for the augmented training set. Boxes are supplied in
# pascal_voc format and their labels travel in the 'class_labels' field.
aug_train_transform = A.Compose(
    [
        A.HorizontalFlip(p=0.5),  # horizontal flip, probability 0.5
        A.Rotate(limit=15, p=0.2),  # rotation with limit 15, probability 0.2
        A.RandomBrightnessContrast(
            brightness_limit=0.2, contrast_limit=0.2, p=0.3
        ),  # brightness / contrast jitter, probability 0.3
        A.GaussNoise(p=0.1),  # Gaussian noise, probability 0.1
        A.ShiftScaleRotate(
            shift_limit=0.05, scale_limit=0.05, rotate_limit=10, p=0.3
        ),  # small shift / scale / rotate, probability 0.3
    ],
    bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels']),
)

# Plain (non-augmented) train and test datasets.
train_dataset = CocoDataset(
    img_folder='/kaggle/input/inriaperson/Train/JPEGImages',
    ann_file='/kaggle/input/inria-coco-format/coco_train.json',
)
test_dataset = CocoDataset(
    img_folder='/kaggle/input/inriaperson/Test/JPEGImages',
    ann_file='/kaggle/input/inria-coco-format/coco_test.json',
)

# Same training images, but passed through the augmentation pipeline.
aug_train_dataset = CocoDataset(
    img_folder='/kaggle/input/inriaperson/Train/JPEGImages',
    ann_file='/kaggle/input/inria-coco-format/coco_train.json',
    augmentation=aug_train_transform,
)

# Detection samples have a variable number of boxes per image, so batches are
# kept as tuples instead of being stacked into a single tensor.
aug_train_loader = DataLoader(
    aug_train_dataset, batch_size=4, shuffle=True,
    collate_fn=_collate_detection_batch,
)
train_loader = DataLoader(
    train_dataset, batch_size=4, shuffle=True,
    collate_fn=_collate_detection_batch,
)
test_loader = DataLoader(
    test_dataset, batch_size=1, shuffle=False,
    collate_fn=_collate_detection_batch,
)


# Định nghĩa các hàm phụ (hàm đánh giá, hàm load checkpoint và hàm vẽ đồ thị loss)

In [None]:
def evaluate_map(model, dataloader, ann_file, device="cuda" if torch.cuda.is_available() else "cpu"):
    """
    Compute COCO-style mAP for a detection model on a COCO-format dataset.

    Args:
        model: Trained Faster R-CNN model; it is switched to eval mode and
            moved to ``device``.
        dataloader: DataLoader yielding ``(images, targets)`` batches. Each
            target's ``image_id`` must be an image id present in ``ann_file``.
        ann_file: Path to the COCO annotation file (JSON) used as ground truth.
        device: Device to run inference on (cuda or cpu).

    Returns:
        float: mAP averaged over IoU 0.50:0.95 (``COCOeval.stats[0]``), or
        ``0.0`` when the model produced no detections at all.
    """
    model.eval()
    model.to(device)

    # Ground-truth annotations.
    coco_gt = COCO(ann_file)

    predictions = []
    images_seen = 0  # fallback id for targets that carry no "image_id"

    with torch.no_grad():
        for images, targets in dataloader:
            images = [img.to(device) for img in images]
            outputs = model(images)

            for target, output in zip(targets, outputs):
                if "image_id" in target:
                    image_id = target["image_id"].item()
                else:
                    image_id = images_seen
                images_seen += 1

                boxes = output["boxes"].cpu().numpy()  # [x_min, y_min, x_max, y_max]
                scores = output["scores"].cpu().numpy()
                labels = output["labels"].cpu().numpy()

                # Convert to COCO layout [x, y, width, height].
                boxes_coco = boxes.copy()
                boxes_coco[:, 2:] -= boxes_coco[:, :2]

                for box, score, label in zip(boxes_coco, scores, labels):
                    predictions.append({
                        "image_id": int(image_id),
                        "category_id": int(label),
                        "bbox": box.tolist(),
                        "score": float(score)
                    })

    # COCO.loadRes cannot handle an empty result list, so short-circuit here.
    if not predictions:
        print("No detections were produced; returning mAP = 0.0")
        return 0.0

    coco_dt = coco_gt.loadRes(predictions)

    coco_eval = COCOeval(coco_gt, coco_dt, iouType="bbox")
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    # stats[0] is mAP@IoU=0.50:0.95 (the COCO default metric).
    map_score = coco_eval.stats[0]

    return map_score
    


def load_checkpoint(model, checkpoint_path, device, checkpoint_input_dir='/kaggle/input/inria-coco-format'):
    """
    Restore model weights from a checkpoint file and return its loss history.

    Only the model state dict is loaded; optimizer state is left untouched.

    Args:
        model: Faster R-CNN model to load the weights into.
        checkpoint_path: File name of the checkpoint.
        device: Device passed to ``torch.load`` as ``map_location``.
        checkpoint_input_dir: Directory that contains the checkpoint file.

    Returns:
        Tuple ``(epoch, train_losses, test_losses)`` read from the checkpoint,
        or ``(0, [], [])`` when the file does not exist.
    """
    full_path = os.path.join(checkpoint_input_dir, checkpoint_path)

    # Missing file: report it and hand back an empty history.
    if not os.path.exists(full_path):
        print(f"No checkpoint found at {full_path}")
        return 0, [], []

    state = torch.load(full_path, map_location=device)
    model.load_state_dict(state['model_state_dict'])
    saved_epoch = state['epoch']
    history = (state['train_losses'], state['test_losses'])
    print(f"Loaded checkpoint from {full_path}, resuming from epoch {saved_epoch + 1}")
    return (saved_epoch, *history)

def plot_losses(train_losses, test_losses, output_dir='/kaggle/working'):
    """
    Plot train and test loss per epoch and save the figure as ``loss_plot.png``.

    Args:
        train_losses: Sequence of per-epoch training losses.
        test_losses: Sequence of per-epoch test losses; entries may be NaN.
        output_dir: Directory the image is written to.

    Returns:
        None
    """
    import math  # local import keeps this block self-contained

    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss', marker='o')
    # NaN never compares equal to anything, so `l != float('nan')` is always
    # True; math.isnan is the reliable way to detect a usable test loss.
    if any(not math.isnan(loss) for loss in test_losses):
        plt.plot(range(1, len(test_losses) + 1), test_losses, label='Test Loss', marker='s')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Train and Test Loss over Epochs')
    plt.legend()
    plt.grid(True)
    plot_path = os.path.join(output_dir, 'loss_plot.png')
    plt.savefig(plot_path)
    plt.close()
    print(f"Saved loss plot at {plot_path}")

# Định nghĩa hàm train

In [None]:
def train_model(model, train_dataloader, test_dataloader, optimizer, num_epochs, device, 
                checkpoint_dir='/kaggle/working', checkpoint_input_dir='/kaggle/input/inria-coco-format', 
                epochs_per_run=2, resume_from_checkpoint=None, patience=5, min_delta=0.001):
    """
    Train a detection model with periodic checkpoints, LR scheduling on the
    test loss, and early stopping.

    Args:
        model: Detection model that returns a dict of losses in train mode.
        train_dataloader: DataLoader yielding ``(images, targets)`` for training.
        test_dataloader: DataLoader yielding ``(images, targets)`` for the test loss.
        optimizer: Optimizer over the model parameters.
        num_epochs: Total number of epochs (an upper bound when resuming).
        device: Device to train on.
        checkpoint_dir: Directory where checkpoints, loss history and the
            loss plot are written.
        checkpoint_input_dir: Directory searched for ``resume_from_checkpoint``.
        epochs_per_run: A numbered checkpoint is saved every this many epochs
            and at the last epoch.
        resume_from_checkpoint: Checkpoint file name to resume from, or None.
        patience: Number of epochs without improvement before stopping early.
        min_delta: Minimum decrease of the test loss that counts as improvement.

    Returns:
        Tuple ``(train_losses, test_losses)`` with one entry per epoch.
    """
    import math  # local import keeps this block self-contained

    model.to(device)

    # Loss history and early-stopping state.
    train_losses = []
    test_losses = []
    start_epoch = 0
    best_test_loss = float('inf')
    epochs_no_improve = 0

    # Reduce the learning rate when the test loss stops improving.
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, verbose=True)

    # Resume from a checkpoint if one was requested and exists.
    if resume_from_checkpoint:
        checkpoint_path = os.path.join(checkpoint_input_dir, resume_from_checkpoint)
        if os.path.exists(checkpoint_path):
            checkpoint = torch.load(checkpoint_path, map_location=device)
            model.load_state_dict(checkpoint['model_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            # Checkpoints written before scheduler state was saved lack this key.
            if 'scheduler_state_dict' in checkpoint:
                scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
            start_epoch = checkpoint['epoch'] + 1
            train_losses = checkpoint['train_losses']
            test_losses = checkpoint['test_losses']
            best_test_loss = checkpoint.get('best_test_loss', float('inf'))
            epochs_no_improve = checkpoint.get('epochs_no_improve', 0)
            print(f"Resumed from checkpoint at {checkpoint_path}, starting from epoch {start_epoch}")
        else:
            print(f"No checkpoint found at {checkpoint_path}")

    os.makedirs(checkpoint_dir, exist_ok=True)

    # Dataset sizes, used only for the progress-bar display.
    train_dataset_size = len(train_dataloader.dataset)
    test_dataset_size = len(test_dataloader.dataset)

    for epoch in range(start_epoch, num_epochs):
        # ---- Training phase ----
        model.train()
        total_train_loss = 0
        images_processed = 0
        train_progress = tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]", 
                             leave=False, total=len(train_dataloader))
        for images, targets in train_progress:
            images_processed += len(images)
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            total_train_loss += losses.item()
            train_progress.set_postfix({
                'batch_loss': f"{losses.item():.4f}",
                'images': f"{images_processed}/{train_dataset_size}"
            })

        avg_train_loss = total_train_loss / len(train_dataloader)
        train_losses.append(avg_train_loss)

        # ---- Testing phase ----
        # The model only returns its loss dict in train mode, so the test loss
        # is computed in train mode with gradients disabled.
        model.train()
        total_test_loss = 0
        images_processed = 0
        test_progress = tqdm(test_dataloader, desc=f"Epoch {epoch+1}/{num_epochs} [Test]", 
                            leave=False, total=len(test_dataloader))
        with torch.no_grad():
            for images, targets in test_progress:
                images_processed += len(images)
                images = [img.to(device) for img in images]
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

                loss_dict = model(images, targets)

                if isinstance(loss_dict, dict):
                    batch_loss = sum(loss for loss in loss_dict.values()).item()
                    total_test_loss += batch_loss
                    batch_loss_text = f"{batch_loss:.4f}"
                else:
                    print(f"Warning: Loss not computed for batch in test phase, got {type(loss_dict)}")
                    batch_loss_text = "N/A"

                test_progress.set_postfix({
                    'batch_loss': batch_loss_text,
                    'images': f"{images_processed}/{test_dataset_size}"
                })
        model.eval()

        avg_test_loss = total_test_loss / len(test_dataloader) if total_test_loss > 0 else float('nan')
        test_losses.append(avg_test_loss)

        # NaN never compares equal to itself, so `!= float('nan')` cannot
        # detect it; use math.isnan for every validity check below.
        test_loss_valid = not math.isnan(avg_test_loss)

        if test_loss_valid:
            scheduler.step(avg_test_loss)

        test_loss_text = f"{avg_test_loss:.4f}" if test_loss_valid else "N/A"
        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.4f}, Test Loss: {test_loss_text}")

        # Update the early-stopping state BEFORE building the checkpoint so
        # the saved values describe this epoch rather than the previous one.
        improved = test_loss_valid and avg_test_loss < best_test_loss - min_delta
        if improved:
            best_test_loss = avg_test_loss
            epochs_no_improve = 0
        elif test_loss_valid:
            epochs_no_improve += 1

        checkpoint = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'train_losses': train_losses,
            'test_losses': test_losses,
            'best_test_loss': best_test_loss,
            'epochs_no_improve': epochs_no_improve
        }

        # Numbered checkpoint every `epochs_per_run` epochs and at the last epoch.
        if (epoch + 1) % epochs_per_run == 0 or epoch + 1 == num_epochs:
            checkpoint_path = os.path.join(checkpoint_dir, f'checkpoint_epoch_{epoch+1}.pth')
            torch.save(checkpoint, checkpoint_path)
            print(f"Saved checkpoint at {checkpoint_path}")

            # Loss history as JSON next to the checkpoint.
            loss_history = {
                'train_losses': train_losses,
                'test_losses': test_losses
            }
            loss_history_path = os.path.join(checkpoint_dir, 'loss_history.json')
            with open(loss_history_path, 'w') as f:
                json.dump(loss_history, f, indent=4)
            print(f"Saved loss history at {loss_history_path}")

        if improved:
            torch.save(checkpoint, os.path.join(checkpoint_dir, 'best_model.pth'))
            print(f"Saved best model with Test Loss: {best_test_loss:.4f}")

        if epochs_no_improve >= patience:
            print(f"Early stopping triggered after {epochs_no_improve} epochs without improvement")
            break

    plot_losses(train_losses, test_losses, checkpoint_dir)

    return train_losses, test_losses

# Train model và đánh giá model với *trainable_backbone_layers = 3*


In [None]:
# Pretrained Faster R-CNN with 3 trainable backbone layers; the box predictor
# is replaced by a 2-output head before fine-tuning.
ft_model = fasterrcnn_resnet50_fpn(pretrained=True, trainable_backbone_layers=3)
ft_in_features = ft_model.roi_heads.box_predictor.cls_score.in_features
ft_model.roi_heads.box_predictor = FastRCNNPredictor(ft_in_features, 2)
ft_model.to(device)

optimizer = torch.optim.SGD(
    ft_model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0005
)

num_epochs = 100
epochs_per_run = 5
train_losses, test_losses = train_model(
    model=ft_model,
    train_dataloader=train_loader,
    test_dataloader=test_loader,
    optimizer=optimizer,
    num_epochs=num_epochs,
    device=device,
    checkpoint_dir='/kaggle/working',
    checkpoint_input_dir='/kaggle/input/inria-coco-format',
    epochs_per_run=epochs_per_run,
    resume_from_checkpoint=None,
)

# mAP of the model as it stands after the training loop ends.
print(evaluate_map(ft_model, test_loader, "/kaggle/input/inria-coco-format/coco_test.json"))

# Train model và đánh giá model với *trainable_backbone_layers = 0* và dùng augmented data

In [None]:
# Train and evaluate a model with trainable_backbone_layers = 0 on the
# augmented training data.
aug_model = fasterrcnn_resnet50_fpn(pretrained=True, trainable_backbone_layers=0)
aug_in_features = aug_model.roi_heads.box_predictor.cls_score.in_features
aug_model.roi_heads.box_predictor = FastRCNNPredictor(aug_in_features, 2)
aug_model.to(device)

aug_optimizer = torch.optim.SGD(
    aug_model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0005
)

num_epochs = 100
epochs_per_run = 5
train_losses, test_losses = train_model(
    model=aug_model,
    train_dataloader=aug_train_loader,
    test_dataloader=test_loader,
    optimizer=aug_optimizer,
    num_epochs=num_epochs,
    device=device,
    checkpoint_dir='/kaggle/working',
    checkpoint_input_dir='/kaggle/input/inria-coco-format',
    epochs_per_run=epochs_per_run,
    resume_from_checkpoint=None,
)

# mAP of the model as it stands after the training loop ends.
print(evaluate_map(aug_model, test_loader, "/kaggle/input/inria-coco-format/coco_test.json"))

# Đánh giá các model

Chúng em đã thực hiện train các model và lưu checkpoint vào những file cụ thể như sau:
- Model được train dùng dataset tăng cường và *trainable_backbone_layers = 0*: aug_best_model_0.pth
- Model được train không dùng dataset tăng cường và *trainable_backbone_layers = 0*: best_model_0.pth
- Model được train không dùng dataset tăng cường và *trainable_backbone_layers = 3*: best_model_3.pth
- Model được train không dùng dataset tăng cường và *trainable_backbone_layers = 5*: best_model_5.pth

In [None]:
# Evaluate the pretrained Faster R-CNN as-is (its box predictor is not replaced)
# on the test set, as a baseline for the fine-tuned models.
pretrained_model = fasterrcnn_resnet50_fpn(pretrained = True)
pretrained_model.to(device)
print(evaluate_map(pretrained_model, test_loader, "/kaggle/input/inria-coco-format/coco_test.json"))

In [None]:
# Build the model with a 2-class output head (person or background).
# The checkpoints loaded in the following cells are loaded into this model.
model = fasterrcnn_resnet50_fpn(pretrained = True)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)
model.to(device)

In [None]:
# Evaluate the model trained with the augmented dataset and trainable_backbone_layers = 0.
# NOTE: load_checkpoint only prints a message when the file is missing, in
# which case the mAP below reflects whatever weights `model` already holds.
load_checkpoint(model, 'aug_best_model_0.pth', device, checkpoint_input_dir='/kaggle/input/inria-coco-format')
print(evaluate_map(model, test_loader, "/kaggle/input/inria-coco-format/coco_test.json"))

In [None]:
# Evaluate the model trained without the augmented dataset and trainable_backbone_layers = 0.
# NOTE: load_checkpoint only prints a message when the file is missing, in
# which case the mAP below reflects whatever weights `model` already holds.
load_checkpoint(model, 'best_model_0.pth', device, checkpoint_input_dir='/kaggle/input/inria-coco-format')
print(evaluate_map(model, test_loader, "/kaggle/input/inria-coco-format/coco_test.json"))

In [None]:
# Evaluate the model trained without the augmented dataset and trainable_backbone_layers = 3.
# NOTE: load_checkpoint only prints a message when the file is missing, in
# which case the mAP below reflects whatever weights `model` already holds.
load_checkpoint(model, 'best_model_3.pth', device, checkpoint_input_dir='/kaggle/input/inria-coco-format')
print(evaluate_map(model, test_loader, "/kaggle/input/inria-coco-format/coco_test.json"))

In [None]:
# Evaluate the model trained without the augmented dataset and trainable_backbone_layers = 5.
# NOTE: load_checkpoint only prints a message when the file is missing, in
# which case the mAP below reflects whatever weights `model` already holds.
load_checkpoint(model, 'best_model_5.pth', device, checkpoint_input_dir='/kaggle/input/inria-coco-format')
print(evaluate_map(model, test_loader, "/kaggle/input/inria-coco-format/coco_test.json"))

Như đã trình bày ở phần thuyết trình, theo quan sát, tụi em thấy model không dùng dataset tăng cường và có tham số trainable_backbone_layers = 3 là có kết quả tốt nhất theo tiêu chí đánh giá đặt ra trong bài, nên tụi em sẽ sử dụng nó để thực hiện các demo.

In [None]:
# Reload the selected checkpoint (best_model_3.pth) into `model` so the demo
# cells below run with those weights.
load_checkpoint(model, 'best_model_3.pth', device, checkpoint_input_dir='/kaggle/input/inria-coco-format')

# Thực nghiệm

Ở phần này, tụi em thực hiện vẽ ground truth bbox, đồng thời vẽ các predicted bbox trên tất cả các ảnh (ảnh train và test trong dataset INRIA) và nén kết quả thành 2 file zip: train_output.zip (chứa các ảnh train đã được vẽ các bbox) và test_output.zip (chứa các ảnh test đã được vẽ các bbox).


In [None]:
def load_coco_data(json_path):
    """
    Load COCO-format annotations from a JSON file.

    Args:
        json_path: Path to the annotation file.

    Returns:
        The parsed JSON content.
    """
    # Read as UTF-8 explicitly so non-ASCII file names do not depend on the
    # platform's default encoding.
    with open(json_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def get_image_paths_and_annotations(coco_data, image_dir):
    """
    Extract image paths and the annotations belonging to each image.

    Args:
        coco_data: Parsed COCO dict with ``'images'`` and ``'annotations'`` lists.
        image_dir: Directory the image file names are joined onto.

    Returns:
        Tuple ``(image_paths, image_annotations)``: a list of full image paths
        and a dict mapping each image ``file_name`` to its list of annotations
        (an empty list for images without annotations).
    """
    image_info = {img['id']: img for img in coco_data['images']}

    # Group annotations by image id in a single pass instead of rescanning the
    # whole annotation list for every image.
    annotations_by_image = {}
    for ann in coco_data['annotations']:
        annotations_by_image.setdefault(ann['image_id'], []).append(ann)

    image_paths = []
    image_annotations = {}
    for img_id, img in image_info.items():
        image_paths.append(os.path.join(image_dir, img['file_name']))
        image_annotations[img['file_name']] = list(annotations_by_image.get(img_id, []))

    return image_paths, image_annotations

def draw_bboxes(image, gt_bboxes, pred_bboxes, output_path):
    """
    Draw ground truth (green) and predicted (red) boxes with confidence scores.

    The image is modified in place and then saved to ``output_path``.

    Args:
        image: PIL image to draw on.
        gt_bboxes: Iterable of COCO annotations; each ``'bbox'`` is
            ``[x, y, width, height]``.
        pred_bboxes: Model output dict with ``'boxes'``
            (``[x1, y1, x2, y2]`` each) and ``'scores'``.
        output_path: Path the annotated image is saved to.
    """
    draw = ImageDraw.Draw(image)

    # Fall back to the built-in font only when the TrueType file cannot be
    # read; a bare `except:` would also hide unrelated errors.
    try:
        font = ImageFont.truetype("arial.ttf", 15)
    except OSError:
        font = ImageFont.load_default()

    # Ground truth boxes (green).
    for bbox in gt_bboxes:
        x, y, w, h = bbox['bbox']
        draw.rectangle((x, y, x + w, y + h), outline='green', width=2)

    # Predicted boxes (red) with confidence scores.
    for bbox, score in zip(pred_bboxes['boxes'], pred_bboxes['scores']):
        x1, y1, x2, y2 = bbox.tolist()
        draw.rectangle((x1, y1, x2, y2), outline='red', width=2)
        # Clamp the label so it stays visible for boxes touching the top edge.
        draw.text((x1, max(y1 - 15, 0)), f'{score:.2f}', fill='red', font=font)

    image.save(output_path)

def process_dataset(model, image_paths, annotations, image_dir, output_dir):
    """
    Run the model on every image, draw ground-truth and predicted boxes, and
    save the annotated images into ``output_dir``.

    Args:
        model: Detection model; switched to eval mode and moved to the
            available device.
        image_paths: Paths of the images to process.
        annotations: Dict mapping an image file name to its ground-truth
            annotations.
        image_dir: Not used by this function.
        output_dir: Directory for the annotated images (created if missing).
    """
    target_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.eval()
    model.to(target_device)

    os.makedirs(output_dir, exist_ok=True)

    for path in image_paths:
        file_name = os.path.basename(path)
        picture = Image.open(path).convert('RGB')
        batch = F.to_tensor(picture).unsqueeze(0).to(target_device)

        # Single-image batch, so only the first result is needed.
        with torch.no_grad():
            detections = model(batch)[0]

        # Images without an entry in `annotations` get no ground-truth boxes.
        draw_bboxes(
            picture,
            annotations.get(file_name, []),
            detections,
            os.path.join(output_dir, file_name),
        )

In [None]:
# Locations of the INRIA images and their COCO-format annotation files.
train_img_dir = '/kaggle/input/inriaperson/Train/JPEGImages'
test_img_dir = '/kaggle/input/inriaperson/Test/JPEGImages'
train_json_path = '/kaggle/input/inria-coco-format/coco_train.json'
test_json_path = '/kaggle/input/inria-coco-format/coco_test.json'

# Load COCO annotations
train_coco = load_coco_data(train_json_path)
test_coco = load_coco_data(test_json_path)

# Get image paths and annotations (annotations are keyed by image file name)
train_image_paths, train_annotations = get_image_paths_and_annotations(train_coco, train_img_dir)
test_image_paths, test_annotations = get_image_paths_and_annotations(test_coco, test_img_dir)

In [None]:
train_output_dir = '/kaggle/working/train_output'
test_output_dir = '/kaggle/working/test_output'

# `model` is the fine-tuned network loaded from best_model_3.pth; the name
# `normal_model` previously used here is not defined anywhere in the notebook.
model.eval()

# Process train and test datasets
process_dataset(model, train_image_paths, train_annotations, train_img_dir, train_output_dir)
process_dataset(model, test_image_paths, test_annotations, test_img_dir, test_output_dir)

# Zip the output directories
shutil.make_archive('/kaggle/working/train_output', 'zip', train_output_dir)
shutil.make_archive('/kaggle/working/test_output', 'zip', test_output_dir)

## Hàm vẽ predicted bbox trên một ảnh bất kỳ

In [None]:
# Theo đường dẫn file ảnh
def draw_predicted_bbox_file(image_path, model, output_path):
    """
    Draw predicted bounding boxes (red) with confidence scores on a single image,
    save it to output_path, and display it.

    Args:
        image_path (str): Path to the input image.
        model: Trained Faster R-CNN model.
        output_path (str): Path to save the output image.
    """
    model.eval()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # Load and preprocess image
    image = Image.open(image_path).convert('RGB')
    image_tensor = F.to_tensor(image).unsqueeze(0).to(device)

    # Single-image batch, so only the first result is needed.
    with torch.no_grad():
        predictions = model(image_tensor)[0]

    draw = ImageDraw.Draw(image)
    # Fall back to the built-in font only when the TrueType file cannot be
    # read; a bare `except:` would also hide unrelated errors.
    try:
        font = ImageFont.truetype("arial.ttf", 15)
    except OSError:
        font = ImageFont.load_default()

    for bbox, score in zip(predictions['boxes'], predictions['scores']):
        x1, y1, x2, y2 = bbox.tolist()
        draw.rectangle((x1, y1, x2, y2), outline='red', width=2)
        # Clamp the label so it stays visible for boxes touching the top edge.
        draw.text((x1, max(y1 - 15, 0)), f'{score:.2f}', fill='red', font=font)

    # Save output image
    image.save(output_path)

    # Display image
    plt.figure(figsize=(10, 10))
    plt.imshow(np.array(image))
    plt.axis('off')
    plt.show()
    
# Theo link URL của ảnh
def draw_predicted_bbox(url, model, output_path, timeout=30):
    """
    Draw predicted bounding boxes (red) with confidence scores on an image from a URL,
    save it to output_path, and display it.

    Args:
        url (str): URL of the input image.
        model: Trained Faster R-CNN model.
        output_path (str): Path to save the output image.
        timeout (float): Seconds to wait for the HTTP request (default: 30).

    Raises:
        RuntimeError: If the image cannot be downloaded or decoded.
    """
    model.eval()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # Download and decode the image. Without a timeout a stalled server would
    # block the notebook indefinitely.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Check for request errors
        image = Image.open(BytesIO(response.content)).convert('RGB')
    except Exception as e:
        # Chain the cause so the underlying network/decoding error is visible.
        raise RuntimeError(f"Failed to load image from URL: {e}") from e

    # Preprocess image
    image_tensor = F.to_tensor(image).unsqueeze(0).to(device)

    # Single-image batch, so only the first result is needed.
    with torch.no_grad():
        predictions = model(image_tensor)[0]

    draw = ImageDraw.Draw(image)
    # Fall back to the built-in font only when the TrueType file cannot be
    # read; a bare `except:` would also hide unrelated errors.
    try:
        font = ImageFont.truetype("arial.ttf", 15)
    except OSError:
        font = ImageFont.load_default()

    for bbox, score in zip(predictions['boxes'], predictions['scores']):
        x1, y1, x2, y2 = bbox.tolist()
        draw.rectangle((x1, y1, x2, y2), outline='red', width=2)
        # Clamp the label so it stays visible for boxes touching the top edge.
        draw.text((x1, max(y1 - 15, 0)), f'{score:.2f}', fill='red', font=font)

    # Save output image
    image.save(output_path)

    # Display image
    plt.figure(figsize=(10, 10))
    plt.imshow(np.array(image))
    plt.axis('off')
    plt.show()

### Ảnh dùng fine-tuned model

In [None]:
# Demo image annotated with the fine-tuned model (`model`).
draw_predicted_bbox_file('/kaggle/input/inria-coco-format/demo.png', model, '/kaggle/working/normal_demo.png')

### Ảnh dùng pretrained_model

In [None]:
# Same demo image annotated with the pretrained model, for comparison.
pretrained_model.eval()
draw_predicted_bbox_file('/kaggle/input/inria-coco-format/demo.png', pretrained_model, '/kaggle/working/org_demo.png')

In [None]:
# Demo on an image fetched from a URL, using the fine-tuned model.
# NOTE(review): this looks like a signed CDN link that may stop working over
# time — confirm it still resolves, or replace it with a stable image URL.
draw_predicted_bbox('https://scontent.fsgn3-1.fna.fbcdn.net/v/t39.30808-6/504065779_9486874648084132_138695223547462727_n.jpg?_nc_cat=104&ccb=1-7&_nc_sid=833d8c&_nc_eui2=AeHyG9Gz9ww46K8mhR-o_koQbvb3RlXT0N5u9vdGVdPQ3r4O3Dgh4doMDwgyDXjUZt_Y9AHfYP_RwpGOTe8c7G2N&_nc_ohc=4B6vpFlITmYQ7kNvwHTToBK&_nc_oc=Adl7tE4_xnK_jj4OtUs9Khj1CQGM6Dj5A2aUhqNVqwx65Y_dNj8NpIa9P_UPBRSOu_jFK8YOTTlCVU5hVc58aKwV&_nc_zt=23&_nc_ht=scontent.fsgn3-1.fna&_nc_gid=pUdAQw23gSUUPanBXt3yyg&oh=00_AfNjRZbp7RLrmje6o633WYZwtEndtfpcc4KvHtGXRtfbdw&oe=684C281A', model, '/kaggle/working/image.png')

### Hàm vẽ các predicted_bbox với đầu vào là đường dẫn của một video

In [None]:
def draw_predicted_bbox_video(video_path, model, output_path, display_frames=5):
    """
    Draw predicted bounding boxes (red) with confidence scores on each frame of a local video,
    save the output video to output_path, and display a few sample frames.

    Args:
        video_path (str): Path to the input video file on local storage (e.g., Kaggle).
        model: Trained Faster R-CNN model.
        output_path (str): Path to save the output video.
        display_frames (int): Number of sample frames to display (default: 5).

    Returns:
        bool: True if processing is successful, False if the input video is
        missing or cannot be opened, or the video writer cannot be initialized.
    """
    model.eval()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # Check if video file exists
    if not os.path.exists(video_path):
        print(f"Video file not found: {video_path}")
        return False

    # Open video
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Failed to open video: {video_path}. Check if the file is a valid video or if OpenCV supports the codec.")
        cap.release()
        return False

    # Get video properties
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    print(f"Video properties: FPS={fps}, Width={width}, Height={height}, Frames={frame_count}")

    # Initialize video writer
    try:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            raise Exception("Failed to initialize video writer. Ensure 'mp4v' codec is supported.")
    except Exception as e:
        print(f"Error initializing video writer: {e}")
        cap.release()
        return False

    # Fall back to the built-in font only when the TrueType file cannot be
    # read; a bare `except:` would also hide unrelated errors.
    try:
        font = ImageFont.truetype("arial.ttf", 15)
    except OSError:
        font = ImageFont.load_default()

    # Process frames. The finally clause releases the capture and the writer
    # even if inference or drawing raises part-way through the video.
    sample_frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV frames are BGR; convert to RGB for PIL and the model.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame_rgb)
            image_tensor = F.to_tensor(image).unsqueeze(0).to(device)

            with torch.no_grad():
                predictions = model(image_tensor)[0]

            draw = ImageDraw.Draw(image)
            for bbox, score in zip(predictions['boxes'], predictions['scores']):
                x1, y1, x2, y2 = bbox.tolist()
                draw.rectangle((x1, y1, x2, y2), outline='red', width=2)
                # Clamp the label so it stays visible for boxes touching the top edge.
                draw.text((x1, max(y1 - 15, 0)), f'{score:.2f}', fill='red', font=font)

            # Convert back to OpenCV format
            annotated = np.array(image)
            out.write(cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))

            # Keep the first `display_frames` frames for display.
            if len(sample_frames) < display_frames:
                sample_frames.append(annotated)
    finally:
        cap.release()
        out.release()

    # Display sample frames
    if sample_frames:
        plt.figure(figsize=(15, 5))
        for i, frame in enumerate(sample_frames):
            plt.subplot(1, len(sample_frames), i + 1)
            plt.imshow(frame)
            plt.axis('off')
            plt.title(f'Frame {i + 1}')
        plt.show()
    else:
        print("No frames were processed.")

    print(f"Output video saved to: {output_path}")
    return True

In [None]:
# Run the video demo with the fine-tuned model (`model`); the name
# `normal_model` previously used here is not defined anywhere in the notebook.
success = draw_predicted_bbox_video('/kaggle/input/inria-coco-format/demo_video.mp4', model, '/kaggle/working/demo_video.mp4')
if success:
    print("Video processing completed successfully!")
else:
    print("Video processing failed.")