In [13]:
# load pretrain model which name is object_detection_model.pth that i saved in model directory
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from collections import OrderedDict

def load_pretrained_model(model_path='models/object_detection_model.pth'):
    # Load the pre-trained Faster R-CNN model
    model = fasterrcnn_resnet50_fpn(pretrained=True)  # Load with pretrained weights
    
    # Load the state dictionary from the custom weights
    checkpoint = torch.load(model_path, map_location=device)
    
    # Remove unexpected keys in the saved state_dict if necessary
    model_dict = model.state_dict()
    checkpoint = checkpoint['model'] if 'model' in checkpoint else checkpoint  # for some checkpoints
    
    # Filter out the unnecessary keys
    checkpoint = {k: v for k, v in checkpoint.items() if k in model_dict}
    
    # Overwrite the existing model state dict
    model_dict.update(checkpoint)
    model.load_state_dict(model_dict)
    
    # Set the model to evaluation mode
    model.eval()
    
    return model


def show_prediction(model, image, target=None, idx2label=None, confidence_threshold=0.5):
    model.eval()
    with torch.no_grad():
        prediction = model([image.to(device)])[0]
        
    boxes = prediction['boxes'].cpu().numpy()
    labels = prediction['labels'].cpu().numpy()
    scores = prediction['scores'].cpu().numpy()
    
    fig, ax = plt.subplots(1, figsize=(12, 9))
    
    # Convert image tensor to numpy for display
    img_display = image.permute(1, 2, 0).cpu().numpy() if image.shape[0] == 3 else image.cpu().numpy()
    
    # Normalize image to [0, 1]
    img_display = img_display / 255.0 if img_display.max() > 1.0 else img_display
    
    ax.imshow(img_display)
    
    for box, label, score in zip(boxes, labels, scores):
        if score < confidence_threshold:
            continue
            
        xmin, ymin, xmax, ymax = box
        width = xmax - xmin
        height = ymax - ymin
        
        rect = patches.Rectangle((xmin, ymin), width, height, linewidth=2,
                                edgecolor='r', facecolor='none')
        ax.add_patch(rect)
        
        label_name = idx2label[label] if idx2label and label in idx2label else f"Class_{label}"
        ax.text(xmin, ymin-5, f"{label_name} ({score:.2f})", 
                color='white', fontsize=10,
                bbox=dict(facecolor='red', alpha=0.7))
    
    # Draw ground truth boxes if available
    if target is not None:
        gt_boxes = target['boxes'].cpu().numpy()
        gt_labels = target['labels'].cpu().numpy()
        
        for box, label in zip(gt_boxes, gt_labels):
            xmin, ymin, xmax, ymax = box
            width = xmax - xmin
            height = ymax - ymin
            
            rect = patches.Rectangle((xmin, ymin), width, height, linewidth=2,
                                    edgecolor='g', facecolor='none', linestyle='--')
            ax.add_patch(rect)
            
            label_name = idx2label[label] if idx2label and label in idx2label else f"Class_{label}"
            ax.text(xmax, ymin-5, f"GT: {label_name}", 
                    color='white', fontsize=10,
                    bbox=dict(facecolor='green', alpha=0.7))
    
    ax.set_title("Red: Predictions, Green: Ground Truth")
    plt.axis('off')
    plt.tight_layout()
    plt.show()

# Load model
model = load_pretrained_model()
model.to(device)

# Load and preprocess image
image_path = "path_to_image.jpg"  # Specify the image path here
image = Image.open(image_path)
transform = transforms.Compose([transforms.ToTensor()])
image_tensor = transform(image).unsqueeze(0)  # Add batch dimension

# Example usage
show_prediction(model, image_tensor[0], confidence_threshold=0.5)



RuntimeError: Error(s) in loading state_dict for FasterRCNN:
	size mismatch for roi_heads.box_predictor.cls_score.weight: copying a param with shape torch.Size([13, 1024]) from checkpoint, the shape in current model is torch.Size([91, 1024]).
	size mismatch for roi_heads.box_predictor.cls_score.bias: copying a param with shape torch.Size([13]) from checkpoint, the shape in current model is torch.Size([91]).
	size mismatch for roi_heads.box_predictor.bbox_pred.weight: copying a param with shape torch.Size([52, 1024]) from checkpoint, the shape in current model is torch.Size([364, 1024]).
	size mismatch for roi_heads.box_predictor.bbox_pred.bias: copying a param with shape torch.Size([52]) from checkpoint, the shape in current model is torch.Size([364]).