In [1]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import math
import numpy as np
import re
from shapely.geometry import Polygon, LineString, Point
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import CocoDetection
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torchvision.transforms as T
from torch.optim import SGD, Adam, Adadelta
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision import transforms
from torch.utils.data._utils.collate import default_collate
import torchvision
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.transforms import functional as F
from PIL import Image, ImageDraw, ImageFont, ImageFilter
import random
from math import radians, cos, sin
import cv2
import ast

In [2]:
# Parse the DeepScores dense-split annotation files (train first, then test).
# Each parsed file is indexed by string keys further down in the notebook.
with open('./data/ds2_dense/deepscores_train.json') as train_fh:
    data1 = json.load(train_fh)

with open('./data/ds2_dense/deepscores_test.json') as test_fh:
    data2 = json.load(test_fh)

In [3]:
# Turn the parsed annotation files into DataFrames.
# 'images' is handed to DataFrame directly; 'annotations' is transposed after
# construction (presumably it is keyed by annotation id, giving one row per
# annotation after the transpose — TODO confirm against the dataset format).
# The 'categories' tables (train_labels / test_labels) are not built here.
train_images = pd.DataFrame(data1['images'])
train_obboxs = pd.DataFrame(data1['annotations']).transpose()

test_images = pd.DataFrame(data2['images'])
test_obboxs = pd.DataFrame(data2['annotations']).transpose()

### Model testing

In [4]:
# Label table used by the detector.
# NOTE: read_pickle unpickles the file — only load pickles from a trusted source.
unique_labels = pd.read_pickle('./data/ds2_dense/unique_labels.pkl')

# One extra class is reserved for the background.
num_classes = 1 + len(unique_labels)

In [5]:
def get_model(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN) detector with a new box predictor head.

    The network is created with ``pretrained=True`` (COCO weights) and its
    ``roi_heads.box_predictor`` is then swapped for a ``FastRCNNPredictor``
    sized for ``num_classes`` outputs.

    Args:
        num_classes: Number of classes the new predictor head should output.

    Returns:
        The detection model with the replaced predictor head.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # The new head must accept the same feature width as the classifier it replaces.
    roi_heads = detector.roi_heads
    in_features = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return detector

In [21]:
# Load the trained checkpoint onto the CPU.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
state_dict = torch.load('./202405010002_SGD_200.pt', map_location=torch.device('cpu'))

# A checkpoint saved from a DataParallel wrapper has every key prefixed with
# 'module.'. Strip it only when it is the leading prefix: str.replace would
# also rewrite a 'module.' that appears in the middle of a parameter name.
dp_prefix = 'module.'
new_state_dict = {
    (k[len(dp_prefix):] if k.startswith(dp_prefix) else k): v
    for k, v in state_dict.items()
}

# Build the architecture with this notebook's get_model helper, then load the
# cleaned weights into it.
model = get_model(num_classes)
model.load_state_dict(new_state_dict)

# Switch to evaluation mode; as the last expression this also echoes the model repr.
model.eval()



FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [22]:
# Detections at or below this confidence are not drawn.
SCORE_THRESHOLD = 0.5
# Vertical gap (pixels) between the top of a box and its caption.
LABEL_OFFSET_PX = 40

# Open the image file once. convert() returns independent copies, so the file
# handle can be released as soon as both versions exist.
image_path = './data/ds2_dense/images/lg-158395519-aug-lilyjazz--page-3.png'
with Image.open(image_path) as source_image:
    gray_image = source_image.convert("L")  # single-channel copy fed to the model
    image = source_image.convert("RGB")     # colour copy used as the drawing canvas
# NOTE(review): the model receives a single-channel image — confirm this matches
# how the checkpoint was trained.
image_tensor = F.to_tensor(gray_image).unsqueeze(0)  # add a leading batch dimension

# Map label ids to label names
label_dict = dict(zip(unique_labels['label'], unique_labels['name']))

# Perform prediction without tracking gradients
with torch.no_grad():
    predictions = model(image_tensor)

# Draw predictions on the RGB copy
draw = ImageDraw.Draw(image)
# Default font at its default size (no size is requested here)
font = ImageFont.load_default()

detections = predictions[0]  # results for the single image in the batch
for box, label, confidence in zip(detections['boxes'], detections['labels'], detections['scores']):
    score = confidence.item()
    # Filter first so no work is done for low-confidence predictions
    if not score > SCORE_THRESHOLD:
        continue

    # Truncate the box coordinates to plain Python ints for drawing
    x_min, y_min, x_max, y_max = box.cpu().numpy().astype(int).tolist()

    # Look up the label name, falling back to 'Unknown' for ids not in the table
    label_name = label_dict.get(label.item(), 'Unknown')

    draw.rectangle([(x_min, y_min), (x_max, y_max)], outline='red', width=3)
    # Keep the caption on the canvas when the box sits near the top edge
    caption_y = max(0, y_min - LABEL_OFFSET_PX)
    draw.text((x_min, caption_y), f'{label_name}:{score:.2f}', fill='blue', font=font)

# Show the annotated image
image.show()