## Notes

DETR-run5 - Accuracy = 94.8%

DETR-run7 - Accuracy = 84.5%

DETR-run13 - Accuracy = 82.7%

In [8]:
import os 
import supervision as sv
from transformers import DetrForObjectDetection, DetrImageProcessor
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import DataLoader
from PIL import Image, ImageDraw, ImageFont



In [10]:
from core.inference import loadModel
from core.img2pdf import readPDF, savePDF
from core.createGroundTruth import create_ground_truth_dict
import time
import torchvision
from torchvision.ops import box_iou
import cv2
import random
import numpy as np
import pandas as pd

In [12]:
## CocoDetection Class 
image_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")

# settings
ANNOTATION_FILE_NAME = r"result.json"
TRAIN_DIRECTORY = os.path.join(r"dataset2", r"train")
VAL_DIRECTORY = os.path.join(r"dataset2", r"val")
TEST_DIRECTORY = os.path.join(r"dataset2", r"test")

class CocoDetection(torchvision.datasets.CocoDetection):
    def __init__(
        self,
        image_directory_path: str,
        image_processor,
        train: bool = True
    ):
        annotation_file_path = os.path.join(image_directory_path, ANNOTATION_FILE_NAME)
        super(CocoDetection, self).__init__(image_directory_path, annotation_file_path)
        self.image_processor = image_processor

    def __getitem__(self, idx):
        images, annotations = super(CocoDetection, self).__getitem__(idx)
        image_id = self.ids[idx]
        annotations = {'image_id': image_id, 'annotations': annotations}
        encoding = self.image_processor(images=images, annotations=annotations, return_tensors="pt")
        pixel_values = encoding["pixel_values"].squeeze()
        target = encoding["labels"][0]

        return pixel_values, target
    
TRAIN_DATASET = CocoDetection(
    image_directory_path=TRAIN_DIRECTORY,
    image_processor=image_processor,
    train=True)
VAL_DATASET = CocoDetection(
    image_directory_path=VAL_DIRECTORY,
    image_processor=image_processor,
    train=False)
TEST_DATASET = CocoDetection(
    image_directory_path=TEST_DIRECTORY,
    image_processor=image_processor,
    train=False)

print("Number of training examples:", len(TRAIN_DATASET))
print("Number of validation examples:", len(VAL_DATASET))
print("Number of test examples:", len(TEST_DATASET))

loading annotations into memory...
Done (t=0.03s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Number of training examples: 130
Number of validation examples: 32
Number of test examples: 19


In [13]:
CHECKPOINT = "facebook/detr-resnet-50"

# Best Performing Model
MODEL_PATH = "models/DETR-run7"

# Doesnt Work
# MODEL_101 = 'facebook/detr-resnet-101'
# CHECKPOINT_101 = 'facebook/detr-resnet-101'

# Older Model
# MODEL_PATH = "models/DETR-run4"


## Load Model
def loadModel(MODEL_PATH, CHECKPOINT):
    model = DetrForObjectDetection.from_pretrained(MODEL_PATH)
    image_processor = DetrImageProcessor.from_pretrained(CHECKPOINT)
    return model, image_processor

In [14]:
from transformers import DetrForObjectDetection
import torch
from collections import OrderedDict

# Initialize the model architecture
model, image_processor = loadModel(MODEL_PATH=MODEL_PATH, CHECKPOINT=CHECKPOINT)

In [15]:

# model, image_processor = loadModel(MODEL_PATH=MODEL_101, CHECKPOINT=CHECKPOINT_101)

# Load your checkpoint
checkpoint = torch.load("models/DETR-run7/ModelCheckpoints2/detr-epoch=47-val_loss=0.53.ckpt", map_location='cpu')

# Get the state dict
state_dict = checkpoint['state_dict']

# Remove the 'model.model.' prefix from the state dict keys
new_state_dict = OrderedDict()
for k, v in state_dict.items():
    name = k.replace("model.model.", "")
    new_state_dict[name] = v

# Load the modified state dict
model.load_state_dict(new_state_dict, strict=False)

# Move the model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

print("Model loaded successfully!")

Model loaded successfully!


In [16]:
## Create Ground Truth Dictionary
# ground_truth = create_ground_truth_dict('dataset2/test/result.json')

# For Older models
ground_truth = create_ground_truth_dict('dataset2/test/result.json')

In [17]:
categories = TEST_DATASET.coco.cats
id2label = {k: v['name'] for k,v in categories.items()}
id2label

{0: 'bar-scale', 1: 'color-stamp', 2: 'detail-labels', 3: 'north-sign'}

In [81]:
def add_missing_label(image, save_path, label):
    id2label = {0: 'bar-scale', 1: 'color-stamp', 2: 'detail-labels', 3: 'north-sign'}
    # id2label = {4: 'north-sign', 2: 'color-stamp', 1: 'bar-scale', 3: 'detail-labels'}
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()
    if label:
        text = ""
        for i in label: 
            text = text + f"{id2label[i]} not detected" + "\n"
    else:
        text = ""
    position = (10, 10)
    draw.text(position, text, fill="red", font=font)
    image.save(save_path)

In [82]:
## Inference Function
IMAGE_FOLDER = 'Temp/images'
CONFIDENCE_THRESHOLD = 0.6
IOU_THRESHOLD = 0.7

def inference(image_folder, CONFIDENCE_THRESHOLD, IOU_THRESHOLD):
    results_dict = {}
    
    for img in os.listdir(image_folder):
        IMAGE_PATH = os.path.join(image_folder, img)
        print(f"Processing {IMAGE_PATH}")

        image = cv2.imread(IMAGE_PATH)
        inputs = image_processor(images=image, return_tensors='pt')

        # Move inputs to the same device as the model
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Get ground truth for this image
        target = ground_truth.get(img, {'boxes': torch.empty((0, 4)), 'labels': torch.empty((0,), dtype=torch.long)})
        target = {k: v for k, v in target.items()}
        # print(target)
        with torch.no_grad():
            outputs = model(**inputs)
            
            # Post-process
            target_sizes = torch.tensor([image.shape[:2]]).to(model.device)
            results = image_processor.post_process_object_detection(
                outputs=outputs,
                threshold=CONFIDENCE_THRESHOLD,
                target_sizes=target_sizes
            )[0]
        
        
        detections = sv.Detections.from_transformers(transformers_results=results).with_nms(IOU_THRESHOLD)
        labels = [f"{id2label[class_id]} {confidence:.2f}" for _, confidence, class_id, _ in detections]
        
        print(f"Detected Labels - {set(detections.class_id)}") 
        box_annotator = sv.BoxAnnotator()
        frame = box_annotator.annotate(scene=image, detections=detections, labels=labels)
        
        image = Image.fromarray(frame)
        image_path = f"Temp/results/annotated_{img}"
        all_labels = {0, 1, 2, 3}
        label = all_labels - set(detections.class_id)
        # image.save(image_path)
        add_missing_label(image, image_path, label) # type: ignore
        results_dict[IMAGE_PATH.replace('Temp/', '')] = results
    return results_dict

In [179]:
results

{'images\\11913d58-drawing_88.png': {'scores': tensor([0.5285, 0.9101, 0.6954, 0.6353, 0.7784, 0.9503]),
  'labels': tensor([1, 3, 2, 2, 3, 2]),
  'boxes': tensor([[278.4949, 528.8150, 360.5984, 558.0323],
          [ 46.7326,  70.2028,  80.9400, 104.4009],
          [216.3189, 518.3107, 292.1606, 546.7127],
          [214.2663, 519.2744, 292.6411, 547.4644],
          [ 39.9978, 352.8757,  75.7294, 385.8117],
          [623.9429, 267.7162, 710.0599, 292.2522]])},
 'images\\17bd5f92-drawing_106.png': {'scores': tensor([0.9489, 0.8784, 0.8229, 0.9387, 0.9544, 0.8222]),
  'labels': tensor([2, 0, 3, 2, 2, 1]),
  'boxes': tensor([[428.5779, 281.2126, 526.5462, 308.8688],
          [ 48.0613, 513.4713, 136.8942, 533.4201],
          [594.3514,  94.2488, 631.0280, 127.8965],
          [425.9916, 109.7946, 497.0291, 136.6798],
          [ 78.7774, 263.3436, 174.2547, 290.3559],
          [616.6947, 385.5923, 695.2281, 415.3440]])},
 'images\\2067578c-drawing_24.png': {'scores': tensor([0.8869

In [83]:
start = time.time()
results = inference(IMAGE_FOLDER, 0.5, 0.5)
end = time.time()

print(f"Time taken: {end - start:.2f} seconds")

Processing Temp/images\11913d58-drawing_88.png
Detected Labels - {1, 2, 3}
Processing Temp/images\17bd5f92-drawing_106.png
Detected Labels - {0, 1, 2, 3}
Processing Temp/images\2067578c-drawing_24.png
Detected Labels - {0, 2, 3}
Processing Temp/images\2466f98b-drawing_67.png
Detected Labels - {0, 1, 2, 3}
Processing Temp/images\3108be30-drawing_43.png
Detected Labels - {0, 1, 2, 3}
Processing Temp/images\449d473c-drawing_25.png
Detected Labels - {0, 1, 2, 3}
Processing Temp/images\4fdca17b-drawing_7.png
Detected Labels - {1, 2, 3}
Processing Temp/images\5acab2e6-drawing_64.png
Detected Labels - {0, 2, 3}
Processing Temp/images\8318502d-drawing_15.png
Detected Labels - {0, 1, 2, 3}
Processing Temp/images\92e4c80c-drawing_45.png
Detected Labels - {0, 1, 2, 3}
Processing Temp/images\a01b4a37-drawing_168.png
Detected Labels - {1, 2, 3}
Processing Temp/images\a34b8e8d-drawing_44.png
Detected Labels - {0, 1, 2, 3}
Processing Temp/images\a55f419c-drawing_48.png
Detected Labels - {0, 1, 3}
Pro

In [58]:
def savePDF(image_dir, pdf_path):
    imagelist = []
    im = None
    for path in os.listdir(image_dir):
        im = Image.open(os.path.join(image_dir, path)).convert('RGB')
        imagelist.append(im)
    # im3 = image3.convert('RGB')

    im.save(pdf_path,save_all=True, append_images=imagelist)

In [59]:
savePDF(image_dir='Temp/results', pdf_path='output.pdf')

### Create Predictions DataFrame

In [84]:
predictions_df = pd.DataFrame()
for k in results:
    df = pd.DataFrame(results[k]['boxes'].detach().to('cpu').numpy(), columns=['x1', 'y1', 'x2', 'y2'])
    df['labels'] = results[k]['labels'].detach().to('cpu').numpy()
    df['image'] = k.replace('images\\', '')
    # print(df['image'])
    predictions_df = pd.concat([predictions_df, df], ignore_index=True)
predictions_df.shape

(110, 6)

In [85]:
predictions_df['labels'].value_counts()

labels
2    51
3    21
1    20
0    18
Name: count, dtype: int64

### Create Ground Truths DataFrame

In [86]:
ground_truth_df = pd.DataFrame()
for k in ground_truth:
    df = pd.DataFrame(ground_truth[k]['boxes'], columns=['x1', 'y1', 'x2', 'y2'])
    df['labels'] = ground_truth[k]['labels']
    # print(k)
    df['image'] = k.replace('images/', '')
    # print(df['image'])
    ground_truth_df = pd.concat([ground_truth_df, df], ignore_index=True)
    
ground_truth_df.shape

(116, 6)

In [87]:
ground_truth_df['labels'].value_counts()

labels
2    50
0    27
3    22
1    17
Name: count, dtype: int64

## Sort DFs

In [88]:
ground_truth_df = ground_truth_df.sort_values(by=['image', 'labels', 'y1', 'x1'])
predictions_df = predictions_df.sort_values(by=['image', 'labels', 'y1', 'x1'])

### Row Count

In [89]:
ground_truth_df['row_count'] = ground_truth_df.groupby(['image', 'labels']).cumcount() + 1
predictions_df['row_count'] = predictions_df.groupby(['image', 'labels']).cumcount() + 1

In [90]:
# ground_truth_df.head(10)
predictions_df.head(10)

Unnamed: 0,x1,y1,x2,y2,labels,image,row_count
0,278.494934,528.815002,360.598358,558.032288,1,11913d58-drawing_88.png,1
5,623.942871,267.716156,710.059937,292.252167,2,11913d58-drawing_88.png,1
2,216.318863,518.310669,292.160645,546.712708,2,11913d58-drawing_88.png,2
3,214.266266,519.274414,292.641144,547.464417,2,11913d58-drawing_88.png,3
1,46.73262,70.202827,80.939987,104.40094,3,11913d58-drawing_88.png,1
4,39.997841,352.875702,75.729424,385.811737,3,11913d58-drawing_88.png,2
7,48.061279,513.471252,136.894226,533.420105,0,17bd5f92-drawing_106.png,1
11,616.694702,385.592316,695.228088,415.344025,1,17bd5f92-drawing_106.png,1
9,425.991577,109.794601,497.029144,136.679779,2,17bd5f92-drawing_106.png,1
10,78.777405,263.343567,174.254745,290.355896,2,17bd5f92-drawing_106.png,2


## Merge The DataFrames for Comparison

In [91]:
merged_df = pd.merge(ground_truth_df, predictions_df, on=['image', 'labels', 'row_count'], how='outer', suffixes=('_gt', '_pred'), validate='many_to_many')

In [92]:
merged_df.head(12)

Unnamed: 0,x1_gt,y1_gt,x2_gt,y2_gt,labels,image,row_count,x1_pred,y1_pred,x2_pred,y2_pred
0,626.232558,409.581395,732.186047,429.348837,0,11913d58-drawing_88.png,1,,,,
1,279.116279,529.767442,363.72093,559.023256,1,11913d58-drawing_88.png,1,278.494934,528.815002,360.598358,558.032288
2,626.232558,266.465116,713.209302,290.186047,2,11913d58-drawing_88.png,1,623.942871,267.716156,710.059937,292.252167
3,217.44186,518.697674,278.325581,542.418605,2,11913d58-drawing_88.png,2,216.318863,518.310669,292.160645,546.712708
4,,,,,2,11913d58-drawing_88.png,3,214.266266,519.274414,292.641144,547.464417
5,46.651163,66.418605,83.813953,105.162791,3,11913d58-drawing_88.png,1,46.73262,70.202827,80.939987,104.40094
6,39.534884,351.069767,75.116279,389.813953,3,11913d58-drawing_88.png,2,39.997841,352.875702,75.729424,385.811737
7,46.651163,515.534884,136.790698,535.302326,0,17bd5f92-drawing_106.png,1,48.061279,513.471252,136.894226,533.420105
8,617.534884,386.651163,695.813953,419.860465,1,17bd5f92-drawing_106.png,1,616.694702,385.592316,695.228088,415.344025
9,426.976744,108.325581,498.139535,136.790698,2,17bd5f92-drawing_106.png,1,425.991577,109.794601,497.029144,136.679779


## Calculate IOU

In [93]:
def calculate_iou(box1, box2):
    """
    Calculate the Intersection over Union (IoU) of two bounding boxes.
    
    Parameters:
    - box1: (x1, y1, x2, y2) coordinates of the first bounding box
    - box2: (x1, y1, x2, y2) coordinates of the second bounding box
    
    Returns:
    - iou: Intersection over Union (IoU) value
    """
    # Unpack the coordinates of the two boxes
    x1_1, y1_1, x2_1, y2_1 = box1
    x1_2, y1_2, x2_2, y2_2 = box2
    
    # Calculate the (x, y) coordinates of the intersection rectangle
    xi1 = max(x1_1, x1_2)
    yi1 = max(y1_1, y1_2)
    xi2 = min(x2_1, x2_2)
    yi2 = min(y2_1, y2_2)
    
    # Calculate the area of the intersection rectangle
    inter_width = max(0, xi2 - xi1)
    inter_height = max(0, yi2 - yi1)
    inter_area = inter_width * inter_height
    
    # Calculate the area of both bounding boxes
    box1_area = (x2_1 - x1_1) * (y2_1 - y1_1)
    box2_area = (x2_2 - x1_2) * (y2_2 - y1_2)
    
    # Calculate the union area
    union_area = box1_area + box2_area - inter_area
    
    # Calculate the IoU
    iou = inter_area / union_area if union_area != 0 else 0
    
    return iou

In [94]:
merged_df['iou'] = merged_df.apply(lambda x: calculate_iou((x['x1_gt'], x['y1_gt'], x['x2_gt'], x['y2_gt']), (x['x1_pred'], x['y1_pred'], x['x2_pred'], x['y2_pred'])), axis=1)

## Missing Labels in the Image

In [95]:
# predictions_df.groupby('image')['labels'].unique()
labels_df = predictions_df.groupby('image')['labels'].apply(lambda x: set(x)).reset_index()

all_labels = {0, 1, 2, 3}
labels_df['missing_labels'] = labels_df['labels'].apply(lambda x: all_labels - x)

labels_df

Unnamed: 0,image,labels,missing_labels
0,11913d58-drawing_88.png,"{1, 2, 3}",{0}
1,17bd5f92-drawing_106.png,"{0, 1, 2, 3}",{}
2,2067578c-drawing_24.png,"{0, 2, 3}",{1}
3,2466f98b-drawing_67.png,"{0, 1, 2, 3}",{}
4,3108be30-drawing_43.png,"{0, 1, 2, 3}",{}
5,449d473c-drawing_25.png,"{0, 1, 2, 3}",{}
6,4fdca17b-drawing_7.png,"{1, 2, 3}",{0}
7,5acab2e6-drawing_64.png,"{0, 2, 3}",{1}
8,8318502d-drawing_15.png,"{0, 1, 2, 3}",{}
9,92e4c80c-drawing_45.png,"{0, 1, 2, 3}",{}


## Data Information

In [96]:
# Ground Truth
print(f'Number of Labels in Training Data - {ground_truth_df.shape[0]}')
print(f'Bar Scale Count - {ground_truth_df.loc[ground_truth_df["labels"]==0].shape[0]}')
print(f'Color Stamp Count - {ground_truth_df.loc[ground_truth_df["labels"]==1].shape[0]}')
print(f'Detail Label Count - {ground_truth_df.loc[ground_truth_df["labels"]==2].shape[0]}')
print(f'North Sign Count - {ground_truth_df.loc[ground_truth_df["labels"]==3].shape[0]}')

Number of Labels in Training Data - 116
Bar Scale Count - 27
Color Stamp Count - 17
Detail Label Count - 50
North Sign Count - 22


In [97]:
# Predictions
print(f'Number of Labels in Predictions - {predictions_df.shape[0]}')
print(f'Bar Scale Count - {predictions_df.loc[predictions_df["labels"]==0].shape[0]}')
print(f'Color Stamp Count - {predictions_df.loc[predictions_df["labels"]==1].shape[0]}')
print(f'Detail Label Count - {predictions_df.loc[predictions_df["labels"]==2].shape[0]}')
print(f'North Sign Count - {predictions_df.loc[predictions_df["labels"]==3].shape[0]}')

Number of Labels in Predictions - 110
Bar Scale Count - 18
Color Stamp Count - 20
Detail Label Count - 51
North Sign Count - 21


## Ground Truth & Predictions 
### Shape Comparison 

In [98]:
ground_truth_df.shape

(116, 7)

In [99]:
predictions_df.shape

(110, 7)

## Page-wise TP, FP, FN

## Final Testing

In [100]:
def final_result(image, ground_truth, results):
    TP, FP, FN = 0, 0, 0

    result_dict = []
    for i in range(len(ground_truth[f'images/{image}']['labels'])):
        label = ground_truth[f'images/{image}']['labels'][i]
        # label = 2 
        match = False
        for j in range(len(results[f'images\\{image}']['labels'])):
            pred_label = results[f'images\\{image}']['labels'][j].numpy()
            iou = calculate_iou(ground_truth[f'images/{image}']['boxes'][i], results[f'images\\{image}']['boxes'][j].numpy())
            if iou > 0.6:
                if pred_label == label:
                    result_dict.append({
                        'image': image,
                        'label': label, 
                        'ground_truth': ground_truth[f'images/{image}']['boxes'][i],
                        'prediction': results[f'images\\{image}']['boxes'][j].numpy(),
                        'iou': iou, 
                        'result': 'TP',                                      
                    })
                    # TP = TP + 1
                    match = True
                    continue
                elif pred_label != label:
                    result_dict.append({
                        'image': image,
                        'label': pred_label, 
                        'ground_truth': ground_truth[f'images/{image}']['boxes'][i],
                        'prediction': results[f'images\\{image}']['boxes'][j].numpy(),
                        'iou': iou, 
                        'result': 'FP',                
                    })
                    continue
            
        if match == False:
            result_dict.append({
                'image': image,
                'label': label, 
                'ground_truth': ground_truth[f'images/{image}']['boxes'][i],
                'prediction': [],
                'iou': 0, 
                'result': 'FN',                
            })
    return result_dict
                    

In [101]:
results

{'images\\11913d58-drawing_88.png': {'scores': tensor([0.5285, 0.9101, 0.6954, 0.6353, 0.7784, 0.9503]),
  'labels': tensor([1, 3, 2, 2, 3, 2]),
  'boxes': tensor([[278.4949, 528.8150, 360.5984, 558.0323],
          [ 46.7326,  70.2028,  80.9400, 104.4009],
          [216.3189, 518.3107, 292.1606, 546.7127],
          [214.2663, 519.2744, 292.6411, 547.4644],
          [ 39.9978, 352.8757,  75.7294, 385.8117],
          [623.9429, 267.7162, 710.0599, 292.2522]])},
 'images\\17bd5f92-drawing_106.png': {'scores': tensor([0.9489, 0.8784, 0.8229, 0.9387, 0.9544, 0.8222]),
  'labels': tensor([2, 0, 3, 2, 2, 1]),
  'boxes': tensor([[428.5779, 281.2126, 526.5462, 308.8688],
          [ 48.0613, 513.4713, 136.8942, 533.4201],
          [594.3514,  94.2488, 631.0280, 127.8965],
          [425.9916, 109.7946, 497.0291, 136.6798],
          [ 78.7774, 263.3436, 174.2547, 290.3559],
          [616.6947, 385.5923, 695.2281, 415.3440]])},
 'images\\2067578c-drawing_24.png': {'scores': tensor([0.8869

In [102]:
result_dict = final_result('eef413ab-drawing_22.png', ground_truth, results)
# pd.DataFrame(result_dict)

In [103]:
results

{'images\\11913d58-drawing_88.png': {'scores': tensor([0.5285, 0.9101, 0.6954, 0.6353, 0.7784, 0.9503]),
  'labels': tensor([1, 3, 2, 2, 3, 2]),
  'boxes': tensor([[278.4949, 528.8150, 360.5984, 558.0323],
          [ 46.7326,  70.2028,  80.9400, 104.4009],
          [216.3189, 518.3107, 292.1606, 546.7127],
          [214.2663, 519.2744, 292.6411, 547.4644],
          [ 39.9978, 352.8757,  75.7294, 385.8117],
          [623.9429, 267.7162, 710.0599, 292.2522]])},
 'images\\17bd5f92-drawing_106.png': {'scores': tensor([0.9489, 0.8784, 0.8229, 0.9387, 0.9544, 0.8222]),
  'labels': tensor([2, 0, 3, 2, 2, 1]),
  'boxes': tensor([[428.5779, 281.2126, 526.5462, 308.8688],
          [ 48.0613, 513.4713, 136.8942, 533.4201],
          [594.3514,  94.2488, 631.0280, 127.8965],
          [425.9916, 109.7946, 497.0291, 136.6798],
          [ 78.7774, 263.3436, 174.2547, 290.3559],
          [616.6947, 385.5923, 695.2281, 415.3440]])},
 'images\\2067578c-drawing_24.png': {'scores': tensor([0.8869

In [104]:
new_df = pd.DataFrame()
for image in os.listdir('Temp/images'):
    result_dict = final_result(image, ground_truth, results)
    temp_df = pd.DataFrame(result_dict)
    
    new_df = pd.concat([new_df, temp_df], ignore_index=True)
    

In [105]:
new_df

Unnamed: 0,image,label,ground_truth,prediction,iou,result
0,11913d58-drawing_88.png,3,"[39.53488372093023, 351.06976744186045, 75.116...","[39.99784, 352.8757, 75.72942, 385.81174]",0.826916,TP
1,11913d58-drawing_88.png,3,"[46.65116279069767, 66.41860465116278, 83.8139...","[46.73262, 70.20283, 80.93999, 104.40094]",0.812469,TP
2,11913d58-drawing_88.png,1,"[279.1162790697674, 529.7674418604652, 363.720...","[278.49493, 528.815, 360.59836, 558.0323]",0.895812,TP
3,11913d58-drawing_88.png,0,"[626.2325581395348, 409.58139534883725, 732.18...",[],0.000000,FN
4,11913d58-drawing_88.png,2,"[217.4418604651163, 518.6976744186046, 278.325...","[216.31886, 518.31067, 292.16064, 546.7127]",0.670463,TP
...,...,...,...,...,...,...
116,f601361a-drawing_42.png,0,"[57.72093023255814, 523.4418604651163, 162.883...","[58.494934, 523.1484, 164.0457, 545.7561]",0.912949,TP
117,f601361a-drawing_42.png,2,"[232.46511627906978, 407.99999999999994, 268.0...","[231.28055, 407.37708, 268.8685, 423.36612]",0.886240,TP
118,f601361a-drawing_42.png,2,"[242.74418604651163, 242.7441860465117, 294.13...","[242.53752, 244.55257, 294.23538, 260.27994]",0.794493,TP
119,f601361a-drawing_42.png,2,"[223.7674418604651, 170.7906976744186, 275.953...","[225.03946, 173.67342, 276.31174, 188.2437]",0.677075,TP


In [108]:
new_df['result'].value_counts()

result
TP    104
FN     13
FP      4
Name: count, dtype: int64

In [109]:
TP, FN, FP= new_df['result'].value_counts()
# FP = 0

In [110]:
print(f"Accuracy = {TP/(TP+FP+FN):.4f}")
print(f"Precision = {TP/(TP+FP):.4f}")
print(f"Recall = {TP/(TP+FN):.4f}")

Accuracy = 0.8595
Precision = 0.9630
Recall = 0.8889


### Classwise Accuracy

In [111]:
new_df.loc[new_df['label'] == 0]['result'].value_counts()

result
TP    17
FN    10
Name: count, dtype: int64

In [112]:
# Barscale 
TP, FN = new_df.loc[new_df['label'] == 0]['result'].value_counts()
FP = 0
# FN = 0
accuracy0 = TP/(TP+FP+FN)
print(f"Accuracy = {accuracy0:.4f}")
print(f"Precision = {TP/(TP+FP):.4f}")
print(f"Recall = {TP/(TP+FN):.4f}")


Accuracy = 0.6296
Precision = 1.0000
Recall = 0.6296


In [113]:
new_df.loc[new_df['label'] == 1]['result'].value_counts()

result
TP    17
FP     3
Name: count, dtype: int64

In [114]:
# Color Stamp
TP, FP = new_df.loc[new_df['label'] == 1]['result'].value_counts()
FN = 0
# FP = 0
accuracy1= TP/(TP+FP+FN)
print(f"Accuracy = {accuracy1:.4f}")
print(f"Precision = {TP/(TP+FP):.4f}")
print(f"Recall = {TP/(TP+FN):.4f}")

Accuracy = 0.8500
Precision = 0.8500
Recall = 1.0000


In [115]:
# Detail Labels
TP, FN, FP = new_df.loc[new_df['label'] == 2]['result'].value_counts()
accuracy2 = TP/(TP+FP+FN)
print(f"Accuracy = {accuracy2:.4f}")
print(f"Precision = {TP/(TP+FP):.4f}")
print(f"Recall = {TP/(TP+FN):.4f}")

Accuracy = 0.9423
Precision = 0.9800
Recall = 0.9608


In [116]:
# North Sign
TP, FN = new_df.loc[new_df['label'] == 3]['result'].value_counts()
FP = 0
accuracy3 = TP/(TP+FP+FN)
print(f"Accuracy = {accuracy3:.4f}")
print(f"Precision = {TP/(TP+FP):.4f}")
print(f"Recall = {TP/(TP+FN):.4f}")

Accuracy = 0.9545
Precision = 1.0000
Recall = 0.9545


In [117]:
# Model Accuracy 
print(f"Model Accuracy = {(accuracy0 + accuracy1 + accuracy2 + accuracy3)/4:.4f}")

Model Accuracy = 0.8441


In [118]:
image_analysis_df = pd.DataFrame()
image_analysis_df['image'] = new_df['image'].unique()
image_analysis_df['TP'] = new_df.groupby('image')['result'].apply(lambda x: x.value_counts().get('TP', 0)).values
image_analysis_df['FP'] = new_df.groupby('image')['result'].apply(lambda x: x.value_counts().get('FP', 0)).values
image_analysis_df['FN'] = new_df.groupby('image')['result'].apply(lambda x: x.value_counts().get('FN', 0)).values

In [119]:
## TP, FP, FN count by image
image_analysis_df

Unnamed: 0,image,TP,FP,FN
0,11913d58-drawing_88.png,6,0,1
1,17bd5f92-drawing_106.png,6,0,0
2,2067578c-drawing_24.png,4,0,1
3,2466f98b-drawing_67.png,9,0,0
4,3108be30-drawing_43.png,5,0,0
5,449d473c-drawing_25.png,7,0,1
6,4fdca17b-drawing_7.png,3,1,1
7,5acab2e6-drawing_64.png,5,0,0
8,8318502d-drawing_15.png,5,0,0
9,92e4c80c-drawing_45.png,7,0,0


In [120]:
new_df

Unnamed: 0,image,label,ground_truth,prediction,iou,result
0,11913d58-drawing_88.png,3,"[39.53488372093023, 351.06976744186045, 75.116...","[39.99784, 352.8757, 75.72942, 385.81174]",0.826916,TP
1,11913d58-drawing_88.png,3,"[46.65116279069767, 66.41860465116278, 83.8139...","[46.73262, 70.20283, 80.93999, 104.40094]",0.812469,TP
2,11913d58-drawing_88.png,1,"[279.1162790697674, 529.7674418604652, 363.720...","[278.49493, 528.815, 360.59836, 558.0323]",0.895812,TP
3,11913d58-drawing_88.png,0,"[626.2325581395348, 409.58139534883725, 732.18...",[],0.000000,FN
4,11913d58-drawing_88.png,2,"[217.4418604651163, 518.6976744186046, 278.325...","[216.31886, 518.31067, 292.16064, 546.7127]",0.670463,TP
...,...,...,...,...,...,...
116,f601361a-drawing_42.png,0,"[57.72093023255814, 523.4418604651163, 162.883...","[58.494934, 523.1484, 164.0457, 545.7561]",0.912949,TP
117,f601361a-drawing_42.png,2,"[232.46511627906978, 407.99999999999994, 268.0...","[231.28055, 407.37708, 268.8685, 423.36612]",0.886240,TP
118,f601361a-drawing_42.png,2,"[242.74418604651163, 242.7441860465117, 294.13...","[242.53752, 244.55257, 294.23538, 260.27994]",0.794493,TP
119,f601361a-drawing_42.png,2,"[223.7674418604651, 170.7906976744186, 275.953...","[225.03946, 173.67342, 276.31174, 188.2437]",0.677075,TP


In [160]:
predictions_df = pd.DataFrame()
for k in results:
    df = pd.DataFrame(results[k]['boxes'].detach().to('cpu').numpy(), columns=['x1', 'y1', 'x2', 'y2'])
    df['labels'] = results[k]['labels'].detach().to('cpu').numpy()
    df['image'] = k.replace('images\\', '')
    # print(df['image'])
    predictions_df = pd.concat([predictions_df, df], ignore_index=True)
predictions_df.shape

(110, 6)

In [166]:
image_names = predictions_df['image'].unique()
labels = [0,1,2,3]

rows = []

# Populate the new rows
for idx, image_name in enumerate(image_names):
    row = {'image_name': image_name}
    image_df = predictions_df[predictions_df['image'] == image_name]
    for label in labels:
        label_present = any(image_df['labels'] == label)
        label_count = image_df[image_df['labels'] == label].shape[0]
        row[f'{id2label[label]}'] = label_present
        row[f'{id2label[label]}_count'] = label_count
    rows.append(row)

# Create a new dataframe from the list of rows
df2 = pd.DataFrame(rows)

Unnamed: 0,id,image_name,label_0,label_0_count,label_1,label_1_count,label_2,label_2_count,label_3,label_3_count


In [176]:

# df2.drop(columns=['id'], inplace=True)

In [177]:
df2

Unnamed: 0,image_name,bar-scale,bar-scale_count,color-stamp,color-stamp_count,detail-labels,detail-labels_count,north-sign,north-sign_count
0,11913d58-drawing_88.png,False,0,True,1,True,3,True,2
1,17bd5f92-drawing_106.png,True,1,True,1,True,3,True,1
2,2067578c-drawing_24.png,True,1,False,0,True,2,True,1
3,2466f98b-drawing_67.png,True,4,True,1,True,3,True,1
4,3108be30-drawing_43.png,True,1,True,1,True,2,True,1
5,449d473c-drawing_25.png,True,1,True,1,True,5,True,1
6,4fdca17b-drawing_7.png,False,0,True,2,True,1,True,1
7,5acab2e6-drawing_64.png,True,1,False,0,True,3,True,1
8,8318502d-drawing_15.png,True,1,True,1,True,2,True,1
9,92e4c80c-drawing_45.png,True,1,True,1,True,4,True,1


In [178]:
predictions_df

Unnamed: 0,x1,y1,x2,y2,labels,image
0,278.494934,528.815002,360.598358,558.032288,1,11913d58-drawing_88.png
1,46.732620,70.202827,80.939987,104.400940,3,11913d58-drawing_88.png
2,216.318863,518.310669,292.160645,546.712708,2,11913d58-drawing_88.png
3,214.266266,519.274414,292.641144,547.464417,2,11913d58-drawing_88.png
4,39.997841,352.875702,75.729424,385.811737,3,11913d58-drawing_88.png
...,...,...,...,...,...,...
105,594.378418,182.227692,640.603699,198.663849,2,f601361a-drawing_42.png
106,242.537521,244.552567,294.235382,260.279938,2,f601361a-drawing_42.png
107,231.280548,407.377075,268.868500,423.366119,2,f601361a-drawing_42.png
108,442.028168,524.323975,524.895569,556.557678,1,f601361a-drawing_42.png
