## Notes

DETR-run5 - Accuracy = 94.8%

DETR-run7 - Accuracy = 84.5%

DETR-run13 - Accuracy = 82.7%

DETR-resnet34-1 - Accuracy = 92.7%

In [2]:
import os 
import supervision as sv
from transformers import DetrForObjectDetection, DetrImageProcessor
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import DataLoader
from PIL import Image, ImageDraw, ImageFont



  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from core.inference import loadModel
from core.img2pdf import readPDF, savePDF
from core.createGroundTruth import create_ground_truth_dict
import time
import torchvision
from torchvision.ops import box_iou
import cv2
import random
import numpy as np
import pandas as pd

In [41]:
## CocoDetection Class 
image_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")

# settings
ANNOTATION_FILE_NAME = r"result.json"
TRAIN_DIRECTORY = os.path.join(r"data/dataset4", r"train")
VAL_DIRECTORY = os.path.join(r"data/dataset4", r"val")
TEST_DIRECTORY = os.path.join(r"data/dataset4", r"test")

class CocoDetection(torchvision.datasets.CocoDetection):
    def __init__(
        self,
        image_directory_path: str,
        image_processor,
        train: bool = True
    ):
        annotation_file_path = os.path.join(image_directory_path, ANNOTATION_FILE_NAME)
        super(CocoDetection, self).__init__(image_directory_path, annotation_file_path)
        self.image_processor = image_processor

    def __getitem__(self, idx):
        images, annotations = super(CocoDetection, self).__getitem__(idx)
        image_id = self.ids[idx]
        annotations = {'image_id': image_id, 'annotations': annotations}
        encoding = self.image_processor(images=images, annotations=annotations, return_tensors="pt")
        pixel_values = encoding["pixel_values"].squeeze()
        target = encoding["labels"][0]

        return pixel_values, target
    
TRAIN_DATASET = CocoDetection(
    image_directory_path=TRAIN_DIRECTORY,
    image_processor=image_processor,
    train=True)
VAL_DATASET = CocoDetection(
    image_directory_path=VAL_DIRECTORY,
    image_processor=image_processor,
    train=False)
TEST_DATASET = CocoDetection(
    image_directory_path=TEST_DIRECTORY,
    image_processor=image_processor,
    train=False)

print("Number of training examples:", len(TRAIN_DATASET))
print("Number of validation examples:", len(VAL_DATASET))
print("Number of test examples:", len(TEST_DATASET))

loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Number of training examples: 145
Number of validation examples: 32
Number of test examples: 19


In [5]:
CHECKPOINT = "facebook/detr-resnet-50"

# Best Performing Model
MODEL_PATH = "models/DETR-resnet34-1"

# Doesnt Work
# MODEL_101 = 'facebook/detr-resnet-101'
# CHECKPOINT_101 = 'facebook/detr-resnet-101'

# Older Model
# MODEL_PATH = "models/DETR-run4"


## Load Model
def loadModel(MODEL_PATH, CHECKPOINT):
    model = DetrForObjectDetection.from_pretrained(MODEL_PATH)
    image_processor = DetrImageProcessor.from_pretrained(CHECKPOINT)
    return model, image_processor

In [6]:
from transformers import DetrForObjectDetection
import torch
from collections import OrderedDict

# Initialize the model architecture
model, image_processor = loadModel(MODEL_PATH=MODEL_PATH, CHECKPOINT=CHECKPOINT)

In [7]:

# model, image_processor = loadModel(MODEL_PATH=MODEL_101, CHECKPOINT=CHECKPOINT_101)

# Load your checkpoint
# checkpoint = torch.load("models/DETR-run17/detr-epoch=99-val_loss=0.46.ckpt", map_location='cpu')

# # Get the state dict
# state_dict = checkpoint['state_dict']

# # Remove the 'model.model.' prefix from the state dict keys
# new_state_dict = OrderedDict()
# for k, v in state_dict.items():
#     name = k.replace("model.model.", "")
#     new_state_dict[name] = v

# # Load the modified state dict
# model.load_state_dict(new_state_dict, strict=False)

# # Move the model to GPU if available
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model.to(device)

print("Model loaded successfully!")

Model loaded successfully!


In [42]:
## Create Ground Truth Dictionary
# ground_truth = create_ground_truth_dict('dataset2/test/result.json')

# For Older models
ground_truth = create_ground_truth_dict('data/dataset4/test/result.json')

In [9]:
categories = TEST_DATASET.coco.cats
id2label = {k: v['name'] for k,v in categories.items()}
id2label

{0: 'bar-scale', 1: 'color-stamp', 2: 'detail-labels', 3: 'north-sign'}

In [10]:
def add_missing_label(image, save_path, label):
    id2label = {0: 'bar-scale', 1: 'color-stamp', 2: 'detail-labels', 3: 'north-sign'}
    # id2label = {4: 'north-sign', 2: 'color-stamp', 1: 'bar-scale', 3: 'detail-labels'}
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()
    if label:
        text = ""
        for i in label: 
            text = text + f"{id2label[i]} not detected" + "\n"
    else:
        text = ""
    position = (10, 10)
    draw.text(position, text, fill="red", font=font)
    image.save(save_path)

In [11]:
## Inference Function
IMAGE_FOLDER = 'Temp/images'
CONFIDENCE_THRESHOLD = 0.6
IOU_THRESHOLD = 0.7

def inference(image_folder, CONFIDENCE_THRESHOLD, IOU_THRESHOLD):
    results_dict = {}
    
    for img in os.listdir(image_folder):
        IMAGE_PATH = os.path.join(image_folder, img)
        print(f"Processing {IMAGE_PATH}")

        image = cv2.imread(IMAGE_PATH)
        inputs = image_processor(images=image, return_tensors='pt')

        # Move inputs to the same device as the model
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Get ground truth for this image
        target = ground_truth.get(img, {'boxes': torch.empty((0, 4)), 'labels': torch.empty((0,), dtype=torch.long)})
        target = {k: v for k, v in target.items()}
        # print(target)
        with torch.no_grad():
            outputs = model(**inputs)
            
            # Post-process
            target_sizes = torch.tensor([image.shape[:2]]).to(model.device)
            results = image_processor.post_process_object_detection(
                outputs=outputs,
                threshold=CONFIDENCE_THRESHOLD,
                target_sizes=target_sizes
            )[0]
        
        
        detections = sv.Detections.from_transformers(transformers_results=results).with_nms(IOU_THRESHOLD)
        labels = [f"{id2label[class_id]} {confidence:.2f}" for _, confidence, class_id, _ in detections]
        
        print(f"Detected Labels - {set(detections.class_id)}") 
        box_annotator = sv.BoxAnnotator()
        frame = box_annotator.annotate(scene=image, detections=detections, labels=labels)
        
        image = Image.fromarray(frame)
        image_path = f"Temp/results/annotated_{img}"
        all_labels = {0, 1, 2, 3}
        label = all_labels - set(detections.class_id)
        # image.save(image_path)
        add_missing_label(image, image_path, label) # type: ignore
        results_dict[IMAGE_PATH.replace('Temp/', '')] = results
    return results_dict

In [15]:
start = time.time()
results = inference(IMAGE_FOLDER, 0.4, 0.6)
end = time.time()

print(f"Time taken: {end - start:.2f} seconds")

Processing Temp/images\0f7db672-drawing_88.png
Detected Labels - {0, 1, 2, 3}
Processing Temp/images\1eecb4f3-drawing_25.png
Detected Labels - {0, 1, 2, 3}
Processing Temp/images\2832a63f-drawing_44.png
Detected Labels - {0, 1, 2, 3}
Processing Temp/images\2e4aefb0-drawing_15.png
Detected Labels - {0, 1, 2, 3}
Processing Temp/images\3c56fa90-drawing_7.png
Detected Labels - {0, 1, 2, 3}
Processing Temp/images\594b3b57-drawing_113.png
Detected Labels - {0, 2, 3}
Processing Temp/images\5b5d1b9b-drawing_123.png
Detected Labels - {0, 1, 2, 3}
Processing Temp/images\72719062-drawing_43.png
Detected Labels - {0, 1, 2, 3}
Processing Temp/images\777ab86f-drawing_22.png
Detected Labels - {0, 1, 2, 3}
Processing Temp/images\78e13516-drawing_45.png
Detected Labels - {0, 1, 2, 3}
Processing Temp/images\817c3fb2-drawing_42.png
Detected Labels - {0, 1, 2, 3}
Processing Temp/images\81c64eea-drawing_106.png
Detected Labels - {0, 1, 2, 3}
Processing Temp/images\8e605e02-drawing_56.png
Detected Labels - 

In [16]:
def savePDF(image_dir, pdf_path):
    imagelist = []
    im = None
    for path in os.listdir(image_dir):
        im = Image.open(os.path.join(image_dir, path)).convert('RGB')
        imagelist.append(im)
    # im3 = image3.convert('RGB')

    im.save(pdf_path,save_all=True, append_images=imagelist)

In [17]:
savePDF(image_dir='Temp/results', pdf_path='output.pdf')

### Create Predictions DataFrame

In [43]:
predictions_df = pd.DataFrame()
for k in results:
    df = pd.DataFrame(results[k]['boxes'].detach().to('cpu').numpy(), columns=['x1', 'y1', 'x2', 'y2'])
    df['labels'] = results[k]['labels'].detach().to('cpu').numpy()
    df['image'] = k.replace('images\\', '')
    # print(df['image'])
    predictions_df = pd.concat([predictions_df, df], ignore_index=True)
predictions_df.shape

(123, 6)

In [44]:
predictions_df['labels'].value_counts()

labels
2    56
0    27
3    23
1    17
Name: count, dtype: int64

### Create Ground Truths DataFrame

In [45]:
ground_truth_df = pd.DataFrame()
for k in ground_truth:
    df = pd.DataFrame(ground_truth[k]['boxes'], columns=['x1', 'y1', 'x2', 'y2'])
    df['labels'] = ground_truth[k]['labels']
    # print(k)
    df['image'] = k.replace('images/', '')
    # print(df['image'])
    ground_truth_df = pd.concat([ground_truth_df, df], ignore_index=True)
    
ground_truth_df.shape

(114, 6)

In [46]:
ground_truth_df['labels'].value_counts()

labels
2    47
0    28
3    22
1    17
Name: count, dtype: int64

## Sort DFs

In [47]:
ground_truth_df = ground_truth_df.sort_values(by=['image', 'labels', 'y1', 'x1'])
predictions_df = predictions_df.sort_values(by=['image', 'labels', 'y1', 'x1'])

### Row Count

In [48]:
ground_truth_df['row_count'] = ground_truth_df.groupby(['image', 'labels']).cumcount() + 1
predictions_df['row_count'] = predictions_df.groupby(['image', 'labels']).cumcount() + 1

In [49]:
# ground_truth_df.head(10)
predictions_df.head(10)

Unnamed: 0,x1,y1,x2,y2,labels,image,row_count
2,623.963196,405.764679,736.779358,431.404663,0,0f7db672-drawing_88.png,1
0,276.354675,529.920471,368.868469,559.25061,1,0f7db672-drawing_88.png,1
5,625.193726,264.014557,717.291321,293.582336,2,0f7db672-drawing_88.png,1
4,215.599579,516.050476,284.105164,545.852112,2,0f7db672-drawing_88.png,2
1,44.842674,67.42247,84.848969,105.70858,3,0f7db672-drawing_88.png,1
3,35.578465,353.856964,78.913193,390.954102,3,0f7db672-drawing_88.png,2
8,39.677917,523.833435,152.472717,549.371277,0,1eecb4f3-drawing_25.png,1
10,444.935791,526.957947,540.1698,560.813477,1,1eecb4f3-drawing_25.png,1
14,468.255829,185.124496,512.289124,206.263962,2,1eecb4f3-drawing_25.png,1
13,671.76001,238.387543,722.909729,258.166748,2,1eecb4f3-drawing_25.png,2


## Merge The DataFrames for Comparison

In [50]:
merged_df = pd.merge(ground_truth_df, predictions_df, on=['image', 'labels', 'row_count'], how='outer', suffixes=('_gt', '_pred'), validate='many_to_many')

In [51]:
merged_df.head(12)

Unnamed: 0,x1_gt,y1_gt,x2_gt,y2_gt,labels,image,row_count,x1_pred,y1_pred,x2_pred,y2_pred
0,617.142857,402.857143,732.0,435.428571,0,0f7db672-drawing_88.png,1,623.963196,405.764679,736.779358,431.404663
1,277.714286,528.0,366.857143,560.571429,1,0f7db672-drawing_88.png,1,276.354675,529.920471,368.868469,559.25061
2,612.0,262.285714,713.142857,294.857143,2,0f7db672-drawing_88.png,1,625.193726,264.014557,717.291321,293.582336
3,214.285714,517.714286,281.142857,543.428571,2,0f7db672-drawing_88.png,2,215.599579,516.050476,284.105164,545.852112
4,44.571429,65.142857,90.857143,108.0,3,0f7db672-drawing_88.png,1,44.842674,67.42247,84.848969,105.70858
5,34.285714,351.428571,78.857143,390.857143,3,0f7db672-drawing_88.png,2,35.578465,353.856964,78.913193,390.954102
6,37.714286,526.285714,161.142857,553.714286,0,1eecb4f3-drawing_25.png,1,39.677917,523.833435,152.472717,549.371277
7,447.428571,526.285714,531.428571,565.714286,1,1eecb4f3-drawing_25.png,1,444.935791,526.957947,540.1698,560.813477
8,462.857143,186.857143,514.285714,209.142857,2,1eecb4f3-drawing_25.png,1,468.255829,185.124496,512.289124,206.263962
9,666.857143,238.285714,726.857143,258.857143,2,1eecb4f3-drawing_25.png,2,671.76001,238.387543,722.909729,258.166748


## Calculate IOU

In [52]:
def calculate_iou(box1, box2):
    """
    Calculate the Intersection over Union (IoU) of two bounding boxes.
    
    Parameters:
    - box1: (x1, y1, x2, y2) coordinates of the first bounding box
    - box2: (x1, y1, x2, y2) coordinates of the second bounding box
    
    Returns:
    - iou: Intersection over Union (IoU) value
    """
    # Unpack the coordinates of the two boxes
    x1_1, y1_1, x2_1, y2_1 = box1
    x1_2, y1_2, x2_2, y2_2 = box2
    
    # Calculate the (x, y) coordinates of the intersection rectangle
    xi1 = max(x1_1, x1_2)
    yi1 = max(y1_1, y1_2)
    xi2 = min(x2_1, x2_2)
    yi2 = min(y2_1, y2_2)
    
    # Calculate the area of the intersection rectangle
    inter_width = max(0, xi2 - xi1)
    inter_height = max(0, yi2 - yi1)
    inter_area = inter_width * inter_height
    
    # Calculate the area of both bounding boxes
    box1_area = (x2_1 - x1_1) * (y2_1 - y1_1)
    box2_area = (x2_2 - x1_2) * (y2_2 - y1_2)
    
    # Calculate the union area
    union_area = box1_area + box2_area - inter_area
    
    # Calculate the IoU
    iou = inter_area / union_area if union_area != 0 else 0
    
    return iou

In [53]:
merged_df['iou'] = merged_df.apply(lambda x: calculate_iou((x['x1_gt'], x['y1_gt'], x['x2_gt'], x['y2_gt']), (x['x1_pred'], x['y1_pred'], x['x2_pred'], x['y2_pred'])), axis=1)

## Missing Labels in the Image

In [54]:
# predictions_df.groupby('image')['labels'].unique()
labels_df = predictions_df.groupby('image')['labels'].apply(lambda x: set(x)).reset_index()

all_labels = {0, 1, 2, 3}
labels_df['missing_labels'] = labels_df['labels'].apply(lambda x: all_labels - x)

labels_df

Unnamed: 0,image,labels,missing_labels
0,0f7db672-drawing_88.png,"{0, 1, 2, 3}",{}
1,1eecb4f3-drawing_25.png,"{0, 1, 2, 3}",{}
2,2832a63f-drawing_44.png,"{0, 1, 2, 3}",{}
3,2e4aefb0-drawing_15.png,"{0, 1, 2, 3}",{}
4,3c56fa90-drawing_7.png,"{0, 1, 2, 3}",{}
5,594b3b57-drawing_113.png,"{0, 2, 3}",{1}
6,5b5d1b9b-drawing_123.png,"{0, 1, 2, 3}",{}
7,72719062-drawing_43.png,"{0, 1, 2, 3}",{}
8,777ab86f-drawing_22.png,"{0, 1, 2, 3}",{}
9,78e13516-drawing_45.png,"{0, 1, 2, 3}",{}


## Data Information

In [55]:
# Ground Truth
print(f'Number of Labels in Training Data - {ground_truth_df.shape[0]}')
print(f'Bar Scale Count - {ground_truth_df.loc[ground_truth_df["labels"]==0].shape[0]}')
print(f'Color Stamp Count - {ground_truth_df.loc[ground_truth_df["labels"]==1].shape[0]}')
print(f'Detail Label Count - {ground_truth_df.loc[ground_truth_df["labels"]==2].shape[0]}')
print(f'North Sign Count - {ground_truth_df.loc[ground_truth_df["labels"]==3].shape[0]}')

Number of Labels in Training Data - 114
Bar Scale Count - 28
Color Stamp Count - 17
Detail Label Count - 47
North Sign Count - 22


In [56]:
# Predictions
print(f'Number of Labels in Predictions - {predictions_df.shape[0]}')
print(f'Bar Scale Count - {predictions_df.loc[predictions_df["labels"]==0].shape[0]}')
print(f'Color Stamp Count - {predictions_df.loc[predictions_df["labels"]==1].shape[0]}')
print(f'Detail Label Count - {predictions_df.loc[predictions_df["labels"]==2].shape[0]}')
print(f'North Sign Count - {predictions_df.loc[predictions_df["labels"]==3].shape[0]}')

Number of Labels in Predictions - 123
Bar Scale Count - 27
Color Stamp Count - 17
Detail Label Count - 56
North Sign Count - 23


## Ground Truth & Predictions 
### Shape Comparison 

In [57]:
ground_truth_df.shape

(114, 7)

In [58]:
predictions_df.shape

(123, 7)

## Page-wise TP, FP, FN

## Final Testing

In [59]:
def final_result(image, ground_truth, results):
    TP, FP, FN = 0, 0, 0

    result_dict = []
    for i in range(len(ground_truth[f'images/{image}']['labels'])):
        label = ground_truth[f'images/{image}']['labels'][i]
        # label = 2 
        match = False
        for j in range(len(results[f'images\\{image}']['labels'])):
            pred_label = results[f'images\\{image}']['labels'][j].numpy()
            iou = calculate_iou(ground_truth[f'images/{image}']['boxes'][i], results[f'images\\{image}']['boxes'][j].numpy())
            if iou > 0.6:
                if pred_label == label:
                    result_dict.append({
                        'image': image,
                        'label': label, 
                        'ground_truth': ground_truth[f'images/{image}']['boxes'][i],
                        'prediction': results[f'images\\{image}']['boxes'][j].numpy(),
                        'iou': iou, 
                        'result': 'TP',                                      
                    })
                    # TP = TP + 1
                    match = True
                    continue
                elif pred_label != label:
                    result_dict.append({
                        'image': image,
                        'label': pred_label, 
                        'ground_truth': ground_truth[f'images/{image}']['boxes'][i],
                        'prediction': results[f'images\\{image}']['boxes'][j].numpy(),
                        'iou': iou, 
                        'result': 'FP',                
                    })
                    continue
            
        if match == False:
            result_dict.append({
                'image': image,
                'label': label, 
                'ground_truth': ground_truth[f'images/{image}']['boxes'][i],
                'prediction': [],
                'iou': 0, 
                'result': 'FN',                
            })
    return result_dict
                    

In [60]:
results

{'images\\0f7db672-drawing_88.png': {'scores': tensor([0.7908, 0.9473, 0.6962, 0.8576, 0.8913, 0.9583]),
  'labels': tensor([1, 3, 0, 3, 2, 2]),
  'boxes': tensor([[276.3547, 529.9205, 368.8685, 559.2506],
          [ 44.8427,  67.4225,  84.8490, 105.7086],
          [623.9632, 405.7647, 736.7794, 431.4047],
          [ 35.5785, 353.8570,  78.9132, 390.9541],
          [215.5996, 516.0505, 284.1052, 545.8521],
          [625.1937, 264.0146, 717.2913, 293.5823]])},
 'images\\1eecb4f3-drawing_25.png': {'scores': tensor([0.9483, 0.9452, 0.9636, 0.9264, 0.9093, 0.6513, 0.9543, 0.9502, 0.9699]),
  'labels': tensor([3, 2, 0, 2, 1, 2, 2, 2, 2]),
  'boxes': tensor([[ 37.5246,  64.4590,  82.1843, 101.1923],
          [226.4342, 421.2791, 272.8600, 441.8311],
          [ 39.6779, 523.8334, 152.4727, 549.3713],
          [ 62.1293, 381.2793, 115.6546, 401.2077],
          [444.9358, 526.9579, 540.1698, 560.8135],
          [279.9572, 282.1028, 323.9915, 295.9975],
          [106.4918, 297.8110, 1

In [37]:
results

{'images\\0f7db672-drawing_88.png': {'scores': tensor([0.7908, 0.9473, 0.6962, 0.8576, 0.8913, 0.9583]),
  'labels': tensor([1, 3, 0, 3, 2, 2]),
  'boxes': tensor([[276.3547, 529.9205, 368.8685, 559.2506],
          [ 44.8427,  67.4225,  84.8490, 105.7086],
          [623.9632, 405.7647, 736.7794, 431.4047],
          [ 35.5785, 353.8570,  78.9132, 390.9541],
          [215.5996, 516.0505, 284.1052, 545.8521],
          [625.1937, 264.0146, 717.2913, 293.5823]])},
 'images\\1eecb4f3-drawing_25.png': {'scores': tensor([0.9483, 0.9452, 0.9636, 0.9264, 0.9093, 0.6513, 0.9543, 0.9502, 0.9699]),
  'labels': tensor([3, 2, 0, 2, 1, 2, 2, 2, 2]),
  'boxes': tensor([[ 37.5246,  64.4590,  82.1843, 101.1923],
          [226.4342, 421.2791, 272.8600, 441.8311],
          [ 39.6779, 523.8334, 152.4727, 549.3713],
          [ 62.1293, 381.2793, 115.6546, 401.2077],
          [444.9358, 526.9579, 540.1698, 560.8135],
          [279.9572, 282.1028, 323.9915, 295.9975],
          [106.4918, 297.8110, 1

In [62]:
new_df = pd.DataFrame()
for image in os.listdir('Temp/images'):
    result_dict = final_result(image, ground_truth, results)
    temp_df = pd.DataFrame(result_dict)
    
    new_df = pd.concat([new_df, temp_df], ignore_index=True)
    

In [63]:
new_df

Unnamed: 0,image,label,ground_truth,prediction,iou,result
0,0f7db672-drawing_88.png,3,"[44.57142857142857, 65.14285714285714, 90.8571...","[44.842674, 67.42247, 84.84897, 105.70858]",0.772146,TP
1,0f7db672-drawing_88.png,3,"[34.28571428571428, 351.4285714285714, 78.8571...","[35.578465, 353.85696, 78.91319, 390.9541]",0.907950,TP
2,0f7db672-drawing_88.png,1,"[277.71428571428567, 528.0000000000001, 366.85...","[276.35468, 529.9205, 368.86847, 559.2506]",0.870833,TP
3,0f7db672-drawing_88.png,0,"[617.1428571428571, 402.85714285714283, 731.99...","[623.9632, 405.76468, 736.77936, 431.40466]",0.716963,TP
4,0f7db672-drawing_88.png,2,"[611.9999999999999, 262.2857142857143, 713.142...","[625.1937, 264.01456, 717.2913, 293.58234]",0.761030,TP
...,...,...,...,...,...,...
113,f9310c0d-drawing_24.png,3,"[46.285714285714285, 94.28571428571429, 96.0, ...","[48.86842, 96.074356, 92.83082, 132.31934]",0.836102,TP
114,f9310c0d-drawing_24.png,0,"[47.99999999999999, 521.1428571428571, 133.714...","[44.44167, 522.7573, 128.53098, 551.11816]",0.732884,TP
115,f9310c0d-drawing_24.png,2,"[104.57142857142856, 416.57142857142856, 174.8...","[104.48722, 413.81842, 178.93121, 443.91373]",0.855460,TP
116,f9310c0d-drawing_24.png,2,"[370.2857142857143, 426.85714285714283, 444.0,...","[369.92804, 423.63702, 442.32217, 454.51657]",0.833239,TP


In [64]:
new_df['result'].value_counts()

result
TP    109
FN      7
FP      2
Name: count, dtype: int64

In [65]:
TP, FN, FP= new_df['result'].value_counts()
# FP = 0

In [66]:
print(f"Accuracy = {TP/(TP+FP+FN):.4f}")
print(f"Precision = {TP/(TP+FP):.4f}")
print(f"Recall = {TP/(TP+FN):.4f}")

Accuracy = 0.9237
Precision = 0.9820
Recall = 0.9397


### Classwise Accuracy

In [67]:
new_df.loc[new_df['label'] == 0]['result'].value_counts()

result
TP    25
FN     3
Name: count, dtype: int64

In [69]:
# Barscale 
TP, FN = new_df.loc[new_df['label'] == 0]['result'].value_counts()
FP = 0
# FN = 0
accuracy0 = TP/(TP+FP+FN)
print(f"Accuracy = {accuracy0:.4f}")
print(f"Precision = {TP/(TP+FP):.4f}")
print(f"Recall = {TP/(TP+FN):.4f}")


Accuracy = 0.8929
Precision = 1.0000
Recall = 0.8929


In [70]:
new_df.loc[new_df['label'] == 1]['result'].value_counts()

result
TP    16
FN     1
Name: count, dtype: int64

In [71]:
# Color Stamp
TP, FN = new_df.loc[new_df['label'] == 1]['result'].value_counts()
# FN = 0
FP = 0
accuracy1= TP/(TP+FP+FN)
print(f"Accuracy = {accuracy1:.4f}")
print(f"Precision = {TP/(TP+FP):.4f}")
print(f"Recall = {TP/(TP+FN):.4f}")

Accuracy = 0.9412
Precision = 1.0000
Recall = 0.9412


In [72]:
new_df.loc[new_df['label'] == 2]['result'].value_counts()

result
TP    47
FN     2
FP     2
Name: count, dtype: int64

In [73]:
# Detail Labels
TP, FN, FP = new_df.loc[new_df['label'] == 2]['result'].value_counts()
# FN, FP = 0, 0
# TP = 55
accuracy2 = TP/(TP+FP+FN)
print(f"Accuracy = {accuracy2:.4f}")
print(f"Precision = {TP/(TP+FP):.4f}")
print(f"Recall = {TP/(TP+FN):.4f}")

Accuracy = 0.9216
Precision = 0.9592
Recall = 0.9592


In [74]:
new_df.loc[new_df['label'] == 3]['result'].value_counts()

result
TP    21
FN     1
Name: count, dtype: int64

In [75]:
# North Sign
TP, FN = new_df.loc[new_df['label'] == 3]['result'].value_counts()
# TP = 20
# FP, FN = 0, 0
FP = 0
accuracy3 = TP/(TP+FP+FN)
print(f"Accuracy = {accuracy3:.4f}")
print(f"Precision = {TP/(TP+FP):.4f}")
print(f"Recall = {TP/(TP+FN):.4f}")

Accuracy = 0.9545
Precision = 1.0000
Recall = 0.9545


In [76]:
# Model Accuracy 
print(f"Model Accuracy = {(accuracy0 + accuracy1 + accuracy2 + accuracy3)/4:.4f}")

Model Accuracy = 0.9275


In [77]:
image_analysis_df = pd.DataFrame()
image_analysis_df['image'] = new_df['image'].unique()
image_analysis_df['TP'] = new_df.groupby('image')['result'].apply(lambda x: x.value_counts().get('TP', 0)).values
image_analysis_df['FP'] = new_df.groupby('image')['result'].apply(lambda x: x.value_counts().get('FP', 0)).values
image_analysis_df['FN'] = new_df.groupby('image')['result'].apply(lambda x: x.value_counts().get('FN', 0)).values

In [78]:
## TP, FP, FN count by image
image_analysis_df

Unnamed: 0,image,TP,FP,FN
0,0f7db672-drawing_88.png,6,0,0
1,1eecb4f3-drawing_25.png,6,0,1
2,2832a63f-drawing_44.png,4,2,1
3,2e4aefb0-drawing_15.png,5,0,0
4,3c56fa90-drawing_7.png,6,0,0
5,594b3b57-drawing_113.png,5,0,0
6,5b5d1b9b-drawing_123.png,6,0,0
7,72719062-drawing_43.png,5,0,0
8,777ab86f-drawing_22.png,6,0,0
9,78e13516-drawing_45.png,5,0,1


In [126]:
new_df

Unnamed: 0,image,label,ground_truth,prediction,iou,result
0,17bd5f92-drawing_106.png,3,"[595.3953488372093, 89.34883720930233, 630.976...","[594.5546, 91.89246, 629.8985, 127.042946]",0.878849,TP
1,17bd5f92-drawing_106.png,1,"[617.5348837209303, 386.6511627906977, 695.813...","[615.8867, 383.9523, 697.3328, 418.20605]",0.846111,TP
2,17bd5f92-drawing_106.png,0,"[46.65116279069767, 515.5348837209302, 136.790...","[47.078247, 513.19214, 135.60881, 533.5106]",0.800008,TP
3,17bd5f92-drawing_106.png,2,"[76.69767441860465, 260.9302325581395, 173.953...","[78.388084, 258.4962, 172.31276, 284.63123]",0.852094,TP
4,17bd5f92-drawing_106.png,2,"[430.1395348837209, 279.906976744186, 529.7674...","[431.6302, 278.01755, 523.4372, 304.2543]",0.829630,TP
...,...,...,...,...,...,...
112,f601361a-drawing_42.png,0,"[57.72093023255814, 523.4418604651163, 162.883...","[57.942734, 522.99347, 163.91122, 545.0451]",0.884097,TP
113,f601361a-drawing_42.png,2,"[232.46511627906978, 407.99999999999994, 268.0...","[233.40746, 408.09653, 269.31165, 423.29657]",0.904801,TP
114,f601361a-drawing_42.png,2,"[242.74418604651163, 242.7441860465117, 294.13...","[242.26714, 243.00899, 293.5023, 257.48267]",0.896285,TP
115,f601361a-drawing_42.png,2,"[223.7674418604651, 170.7906976744186, 275.953...","[224.25545, 171.85445, 276.95264, 186.34465]",0.873790,TP


In [79]:
predictions_df = pd.DataFrame()
for k in results:
    df = pd.DataFrame(results[k]['boxes'].detach().to('cpu').numpy(), columns=['x1', 'y1', 'x2', 'y2'])
    df['labels'] = results[k]['labels'].detach().to('cpu').numpy()
    df['image'] = k.replace('images\\', '')
    # print(df['image'])
    predictions_df = pd.concat([predictions_df, df], ignore_index=True)
predictions_df.shape

(123, 6)

In [80]:
image_names = predictions_df['image'].unique()
labels = [0,1,2,3]

rows = []

# Populate the new rows
for idx, image_name in enumerate(image_names):
    row = {'image_name': image_name}
    image_df = predictions_df[predictions_df['image'] == image_name]
    for label in labels:
        label_present = any(image_df['labels'] == label)
        label_count = image_df[image_df['labels'] == label].shape[0]
        row[f'{id2label[label]}'] = label_present
        row[f'{id2label[label]}_count'] = label_count
    rows.append(row)

# Create a new dataframe from the list of rows
df2 = pd.DataFrame(rows)

In [81]:

# df2.drop(columns=['id'], inplace=True)

In [82]:
df2

Unnamed: 0,image_name,bar-scale,bar-scale_count,color-stamp,color-stamp_count,detail-labels,detail-labels_count,north-sign,north-sign_count
0,0f7db672-drawing_88.png,True,1,True,1,True,2,True,2
1,1eecb4f3-drawing_25.png,True,1,True,1,True,6,True,1
2,2832a63f-drawing_44.png,True,2,True,1,True,2,True,1
3,2e4aefb0-drawing_15.png,True,1,True,1,True,3,True,1
4,3c56fa90-drawing_7.png,True,1,True,1,True,3,True,1
5,594b3b57-drawing_113.png,True,1,False,0,True,3,True,1
6,5b5d1b9b-drawing_123.png,True,2,True,1,True,2,True,2
7,72719062-drawing_43.png,True,1,True,1,True,2,True,1
8,777ab86f-drawing_22.png,True,1,True,1,True,3,True,1
9,78e13516-drawing_45.png,True,1,True,1,True,4,True,1


In [131]:
predictions_df

Unnamed: 0,x1,y1,x2,y2,labels,image
0,615.886719,383.952301,697.332825,418.206055,1,17bd5f92-drawing_106.png
1,78.388084,258.496185,172.312759,284.631226,2,17bd5f92-drawing_106.png
2,431.630188,278.017548,523.437195,304.254303,2,17bd5f92-drawing_106.png
3,594.554626,91.892464,629.898499,127.042946,3,17bd5f92-drawing_106.png
4,47.078247,513.192139,135.608810,533.510620,0,17bd5f92-drawing_106.png
...,...,...,...,...,...,...
114,233.407455,408.096527,269.311646,423.296570,2,f601361a-drawing_42.png
115,595.760620,179.227219,641.968628,195.500916,2,f601361a-drawing_42.png
116,47.474411,88.250298,82.988251,125.105049,3,f601361a-drawing_42.png
117,57.942734,522.993469,163.911224,545.045105,0,f601361a-drawing_42.png
