### Important libraries

In [6]:
import time
from pathlib import Path
import os

import cv2
import torch
from numpy import random
import pandas as pd
import numpy as np

from models.experimental import attempt_load
from utils.datasets import LoadImages
from utils.general import non_max_suppression, scale_coords, xyxy2xywh
from utils.torch_utils import select_device

## Getting all the detections using our model and creating a dataframe

In [2]:
#Plot with just bounding box
def plot_one_box(x, image, color=None, label=None, line_thickness=3):
    """Draw one bounding box, and optionally a caption, onto ``image``.

    Parameters
    ----------
    x : sequence
        Box corners in ``(xmin, ymin, xmax, ymax)`` order; each entry is
        converted with ``int()`` before drawing.
    image : array
        Image handed to the OpenCV drawing calls; it is drawn on in place.
    color : sequence of 3 numbers, optional
        Box colour. A random colour is picked when omitted (or falsy).
    label : str, optional
        Caption drawn on a filled background above the top-left corner.
    line_thickness : int
        Rectangle thickness; the font scale and font thickness are derived
        from it.

    Returns
    -------
    The same ``image`` object that was passed in.
    """
    tl = line_thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    # `random` is numpy.random in this notebook, so the fallback colour holds
    # NumPy integers; OpenCV can reject those as a colour scalar, so normalise
    # every component to a plain Python int.
    color = [int(channel) for channel in color]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(image, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)

    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        # Caption background: starts at the box's top-left corner and extends
        # upwards/rightwards by the rendered text size.
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(image, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(image, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

    return image

def detect(folder_name, results_out=False, save_dir = "data/try/result/", model_name = 'best.pt', 
           save_img=False, imgsz = 640, conf_th = 0.5, iou_th = 0.45):
    """Run the detector over every image in ``folder_name`` and tabulate the boxes.

    Parameters
    ----------
    folder_name : str
        Image source passed to ``LoadImages``.
    results_out : bool
        When True, each image with at least one detection is annotated and
        written to ``save_dir`` under its original file name.
    save_dir : str
        Output directory for annotated images; created if missing. Only used
        when ``results_out`` is True.
    model_name : str
        Weights file passed to ``attempt_load``.
    save_img : bool
        Accepted for backward compatibility; not used by this function.
    imgsz : int
        Image size passed to ``LoadImages``.
    conf_th, iou_th : float
        Thresholds passed to ``non_max_suppression``.

    Returns
    -------
    pandas.DataFrame
        One row per detection with columns ``img_name, xmin, ymin, xmax,
        ymax, confidence, label``. Empty (but with those columns) when
        nothing was detected.
    """
    box_columns = ['xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'label']
    out_columns = ['img_name', 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'label']

    # Initialize
    device = select_device('cpu')

    # Load model
    model = attempt_load(model_name, map_location=device)

    # Loading images
    dataset = LoadImages(folder_name, img_size=imgsz)

    # Get names and colors
    names = model.names
    colors = [(105, 40, 123), (25, 179, 255), (76, 20, 224)]
    label_dict = {0: 'unripe', 1: 'partially_ripe', 2: 'fully_ripe'}

    if results_out:
        # cv2.imwrite does not create directories; without this the save can
        # fail without raising.
        os.makedirs(save_dir, exist_ok=True)

    t0 = time.time()

    # Per-image frames are collected here and concatenated once at the end,
    # instead of re-copying a growing DataFrame on every image.
    per_image_frames = []

    count = 0
    total_time = 0
    for p, img, im0s, _ in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.float()
        img /= 255.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference (no_grad: gradients are not needed and would only cost memory)
        t1 = time.time()
        with torch.no_grad():
            pred = model(img)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_th, iou_th)
        t2 = time.time()

        p = Path(p)

        # Process detections
        for det in pred:  # detections per image
            s, im0 = '', im0s

            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
                temp_df = pd.DataFrame(det.cpu().numpy(), columns=box_columns)
                temp_df['label'] = temp_df['label'].apply(lambda x: label_dict[int(x)])
                temp_df['img_name'] = p.name
                per_image_frames.append(temp_df)

                # Summarise detections per class for the log line
                for c in det[:, -1].unique():
                    n = int((det[:, -1] == c).sum())  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                if results_out:
                    # Only draw when the annotated image is actually saved
                    for *xyxy, conf, cls in reversed(det):
                        label = f'{names[int(cls)]} {conf:.2f}'
                        im0 = plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=2)
                    cv2.imwrite(str(Path(save_dir) / p.name), im0)

            # Print inference time of an image
            print(f'Image = {p.name}, {s}Done. ({(1E3 * (t2 - t1)):.1f}ms)')
            total_time += t2 - t1
            count += 1

    # Print inference time of all the images in total
    print(f'Done. ({time.time() - t0:.3f}s)')

    if count:  # avoid dividing by zero when no image was processed
        print(f"Average time of inference per image = {1E3 * total_time/count} ms")

    if per_image_frames:
        final_df = pd.concat(per_image_frames, ignore_index=True)
    else:
        final_df = pd.DataFrame(columns=out_columns)

    return final_df[out_columns]

In [3]:
# Create the CSV destination before the (slow) inference run, so a missing
# folder cannot make `to_csv` fail after all images have been processed.
os.makedirs('results/test', exist_ok=True)

# NOTE(review): `save_dir` only takes effect when `results_out=True`; as called
# here no annotated images are written. Confirm whether that is intended.
detect_df = detect('data/test/images/', save_dir = 'results/test/output/', conf_th = 0.5, iou_th = 0.45)

detect_df.to_csv('results/test/test.csv', index=False)

Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Image = 1003.png, 5 unripes, Done. (598.3ms)
Image = 1008.png, 5 unripes, 1 partially ripe, 1 fully ripe, Done. (531.2ms)
Image = 101.png, 6 unripes, 1 fully ripe, Done. (533.3ms)
Image = 1012.png, 3 unripes, 1 partially ripe, 1 fully ripe, Done. (527.8ms)
Image = 1038.png, 1 unripe, 1 partially ripe, Done. (531.3ms)
Image = 1061.png, 2 unripes, 1 partially ripe, Done. (534.7ms)
Image = 1091.png, 5 unripes, 2 partially ripes, Done. (529.8ms)
Image = 1113.png, 3 unripes, 2 fully ripes, Done. (531.3ms)
Image = 1148.png, 1 unripe, 1 partially ripe, Done. (532.5ms)
Image = 1158.png, 7 unripes, 2 fully ripes, Done. (532.8ms)
Image = 1161.png, 2 unripes, 1 partially ripe, 1 fully ripe, Done. (535.3ms)
Image = 1169.png, 5 unripes, 1 partially ripe, Done. (534.0ms)
Image = 1199.png, 2 unripes, 2 partially ripes, Done. (533.6ms)
Image = 1206.png, 1 unripe, 3 partially ripes, Done. (533.2ms)
Image = 1213.png, 6 unripes, 1 partially ripe, Done. (532.1ms)
Image = 1224.png, 5 unripes, 1 partially r

Image = 2293.png, 4 unripes, 1 fully ripe, Done. (536.8ms)
Image = 2302.png, 6 unripes, 1 fully ripe, Done. (534.2ms)
Image = 2303.png, 8 unripes, 1 partially ripe, Done. (539.3ms)
Image = 2311.png, 6 unripes, Done. (535.0ms)
Image = 232.png, 5 unripes, Done. (537.6ms)
Image = 2324.png, 4 unripes, 2 fully ripes, Done. (536.3ms)
Image = 2329.png, 4 unripes, 1 fully ripe, Done. (532.0ms)
Image = 2336.png, 2 unripes, 1 partially ripe, Done. (528.0ms)
Image = 2349.png, 3 unripes, 1 partially ripe, 1 fully ripe, Done. (538.9ms)
Image = 2369.png, 5 unripes, Done. (532.8ms)
Image = 2385.png, 2 unripes, 1 partially ripe, Done. (530.4ms)
Image = 2392.png, 2 unripes, 1 partially ripe, Done. (531.7ms)
Image = 2397.png, 3 unripes, 1 partially ripe, 1 fully ripe, Done. (535.7ms)
Image = 2398.png, 4 unripes, 1 partially ripe, 2 fully ripes, Done. (534.9ms)
Image = 24.png, 7 unripes, Done. (528.7ms)
Image = 2411.png, 7 unripes, 3 partially ripes, Done. (538.4ms)
Image = 2445.png, 2 unripes, 1 partial

Image = 564.png, 2 unripes, 1 partially ripe, Done. (538.0ms)
Image = 570.png, 4 unripes, 1 partially ripe, Done. (538.3ms)
Image = 596.png, 5 unripes, 1 partially ripe, Done. (532.9ms)
Image = 597.png, 5 unripes, 1 partially ripe, 1 fully ripe, Done. (539.6ms)
Image = 615.png, 2 unripes, 2 partially ripes, 1 fully ripe, Done. (535.7ms)
Image = 629.png, 6 unripes, 2 partially ripes, Done. (535.4ms)
Image = 632.png, 5 unripes, 1 fully ripe, Done. (533.6ms)
Image = 635.png, 3 unripes, 2 partially ripes, 3 fully ripes, Done. (533.7ms)
Image = 662.png, 7 unripes, 1 partially ripe, Done. (537.3ms)
Image = 668.png, 5 unripes, 2 partially ripes, 1 fully ripe, Done. (537.2ms)
Image = 671.png, 3 unripes, 2 partially ripes, 1 fully ripe, Done. (543.0ms)
Image = 675.png, 8 unripes, 1 partially ripe, Done. (533.5ms)
Image = 686.png, 3 unripes, 2 partially ripes, Done. (574.3ms)
Image = 690.png, 4 unripes, Done. (568.8ms)
Image = 693.png, 2 unripes, 1 fully ripe, Done. (533.1ms)
Image = 697.png, 6 

In [4]:
# Display the detections table (one row per predicted box).
detect_df

Unnamed: 0,img_name,xmin,ymin,xmax,ymax,confidence,label
0,1003.png,738.0,379.0,805.0,466.0,0.974735,unripe
1,1003.png,590.0,362.0,697.0,531.0,0.952862,unripe
2,1003.png,1.0,199.0,74.0,314.0,0.942174,unripe
3,1003.png,396.0,379.0,466.0,479.0,0.919797,unripe
4,1003.png,221.0,344.0,256.0,378.0,0.910763,unripe
...,...,...,...,...,...,...,...
1660,970.png,1.0,263.0,45.0,306.0,0.941539,unripe
1661,971.png,590.0,210.0,685.0,306.0,0.928950,unripe
1662,971.png,546.0,291.0,702.0,433.0,0.818956,partially_ripe
1663,971.png,94.0,712.0,196.0,754.0,0.785696,unripe


## Creating a dataframe of the ground-truth bounding boxes from the annotations

In [7]:
# Directories holding the YOLO-format label files and the images they annotate
txt_path = 'data/test/labels/'
img_path = 'data/test/images/'

label_dict = {0: 'unripe', 1: 'partially_ripe', 2: 'fully_ripe'}

img_names, x_mins, y_mins, x_maxs, y_maxs, labels = [], [], [], [], [], []

# Sorted so the row order does not depend on the file system's listing order
for file in sorted(os.listdir(txt_path)):
    # Skip anything that is not a label file (sub-folders, checkpoints, ...)
    if not file.endswith('.txt'):
        continue

    # Swap only the extension; str.replace would touch every '.txt' in the name
    img_name = os.path.splitext(file)[0] + '.png'

    # The image is read only to obtain its size for de-normalising the boxes
    img = cv2.imread(os.path.join(img_path, img_name))
    if img is None:
        raise FileNotFoundError(
            f"No readable image '{img_name}' in '{img_path}' for label file '{file}'"
        )
    img_h, img_w = img.shape[0], img.shape[1]

    with open(os.path.join(txt_path, file), 'r') as f:
        for line in f:
            # split() tolerates tabs, repeated spaces and trailing whitespace
            values = line.split()

            if len(values) > 1:
                # Each line is: class x_center y_center width height (normalised)
                cls_id, x_c, y_c, box_w, box_h = values[:5]

                x = int(float(x_c) * img_w)
                y = int(float(y_c) * img_h)
                w = int(float(box_w) * img_w)
                h = int(float(box_h) * img_h)

                # Centre/size -> corners, clamped to the image bounds
                xmin = max(x - w//2, 0)
                ymin = max(y - h//2, 0)
                xmax = min(x + w//2, img_w)
                ymax = min(y + h//2, img_h)

                img_names.append(img_name)
                x_mins.append(xmin)
                y_mins.append(ymin)
                x_maxs.append(xmax)
                y_maxs.append(ymax)
                labels.append(label_dict[int(float(cls_id))])

# Assemble the ground-truth table and persist it next to the detections CSV
true_df = pd.DataFrame({'img_name': img_names, 'xmin':x_mins, 'ymin':y_mins, 'xmax':x_maxs, 'ymax':y_maxs, 'label':labels})
os.makedirs('results/test', exist_ok=True)
true_df.to_csv('results/test/test_ground_truth.csv', index=False)

In [8]:
# Display the ground-truth table (one row per annotated box, pixel coordinates).
true_df

Unnamed: 0,img_name,xmin,ymin,xmax,ymax,label
0,1003.png,914,0,936,10,fully_ripe
1,1003.png,880,0,914,10,unripe
2,1003.png,694,0,720,18,fully_ripe
3,1003.png,738,379,804,465,unripe
4,1003.png,588,364,698,532,unripe
...,...,...,...,...,...,...
1706,970.png,0,260,45,306,unripe
1707,971.png,545,290,703,434,fully_ripe
1708,971.png,590,206,684,306,unripe
1709,971.png,603,15,661,77,unripe


## Code to create the per-image .txt files for mAP calculation

In [146]:
# Reload the saved ground-truth and detection tables from disk; this replaces
# any in-memory frames of the same names.
true_df = pd.read_csv('results/test/test_ground_truth.csv')
detect_df = pd.read_csv('results/test/test.csv')

In [9]:
def remove_txt_files(folder):
    """Delete every file matching ``*.txt`` in ``folder`` and all its sub-folders."""
    import fnmatch
    for parent, _subdirs, entries in os.walk(folder):
        # Collect the matches for this directory first, then remove them
        doomed = [
            os.path.join(parent, entry)
            for entry in entries
            if fnmatch.fnmatch(entry, '*.txt')
        ]
        for target in doomed:
            os.unlink(target)

# Clear out .txt files left by a previous run so they cannot leak into this evaluation
remove_txt_files('mAP/input/')

gt_dir = os.path.join('mAP', 'input', 'ground-truth')
dt_dir = os.path.join('mAP', 'input', 'detection-results')
# np.savetxt does not create folders, so make sure both exist
os.makedirs(gt_dir, exist_ok=True)
os.makedirs(dt_dir, exist_ok=True)

gt_cols = ['label', 'xmin', 'ymin', 'xmax', 'ymax']
dt_cols = ['label', 'confidence', 'xmin', 'ymin', 'xmax', 'ymax']

# Cover every image that appears in either table. Iterating over the
# ground-truth names alone would drop detections made on images without any
# annotated box, so those false positives would never be counted.
all_img_names = pd.concat([true_df['img_name'], detect_df['img_name']], ignore_index=True).unique()

for val in all_img_names:
    # Swap the extension, whatever it is, for .txt
    f_name = os.path.splitext(val)[0] + '.txt'

    # An image missing from one table yields an empty file on that side
    gt_path = os.path.join(gt_dir, f_name)
    gt_temp = true_df[true_df['img_name']==val]
    np.savetxt(gt_path, gt_temp[gt_cols].values, fmt='%s')

    dt_path = os.path.join(dt_dir, f_name)
    dt_temp = detect_df[detect_df['img_name']==val]
    np.savetxt(dt_path, dt_temp[dt_cols].values, fmt='%s')

In [10]:
# Run the mAP script as a shell command; it prints per-class AP and the overall mAP.
!python mAP/main.py

86.64% = fully_ripe AP 
87.95% = partially_ripe AP 
93.04% = unripe AP 
mAP = 89.21%
Label wise results saved in the results.csv inside mAP folder


In [143]:
# Load the per-label summary table from the mAP folder.
# NOTE(review): the script's message above mentions `results.csv`, but this reads
# `result.csv` — confirm which file name the script actually writes.
detections_map = pd.read_csv('mAP/result.csv')

In [144]:
# Display the per-label counts: ground truth, correct detections, false positives, misses.
detections_map

Unnamed: 0,Labels,Ground Truth Count,Detected Correctly,False Positive,Not Detected
0,fully_ripe,204,187,39,17
1,partially_ripe,308,284,51,24
2,unripe,1199,1139,53,60
