### Important libraries

In [1]:
import time
from pathlib import Path
import os

import cv2
import torch
from numpy import random
import pandas as pd
import numpy as np

from models.experimental import attempt_load
from utils.datasets import LoadImages
from utils.general import non_max_suppression, scale_coords, xyxy2xywh
from utils.torch_utils import select_device

  from .autonotebook import tqdm as notebook_tqdm


## Getting all detections from our model and collecting them in a DataFrame

In [2]:
#Plot with just bounding box
def plot_one_box(x, image, color=None, label=None, line_thickness=3):
    """Draw one bounding box, and optionally a text label, onto ``image``.

    The drawing is done with OpenCV directly on ``image``; the same object
    is returned for convenience.

    Args:
        x: Indexable with at least four entries ``(x1, y1, x2, y2)``; each is
            converted with ``int()`` and used as a pixel coordinate.
        image: Image to draw on (handed straight to ``cv2.rectangle`` /
            ``cv2.putText``).
        color: Colour sequence passed to OpenCV. When falsy, three random
            channel values are generated.
        label: Optional text drawn on a filled background at the box's
            top-left corner.
        line_thickness: Box outline thickness in pixels; also drives the
            font scale (``line_thickness / 3``).

    Returns:
        The ``image`` object that was passed in.
    """
    tl = line_thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(image, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)

    if label:
        tf = max(tl - 1, 1)  # font thickness
        label_w, label_h = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]

        # Prefer the strip just above the box. When the box touches the top of
        # the image that strip would have negative y and the label would be
        # clipped away, so fall back to drawing it just inside the box.
        fits_above = c1[1] - label_h - 3 >= 0
        if fits_above:
            bg_corner = (c1[0] + label_w, c1[1] - label_h - 3)
            text_org = (c1[0], c1[1] - 2)
        else:
            bg_corner = (c1[0] + label_w, c1[1] + label_h + 3)
            text_org = (c1[0], c1[1] + label_h + 2)

        cv2.rectangle(image, c1, bg_corner, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(image, label, text_org, 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

    return image

In [3]:
def detect(folder_name, results_out=True, save_dir="data/try/result/", model_name='best.pt',
           save_img=False, imgsz=640, conf_th=0.5, iou_th=0.45):
    """Run the detector over ``folder_name`` and return all detections as a DataFrame.

    For every item yielded by ``LoadImages`` the model is run on the CPU,
    non-max suppression is applied, boxes are rescaled to the original image
    size, and the boxes are drawn on the image with ``plot_one_box``.

    Args:
        folder_name: Source passed to ``LoadImages``.
        results_out: When true, annotated images are written to ``save_dir``.
            Only images with at least one detection are written.
        save_dir: Output folder for annotated images (str or ``Path``). It is
            created if it does not exist.
        model_name: Weights file passed to ``attempt_load``.
        save_img: Unused; kept so existing callers that pass it keep working.
        imgsz: Inference size passed to ``LoadImages``.
        conf_th: Confidence threshold passed to ``non_max_suppression``.
        iou_th: IoU threshold passed to ``non_max_suppression``.

    Returns:
        DataFrame with columns ``img_name, xmin, ymin, xmax, ymax,
        confidence, label`` — one row per detection. It is empty (but has
        these columns) when nothing was detected.
    """
    box_columns = ['xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'label']
    out_columns = ['img_name', 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'label']
    label_dict = {0: 'unripe', 1: 'partially_ripe', 2: 'fully_ripe'}
    # One drawing colour per class id (handed straight to OpenCV).
    colors = [(105, 40, 123), (25, 179, 255), (76, 20, 224)]

    # Initialize
    device = select_device('cpu')

    # Load model
    model = attempt_load(model_name, map_location=device)

    # Loading images
    dataset = LoadImages(folder_name, img_size=imgsz)

    # Class names as stored in the checkpoint (used for the printed summary)
    names = model.names

    def class_label(class_id):
        # CSV label for a class id; falls back to the model's own name for
        # ids that are not in label_dict instead of raising KeyError.
        idx = int(class_id)
        return label_dict[idx] if idx in label_dict else str(names[idx])

    out_dir = Path(save_dir)
    if results_out:
        # cv2.imwrite does not create missing folders, so make sure it exists.
        out_dir.mkdir(parents=True, exist_ok=True)

    per_image_frames = []  # concatenated once at the end instead of per image
    count = 0
    total_time = 0.0
    for p, img, im0s, _ in dataset:
        img = torch.from_numpy(img).to(device).float()
        img /= 255.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference (the timed span covers the forward pass and NMS)
        t1 = time.perf_counter()
        with torch.no_grad():
            pred = model(img)[0]  # Calculating gradients would cause a GPU memory leak

        # Apply NMS
        pred = non_max_suppression(pred, conf_th, iou_th)
        t2 = time.perf_counter()

        p = Path(p)  # to Path

        # Process detections
        for det in pred:  # detections per image
            s, im0 = '', im0s

            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
                temp_df = pd.DataFrame(det.cpu().numpy(), columns=box_columns)
                temp_df['label'] = temp_df['label'].apply(class_label)
                temp_df['img_name'] = p.name
                per_image_frames.append(temp_df)

                # Print results
                for c in det[:, -1].unique():
                    n = int((det[:, -1] == c).sum())  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Add bboxes to image
                for *xyxy, conf, cls in reversed(det):
                    label = f'{names[int(cls)]} {conf:.2f}'
                    # Wrap around the palette so extra classes do not raise IndexError.
                    box_color = colors[int(cls) % len(colors)]
                    im0 = plot_one_box(xyxy, im0, label=label, color=box_color, line_thickness=2)

                if results_out:
                    out_path = out_dir / p.name
                    # imwrite reports failure through its return value only.
                    if not cv2.imwrite(str(out_path), im0):
                        print(f'Warning: could not write {out_path}')

            # Print inference time of an image
            print(f'Image = {p.name}, {s}Done. ({(1E3 * (t2 - t1)):.1f}ms)')
            total_time += t2 - t1
            count += 1

    if count:
        print(f"\nAverage time of inference per image = {1E3 * total_time/count} ms")
    else:
        print("\nNo images were processed.")

    if per_image_frames:
        final_df = pd.concat(per_image_frames, ignore_index=True)
    else:
        final_df = pd.DataFrame(columns=out_columns)

    return final_df[out_columns]

In [4]:
# Run the detector on the final test images and save the detections as CSV.
# The output folders are created up front: on a fresh checkout they do not
# exist, in which case the annotated images would not be written and
# to_csv would fail.
Path('results/final_test/output/').mkdir(parents=True, exist_ok=True)

detect_df = detect('data/final_test/images/', save_dir = 'results/final_test/output/', conf_th = 0.75, iou_th = 0.7)

detect_df.to_csv('results/final_test/group8_test_result.csv', index=False)

Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Image = 2121.png, 1 unripe, 1 partially ripe, 1 fully ripe, Done. (607.0ms)
Image = 2169.png, 3 unripes, 2 partially ripes, Done. (640.0ms)
Image = 2230.png, 1 unripe, 2 partially ripes, Done. (536.0ms)
Image = 2236.png, 6 unripes, 1 partially ripe, 1 fully ripe, Done. (571.0ms)
Image = 2247.png, 4 unripes, 1 fully ripe, Done. (571.0ms)

Average time of inference per image = 584.998893737793 ms


In [5]:
# Preview the first detections instead of dumping the whole table.
detect_df.head(10)

Unnamed: 0,img_name,xmin,ymin,xmax,ymax,confidence,label
0,2121.png,696.0,4.0,745.0,58.0,0.958096,unripe
1,2121.png,567.0,203.0,687.0,377.0,0.936165,partially_ripe
2,2121.png,427.0,163.0,576.0,389.0,0.900192,fully_ripe
3,2169.png,800.0,148.0,900.0,226.0,0.946462,unripe
4,2169.png,483.0,228.0,621.0,412.0,0.935113,partially_ripe
5,2169.png,240.0,282.0,310.0,362.0,0.921637,unripe
6,2169.png,130.0,28.0,179.0,64.0,0.917368,unripe
7,2169.png,30.0,15.0,164.0,126.0,0.867644,partially_ripe
8,2230.png,610.0,371.0,766.0,569.0,0.96638,partially_ripe
9,2230.png,293.0,455.0,398.0,605.0,0.951266,unripe


In [6]:
# Number of detections per (image, ripeness label) pair.
(
    detect_df
    .groupby(['img_name', 'label'])
    .size()
    .reset_index(name='count')
)

Unnamed: 0,img_name,label,count
0,2121.png,fully_ripe,1
1,2121.png,partially_ripe,1
2,2121.png,unripe,1
3,2169.png,partially_ripe,2
4,2169.png,unripe,3
5,2230.png,partially_ripe,2
6,2230.png,unripe,1
7,2236.png,fully_ripe,1
8,2236.png,partially_ripe,1
9,2236.png,unripe,6


### Running inference on new images from the web

In [19]:
# Run the detector on the additional web images and save the detections as CSV.
# The output folders are created up front: on a fresh checkout they do not
# exist, in which case the annotated images would not be written and
# to_csv would fail.
Path('results/additional/output/').mkdir(parents=True, exist_ok=True)

detect_df = detect('data/additional/images/', save_dir = 'results/additional/output/', conf_th = 0.45, iou_th = 0.45)

detect_df.to_csv('results/additional/result.csv', index=False)

Fusing layers... 
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
RepConv.fuse_repvgg_block
Image = img1.png, 7 unripes, 4 fully ripes, Done. (479.2ms)
Image = img2.png, 5 unripes, 6 partially ripes, 7 fully ripes, Done. (506.3ms)

Average time of inference per image = 492.75267124176025 ms
