In [1]:
import models as net # cloned from https://github.com/eriklindernoren/PyTorch-YOLOv3
import utils as utls

import os
import pandas as pd
import torch
import numpy as np
import cv2, json, mimetypes, pdb, PIL, requests
from torchvision import transforms, utils
from torch.autograd import Variable
from tqdm.notebook import trange, tqdm

# Data Preparation

# Models

## YOLOV3

### Parameters

In [2]:
cfg_file = 'config/yolov3.cfg' # loading configuration file
weight_file = 'weights/yolov3.weights' # lodeing pre-trained weights
class_path = 'data/coco.names'
img_size=416 # input image size
conf_thres=0.8
nms_thres=0.4
Tensor = torch.cuda.FloatTensor

### loading CNN

In [3]:
model = net.Darknet(cfg_file,img_size=img_size)
model.load_darknet_weights(weight_file)


model.cuda()
model.eval()

class_names = utls.utils.load_classes(class_path) # MSCOCO CLass names

### loading Frames list

In [4]:
csv_file = 'mergedlbls.csv'
frames_list = pd.read_csv(csv_file)

In [5]:
frames_list.head()

Unnamed: 0,im_name,class_id,Xc,Yc,w,h
0,RainyDay2/00081.jpg,truck,1011,594,56,58
1,RainyDay2/00082.jpg,truck,1006,599,54,59
2,RainyDay2/00083.jpg,truck,1007,600,60,71
3,RainyDay2/00084.jpg,truck,1006,603,58,62
4,RainyDay2/00085.jpg,truck,1007,598,52,66


### setting the class names and indexes based on MSCOCO's classes names

In [6]:
c2i= {'car':2,'truck':7,'bus':5,'motorcycle':3} # object classes
i2c = lambda i : [k for k,v in c2i.items() if v ==i] # convert values to classes
c2i['car'], i2c(2)

(2, ['car'])

### Applying model to the dataset

In [7]:
def detect_image(img):
    # scale image
    ratio = min(img_size/img.size[0], img_size/img.size[1])
    imw = round(img.size[0] * ratio)
    imh = round(img.size[1] * ratio)
    
    return imh,imw

In [8]:
class_correct = {'car':0,'truck':0,'bus':0,'motorcycle':0}
class_total = {'car':0,'truck':0,'bus':0,'motorcycle':1} # total predicted classes

root_dir = 'Frames/'
correct = 0
detected = 0
not_detected = 0

for image in trange(frames_list.shape[0]):
    img_path = os.path.join(root_dir,frames_list.iloc[image,0]) # finding the path of an image
    
    img = PIL.Image.open(img_path)  # reading the image
    cls_name = frames_list.iloc[image,1] # class name
    y = c2i[cls_name] # convert to intiger
    
    imh,imw = detect_image(img)
    
    img_transforms=transforms.Compose([transforms.Resize((imh,imw)), # resize, pad and transform to tensor
         transforms.Pad((max(int((imh-imw)/2),0), 
              max(int((imw-imh)/2),0), max(int((imh-imw)/2),0),
              max(int((imw-imh)/2),0)), (128,128,128)),
         transforms.ToTensor(),
         ])
    # transforming image to tensor 
    # https://towardsdatascience.com/object-detection-and-tracking-in-pytorch-b3cf1a696a98
    image_tensor = img_transforms(img).float()  
    image_tensor = image_tensor.unsqueeze_(0)
    input_img = Variable(image_tensor.type(Tensor))
    
    with torch.no_grad():
        detections = model(input_img)
               
        detections  = utls.utils.non_max_suppression(detections, conf_thres, nms_thres)
        if detections[0] is not None:
            ypred = detections[0][0][6].numpy().tolist()
            class_total[cls_name] +=ypred
            if ypred ==y:
                class_correct[i2c(y)[0]] +=1
                correct +=1
              
            
        else:
             not_detected+=1
            
         
    
        
print(f'Accuracy of the YOLOV3 on the {frames_list.shape[0]}test images: {100 * correct / frames_list.shape[0]:0.2f}%\n'
    
)

print(
    f'True Positives:{correct}\nFalse Positives:{frames_list.shape[0]-not_detected-correct}\n'
)

for k,v in class_correct.items():
    cls_acc = 100 * class_correct[k]/class_total[k]
    print(f'Accuracy of {k}: {cls_acc:0.2f}%')

HBox(children=(FloatProgress(value=0.0, max=3570.0), HTML(value='')))


Accuracy of the YOLOV3 on the 3570test images: 18.12%

True Positives:647
False Positives:183

Accuracy of car: 30.90%
Accuracy of truck: 8.21%
Accuracy of bus: 5.56%
Accuracy of motorcycle: 0.00%
