In [12]:
# YOLOV3 Packages from  https://github.com/eriklindernoren/PyTorch-YOLOv3
import models as net
import utils as utls


import os
import pandas as pd
import torch
import numpy as np
import cv2, json, mimetypes, pdb, PIL, requests
from torchvision import transforms, utils
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torch.autograd import Variable
from tqdm.notebook import trange, tqdm

Tensor = torch.cuda.FloatTensor

# Data Preparation

In [13]:
csv_file = 'mergedlbls.csv'
frames_list = pd.read_csv(csv_file)

In [14]:
frames_list.head()

Unnamed: 0,im_name,class_id,Xc,Yc,w,h
0,RainyDay2/00081.jpg,truck,1011,594,56,58
1,RainyDay2/00082.jpg,truck,1006,599,54,59
2,RainyDay2/00083.jpg,truck,1007,600,60,71
3,RainyDay2/00084.jpg,truck,1006,603,58,62
4,RainyDay2/00085.jpg,truck,1007,598,52,66


In [15]:
cls_cnt = frames_list.iloc[:,1].value_counts()
class_total =cls_cnt.to_dict()
class_total

{'car': 6904, 'truck': 356, 'bus': 18, 'motorcycle': 14}

In [16]:
def detect_image(img):
    # scale image
    ratio = min(inp_w/img.size[0], inp_h/img.size[1])
    imw = round(img.size[0] * ratio)
    imh = round(img.size[1] * ratio)
    
    
    return imh,imw

In [17]:
# to avoid 0/0
def division(n, d):
    return n / d if d else 0

# Models

## YOLOV3

to load YOLOV3 model run this:

In [None]:
# !git clone https://github.com/eriklindernoren/PyTorch-YOLOv3.git
# cd PyTorch-YOLOv3/       

copy the Frames/ folder in the root of cd PyTorch-YOLOv3/

to have the pretrained weights go to the weight folder and run download_weights.sh

In [34]:
# cd weights/
# !bash download_weights.sh # this command works in linux

### Parameters

In [18]:
cfg_file = 'config/yolov3.cfg' # loading configuration file
weight_file = 'weights/yolov3.weights' # lodeing pre-trained weights
class_path = 'data/coco.names'
# input image size
inp_w = 416
inp_h = 416 
conf_thres=0.8
nms_thres=0.4


### loading CNN

In [19]:
model = net.Darknet(cfg_file,img_size=inp_w)
model.load_darknet_weights(weight_file)
torch.cuda.empty_cache()

model.cuda()
model.eval()

class_names = utls.utils.load_classes(class_path) # MSCOCO CLass names

### setting the class names and indexes

In [35]:
#COCO names
coco_f = 'data/coco.names'
with open(coco_f) as f:
    COCO_NAMES = f.read().splitlines()
# indexing the existing labels based on coco names
c2i = {k:v for v,k in enumerate(COCO_NAMES)}
targets= {'car':c2i['car'],'truck':c2i['truck'],'bus':c2i['bus'],'motorcycle':c2i['motorbike']} # object classes
i2c = lambda i : [k for k,v in targets.items() if v ==i]
targets

{'car': 2, 'truck': 7, 'bus': 5, 'motorcycle': 3}

### Applying model to the dataset

In [22]:
class_correct = {'car':0,'truck':0,'bus':0,'motorcycle':0}
#class_total = {'car':0,'truck':0,'bus':0,'motorcycle':0} # total predicted classes

root_dir = 'Frames/'
correct = 0
detected = 0
not_detected = 0

for image in trange(frames_list.shape[0]):
    img_path = os.path.join(root_dir,frames_list.iloc[image,0]) # finding the path of an image
    
    img = PIL.Image.open(img_path)  # reading the image
    cls_name = frames_list.iloc[image,1] # class name
    y = targets[cls_name] # convert to intiger
    
    imh,imw = detect_image(img)
    
    img_transforms=transforms.Compose([transforms.Resize((imh,imw)), # resize, pad and transform to tensor
         transforms.Pad((max(int((imh-imw)/2),0), 
              max(int((imw-imh)/2),0), max(int((imh-imw)/2),0),
              max(int((imw-imh)/2),0)), (128,128,128)),
         transforms.ToTensor(),
         ])
    # transforming image to tensor 
    # https://towardsdatascience.com/object-detection-and-tracking-in-pytorch-b3cf1a696a98
    image_tensor = img_transforms(img).float()
    
    image_tensor = image_tensor.unsqueeze_(0)
    input_img = Variable(image_tensor.type(Tensor))
    
    with torch.no_grad():
        detections = model(input_img)
               
        detections  = utls.utils.non_max_suppression(detections, conf_thres, nms_thres)
        if detections[0] is not None:
            ypred =[]
            for i in range (len(detections[0])):
                ypred.append(detections[0][i][6].numpy().tolist())
                

            if y in ypred:
                class_correct[cls_name] +=1
                correct +=1
              
            
        else:
             not_detected+=1
            
         
    
        
print(f'Accuracy of the YOLOV3 on the {frames_list.shape[0]} test images: {100 * correct / frames_list.shape[0]:0.2f}%\n'
    
)



for k,v in class_correct.items():
    cls_acc = 100 * division(class_correct[k],class_total[k])
    print(f'Accuracy of {k}: {cls_acc:0.2f}%')

HBox(children=(FloatProgress(value=0.0, max=7292.0), HTML(value='')))


Accuracy of the YOLOV3 on the 7292 test images: 20.80%

Accuracy of car: 20.39%
Accuracy of truck: 30.34%
Accuracy of bus: 5.56%
Accuracy of motorcycle: 0.00%


In [23]:
class_correct

{'car': 1408, 'truck': 108, 'bus': 1, 'motorcycle': 0}

In [24]:
class_total

{'car': 6904, 'truck': 356, 'bus': 18, 'motorcycle': 14}

In [25]:
correct

1517

## FAST_RCNN

### Parameters

In [26]:
# coco names for the pretrained Fast_RCNN
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

In [27]:
# input image size
inp_w = 300
inp_h = 400

In [28]:
# convering classes
i2c = lambda i : [k for k,v in targets.items() if v ==i]
c2i = {k:v for v,k in enumerate(COCO_INSTANCE_CATEGORY_NAMES)}

targets= {'car':c2i['car'],'truck':c2i['truck'],'bus':c2i['bus'],'motorcycle':c2i['motorcycle']} # object classes
targets['car']

3

### Loading FAST_RCNN

In [29]:
# loading the model
torch.cuda.empty_cache()

model = fasterrcnn_resnet50_fpn(pretrained=True)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model.eval()
model.cuda()
model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform()
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d()
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d()
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d()
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d()
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d()
          )
  

### Applying model to the dataset

In [30]:
class_correct = {'car':0,'truck':0,'bus':0,'motorcycle':0}
#class_total = {'car':0,'truck':0,'bus':0,'motorcycle':0} # total predicted classes
trsh = .8
root_dir = 'Frames/'
correct = 0
detected = 0
not_detected = 0

for image in trange(frames_list.shape[0]):
    img_path = os.path.join(root_dir,frames_list.iloc[image,0]) # finding the path of an image
    
    img = PIL.Image.open(img_path)  # reading the image
    cls_name = frames_list.iloc[image,1] # class name
    y = targets[cls_name] # convert to intiger
    
    imh,imw = detect_image(img)
    
    img_transforms=transforms.Compose([transforms.Resize((imh,imw)), # resize, pad and transform to tensor
         transforms.Pad((max(int((imh-imw)/2),0), 
              max(int((imw-imh)/2),0), max(int((imh-imw)/2),0),
              max(int((imw-imh)/2),0)), (128,128,128)),
         transforms.ToTensor(),
         ])
    # transforming image to tensor 
    # https://towardsdatascience.com/object-detection-and-tracking-in-pytorch-b3cf1a696a98
    image_tensor = img_transforms(img).float()
    
    image_tensor = image_tensor.unsqueeze_(0)
    input_img = Variable(image_tensor.type(Tensor))
    
    with torch.no_grad():
        detections = model(input_img)
        
        if detections is not None:
                labels = detections[0]['labels'].cpu().tolist()
                scores = detections[0]['scores'].cpu().tolist()
                scores_ =[x for x in scores if x>trsh]
                
                if len(scores_)!=0:
                    scores_idx = scores.index(scores_[0])
                    if scores_idx is not None:
                        ypred = labels[scores_idx:]
                else:
                    ypred =[]
                    
                if len(ypred)!=0:    

#                     for y in ypred:
#                             class_total[cls_name] +=y # counting predicted total objects per class 
                    if y in ypred:
                        class_correct[cls_name] +=1
                        correct +=1


                else:
                     not_detected+=1
            
         
    
        
print(f'Accuracy of the Fast_RCNN on the {frames_list.shape[0]} test images: {100 * correct / frames_list.shape[0]:0.2f}%\n'
    
)



for k,v in class_correct.items():
    cls_acc = 100 * division(class_correct[k],class_total[k])
    print(f'Accuracy of {k}: {cls_acc:0.2f}%')

HBox(children=(FloatProgress(value=0.0, max=7292.0), HTML(value='')))


Accuracy of the Fast_RCNN on the 7292 test images: 23.35%

Accuracy of car: 22.77%
Accuracy of truck: 36.52%
Accuracy of bus: 5.56%
Accuracy of motorcycle: 0.00%


In [31]:
class_correct

{'car': 1572, 'truck': 130, 'bus': 1, 'motorcycle': 0}

In [32]:
class_total

{'car': 6904, 'truck': 356, 'bus': 18, 'motorcycle': 14}

In [33]:
correct

1703

## SSD

In [19]:
torch.cuda.empty_cache()