In [1]:
from __future__ import division
import torch
import torch.nn as nn
import torch.nn.functional as F 
from torch.autograd import Variable
import numpy as np
import cv2

In [2]:
def predict_transform( prediction, inp_dim, anchors, num_classes, CUDA=True):
    
    batch_size = prediction.size(0)
    stride = inp_dim //prediction.size(2)
    grid_size  = inp_dim //stride
    bbox_attrs = 5 +num_classes
    num_anchors = len(anchors)

    prediction = prediction.view(batch_size, bbox_attrs*num_anchors, grid_size*grid_size)
    prediction = prediction.transpose(1,2).contiguous()
    prediction = prediction.view(batch_size, grid_size*grid_size*num_anchors,bbox_attrs)
    prediction = prediction.cuda()
    
    # To maintain consistency with input image dimensions
    anchors = [(a[0]/stride, a[1]/stride) for a in anchors]
    
    # Sigmoid the centre_X, centre_Y, and object confidence
    prediction[:,:,0] = torch.sigmoid(prediction[:,:,0])
    prediction[:,:,1] = torch.sigmoid(prediction[:,:,1])
    prediction[:,:,4] = torch.sigmoid(prediction[:,:,4])
    
    # Add the center offsets
    grid = np.arange(grid_size)
    a,b = np.meshgrid(grid, grid)
    x_offset = torch.FloatTensor(a).view(-1,1)
    y_offset = torch.FloatTensor(b).view(-1,1)
    
    if CUDA:
        x_offset = x_offset.cuda()
        y_offset = y_offset.cuda()
        
    x_y_offset = torch.cat((x_offset, y_offset),1).repeat(1,num_anchors).view(-1,2).unsqueeze(0)
    prediction[:,:,:2] += x_y_offset
    
    # Apply the anchors to the dimensions of the bounding box.
    #log space transform height and the width
    anchors = torch.FloatTensor(anchors)
    
    if CUDA:
        anchors = anchors.cuda()
        
    anchors = anchors.repeat(grid_size*grid_size,1).unsqueeze(0)
    prediction[:,:,2:4] = torch.exp(prediction[:,:,2:4])*anchors
    
    # Apply sigmoid activation to the class scores
    prediction[:,:,5: 5+num_classes] = torch.sigmoid((prediction[:,:,5:5+num_classes]))
    
    # Resize the detections map to the size of the input image.
    prediction[:,:,:4] *=stride
    return prediction

In [3]:
# Parse the configuration file
def parse_cfg(cfgfile):
    file = open(cfgfile, 'r')
    lines = file.read().split('\n')
    lines = [x for x in lines if len(x) > 0]
    # To get rid of the comments
    lines = [x for x in lines if x[0] != '#']
    
    lines = [x.rstrip().lstrip() for x in lines]
    
    block = {}
    blocks = []
    
    for line in lines:
        # This marks the start of a new block
        if line[0] == "[":      
            # block is storing the previous block attributes so need to be added to blocks and reinitialized
            if len(block) !=0:
                blocks.append(block)
                block = {}
            # Indicate the type of block    
            block["type"]=line[1:-1].rstrip() 
        else:
            # Add various attributes of the blocks as key, value pairs
            key,value = line.split("=")
            block[key.rstrip()] = value.lstrip()
    blocks.append(block)
    
    return blocks
    

In [4]:
class EmptyLayer(nn.Module):
    def __init__(self):
        super(EmptyLayer,self).__init__()

In [5]:
class DetectionLayer(nn.Module):
    def __init__(self,anchors):
        super(DetectionLayer, self).__init__()
        self.anchors = anchors
            

In [6]:
def create_modules(blocks):
    # Stores information about the neural network such as params etc.
    net_info = blocks[0]
    module_list = nn.ModuleList()
    # To decide the depth of kernel we use the number of filters
    # in previous layer.
    prev_filters = 3
    # Keep track of all preceding layers in the block.
    output_filters = []
    
    for index,x in enumerate(blocks[1:]):
        module = nn.Sequential()
        
        # check the type of block
        # create a new module for the block
        # append to module_list
        
        if (x["type"] == "convolutional"):
            # Get the info about the layer
            activation = x["activation"]
            
            # Some layers might not have batch normalization
            try:
                batch_normalize = int(x["batch_normalize"])
                bias = False
            except:
                batch_normalize = 0
                bias = True
            
            #Other parameters described in the config file
            filters = int(x["filters"])
            padding = int(x["pad"])
            kernel_size = int(x["size"])
            stride = int(x["stride"])
            
            if padding:
                pad = (kernel_size -1) // 2
            else:
                pad = 0
                
            # Add the convolutional layer
            conv = nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias = bias)
            module.add_module("conv_{0}".format(index),conv)
            
            # Add the Batch Norm layer
            if batch_normalize:
                bn = nn.BatchNorm2d(filters)
                module.add_module("batch_norm_{0}".format(index),bn)
                
            # Check the activation
            # It is either Linear or a leaky ReLu for YOLO
            if activation == "leaky":
                activn = nn.LeakyReLU(0.1, inplace = True)
                module.add_module("leaky_{0}".format(index),activn)
        
        # If it's an upsampling layer
        # We use Bilinear2dUpsampling
        elif (x["type"] == "upsample"):
            stride = int(x["stride"])
            upsample = nn.Upsample(scale_factor = 2, mode = "bilinear")
            module.add_module("upsample_{}".format(index), upsample)
            
        # If it's a route layer
        elif (x["type"] == "route"):
            x["layers"] = x["layers"].split(',')
            # Start of a route
            # Note that layers can have 2 numbers (-1,61) as discussed before
            start = int(x["layers"][0])
            # end, if there exists one
            try:
                end = int(x["layers"][1])
            except:
                end = 0
            
            # Positive annotation
            if start > 0:
                start = start - index
            if end > 0:
                end = end - index
            
            route = EmptyLayer()
            module.add_module("route_{0}".format(index), route)
            # Update the filters variable to hold the number of filters
            # outputted by a route layer.
            if end < 0:
                filters = output_filters[index + start] + output_filters[index + end]
            else:
                filters = output_filters[index + start]
        
        # shortcut corresponds to skip connections like in ResNet
        elif x["type"] == "shortcut":
            shortcut = EmptyLayer()
            module.add_module("shortcut_{}".format(index), shortcut)
            
         # Yolo is the detection layer
        elif x["type"] == "yolo":
            mask = x["mask"].split(",")
            mask = [int(a) for a in mask]
            
            anchors = x["anchors"].split(",")
            anchors = [int(a) for a in anchors]
            anchors = [(anchors[i],anchors[i+1]) for i in range(0,len(anchors),2)]
            anchors = [anchors[i] for i in mask]
            
            
            detection = DetectionLayer(anchors)
            module.add_module("Detection_{}".format(index), detection)
            
            
        # Some bookkeeping: add the module, update the filters list
        # add all the previous layers
        module_list.append(module)
        prev_filters = filters
        output_filters.append(filters)
    return(net_info, module_list)
        

In [7]:
blocks = parse_cfg("cfg/yolov3.cfg")

In [8]:
create_modules(blocks)

({'angle': '0',
  'batch': '1',
  'burn_in': '1000',
  'channels': '3',
  'decay': '0.0005',
  'exposure': '1.5',
  'height': '416',
  'hue': '.1',
  'learning_rate': '0.001',
  'max_batches': '500200',
  'momentum': '0.9',
  'policy': 'steps',
  'saturation': '1.5',
  'scales': '.1,.1',
  'steps': '400000,450000',
  'subdivisions': '1',
  'type': 'net',
  'width': '416'},
 ModuleList(
   (0): Sequential(
     (conv_0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
     (batch_norm_0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True)
     (leaky_0): LeakyReLU(0.1, inplace)
   )
   (1): Sequential(
     (conv_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
     (batch_norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
     (leaky_1): LeakyReLU(0.1, inplace)
   )
   (2): Sequential(
     (conv_2): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
     (batch_norm_2): BatchNorm2d(32, eps=1e-05, m

In [9]:
# Define a network for our detector
class Darknet(nn.Module):
    def __init__(self,cfgfile):
        super(Darknet, self).__init__()
        self.blocks = parse_cfg(cfgfile)
        self.net_info, self.module_list = create_modules(self.blocks)
    
    def forward(self, x, CUDA):
        modules = self.blocks[1:]
        # Cache the outputs for the route layer
        outputs = {}
    
        write = 0
        # Modules are appended in the same order as they are present
        # in the cfg file, so we can just run our input through 
        # each module to get the output. 
        for i, module in enumerate(modules):
            module_type = (module["type"])
            if module_type == "convolutional" or module_type == "upsample":
                x= x.cuda()
                x = self.module_list[i](x)
                
            elif module_type == "route":
                layers = module["layers"]
                layers = [int(a) for a in layers]
            
                if (layers[0])>0:
                    layers[0] = layers[0] - i
                
                if len(layers) == 1:
                    x = outputs[i+(layers[0])]
                
                else:
                    if (layers[1]) > 0:
                        layers[1] = layers[1] -i
                    
                    map1 = outputs[i+layers[0]]
                    map2 = outputs[i+layers[1]]
                    
                    x = torch.cat((map1,map2),1)
                    
            elif module_type == "shortcut":
                from_ = int(module["from"])
                x = outputs[i-1] + outputs[i+from_]
                
            elif module_type == 'yolo':
                
                # Get the anchor dimensions
                anchors = self.module_list[i][0].anchors
                # Get the input dimensions
                inp_dim = int(self.net_info["height"])
                
                # Get the number of classes
                num_classes = int(module["classes"])
                
                # Transform
                x = x.data
                x = predict_transform(x, inp_dim, anchors, num_classes,CUDA)
                if not write:
                    detections = x
                    write = 1
                
                else:
                    detections = torch.cat((detections,x),1)
            outputs[i] = x
        
        return detections
                    
         # Function to load weights of the pretrained Yolo network
    def load_weights(self,weightfile):
        #Open the weights file
        fp = open(weightfile, "rb")

        # The first 5 values are header information
        # 1. Major version number
        # 2. Minor version number
        # 3. Subversion number
        # 4,5. Images seen by the network (during training)
        header = np.fromfile(fp,dtype = np.int32, count =5)
        self.header = torch.from_numpy(header)
        self.seen = self.header[3]

        # Load the weights
        weights = np.fromfile(fp,dtype=np.float32)

        # Iterate over the weights file and load the weights into the modules
        # of our network

        # ptr keeps track of where we are in the weights array
        ptr = 0
        for i in range(len(self.module_list)):
            # Iterate over the layer type
            module_type = self.blocks[i+1]["type"]

            # IF module_type is convolutional load weights
            # Otherwise ignore
            if module_type == "convolutional":
                model = self.module_list[i]
                try:
                    batch_normalize = int(self.blocks[i+1]["batch_normalize"])
                except:
                    batch_normalize = 0

                conv = model[0]

                if (batch_normalize):
                    bn = model[1]

                    # Get the number of weights of Batch Norm Layer
                    num_bn_biases = bn.bias.numel()

                    # Load the weights
                    bn_biases = torch.from_numpy(weights[ptr:ptr + num_bn_biases])
                    # Increment ptr in the weights file by number of weights used
                    ptr += num_bn_biases

                    bn_weights = torch.from_numpy(weights[ptr:ptr + num_bn_biases])
                    ptr += num_bn_biases

                    bn_running_mean = torch.from_numpy(weights[ptr:ptr +num_bn_biases])
                    ptr += num_bn_biases

                    bn_running_var = torch.from_numpy(weights[ptr:ptr +num_bn_biases])
                    ptr += num_bn_biases

                    # Cast the loaded weights into dims of model weights
                    bn_biases = bn_biases.view_as(bn.bias.data)
                    bn_weights = bn_weights.view_as(bn.weight.data)
                    bn_running_mean = bn_running_mean.view_as(bn.running_mean)
                    bn_running_var = bn_running_var.view_as(bn.running_var)

                    # Copy the data to model
                    bn.bias.data.copy_(bn_biases)
                    bn.weight.data.copy_(bn_weights)
                    bn.running_mean.copy_(bn_running_mean)
                    bn.running_var.copy_(bn_running_var)

                else:
                    #Number of biases
                    num_biases = conv.bias.numel()

                    # Load the weights
                    conv_biases = torch.from_numpy(weights[ptr:ptr+num_biases])
                    ptr = ptr +num_biases

                    # reshape the loaded weights according to the dims of the
                    # model weights
                    conv_biases = conv_biases.view_as(conv.bias.data)

                    # Finally copy the data
                    conv.bias.data.copy_(conv_biases)


                # Load the weights for the Convolutional layers
                num_weights = conv.weight.numel()

                # Do the same as above for weights
                conv_weights = torch.from_numpy(weights[ptr:ptr+num_weights])
                ptr = ptr + num_weights

                conv_weights = conv_weights.view_as(conv.weight.data)
                conv.weight.data.copy_(conv_weights)               


In [10]:
# Now we can load the network alongwith the pretrained weights
model = Darknet("cfg/yolov3.cfg")
model.load_weights("yolov3.weights")

In [11]:
# Unique is used to get an individual instance of an object
# if multiple instances of the object are detected in the 
# image.
def unique(tensor):
    tensor_np = tensor.cpu().numpy()
    unique_np = np.unique(tensor_np)
    unique_tensor =torch.from_numpy(unique_np)
        
    tensor_res = tensor.new(unique_tensor.shape)
    tensor_res.copy_(unique_tensor)
    return tensor_res


In [12]:
# Subject the detections obtained to Non-maximal suppression and
# objectness score thresholding to get correct detector answer.

def write_results(prediction, confidence, num_classes, nms_conf = 0.4):
# nms_conf is NMS IoU threshold
    # If objectness score is below a threshold set all attribute values of
    # the bounding box to zero.
    conf_mask = (prediction[:,:,4] > confidence).float().unsqueeze(2)
    prediction = prediction*conf_mask
    
    # Transform box corners to calculate IoU
    box_corner = prediction.new(prediction.shape)
    box_corner[:,:,0] = (prediction[:,:,0] - prediction[:,:,2]/2)
    box_corner[:,:,1] = (prediction[:,:,1] - prediction[:,:,3]/2)
    box_corner[:,:,2] = (prediction[:,:,0] + prediction[:,:,2]/2)
    box_corner[:,:,3] = (prediction[:,:,1] + prediction[:,:,3]/2)
    prediction[:,:,:4] = box_corner[:,:,:4]
    
    
    # NMS and objectness score thresholding should be done for 1 image at a time
    batch_size = prediction.size(0)
    
    write =False
    
    for ind in range(batch_size):
        # Image tensor
        image_pred = prediction[ind]
            #confidence thresholding
            #NMS
        # Remove the 80 classes since we just want the maximum one
        # Add index of that class, and also class score of that class
        # Use the max to get required class
        max_conf, max_conf_score = torch.max(image_pred[:,5:5+num_classes],1)
        
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_score = max_conf_score.float().unsqueeze(1)
        seq = (image_pred[:,:5], max_conf, max_conf_score)
        image_pred = torch.cat(seq,1)
        
        # Remove those bounding box rows having an object confidence
        # less than threshold
        non_zero_ind = (torch.nonzero(image_pred[:,4]))
        try:
            image_pred_ = image_pred[non_zero_ind.squeeze(),:].view(-1,7)
        except:
            continue
        
        # For Pytorch 0.4 compatibility
        # Since the above code will not raise exception for no detection
        # as scalars are supported in Pytorch 0.4
        # This will handle those cases where there are no detections
        if image_pred_.shape[0] == 0:
            continue
        # Get the various classes detected in the image
        # -1 index holds the class index
        
        # Unique will ensure that only one instance of the object is selected
        img_classes = unique(image_pred_[:,-1])

        for cls in img_classes:
            # perform NMS
            # Extract detections of a class
            cls_mask = image_pred_*(image_pred_[:,-1] == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:,-2]).squeeze()
            image_pred_class = image_pred_[class_mask_ind].view(-1,7)
            
            #sort the detection such that the entry with the maximum objects
            #confidence is at the top
            conf_sort_index = torch.sort(image_pred_class[:,4], descending = True) [1]
            image_pred_class = image_pred_class[conf_sort_index]
            # Number of detections
            idx = image_pred_class.size(0)
            
            # Now we can perform NMS
            for i in range(idx):
                # Get the IoUs of all boxes that come after the one 
                # we are looking at in the loop
                try:
                    # bbox_iou takes input as the bounding box row that 
                    # is indexed by the variable i in the loop
                    # Second input to bbox_iou is a tensor of multiple rows
                    # of bounding boxes. Output is a tensor containing IoUs
                    # of the bounding box represented by the first input with each of the
                    # bounding boxes present in the second input.
                    ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i+1:])
                    
                except ValueError:
                    break
                except IndexError:
                    break
                
                # Zero out all the detections that have IoU> threshold
                
                
                # Remove the non-zero entries
                #non_zero_ind = torch.nonzero(image_pred_class[:,4]).squeeze()
                #image_pred_class = image_pred_class[non_zero_ind].view(-1,7)
                
                # Gives the IoU of box, indexed by i with all the bounding boxes 
                # having indices higher than i.
                ious = bbox_iou(image_pred_class[i].unsqueeze(0),image_pred_class[i+1:])
                
                # Zero out all the detections that have IoU > threshold
                iou_mask = (ious < nms_conf).float().unsqueeze(1)
                image_pred_class[i+1:] *= iou_mask
                
                # Remove the non-zero entries
                non_zero_ind = torch.nonzero(image_pred_class[:,4]).squeeze()
                image_pred_class = image_pred_class[non_zero_ind]
                
                batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
                # Repeat the batch_id for as many detections of the class cls in the image
                seq = batch_ind, image_pred_class
                
                if not write:
                    output = torch.cat(seq,1)
                    write = True
                else:
                    out = torch.cat(seq,1)
                    output = torch.cat((output, out))
    try:
        return output
    except:
        return 0
                

In [13]:

def bbox_iou(box1,box2):
    """
    Returns the IoU of two bounding boxes
    
    """
    # Get the coordinates of bounding boxes
    b1_x1, b1_y1, b1_x2, b1_y2 = box1[:,0], box1[:,1], box1[:,2], box1[:,3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[:,0], box2[:,1], box2[:,2], box2[:,3]
    
    # get the coordinates of the intersection rectangle
    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)
    
    # Intersection area
    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0 ) * torch.clamp(inter_rect_y2 - inter_rect_y1 +1, min = 0)
    
    # Union Area
    b1_area = (b1_x2 - b1_x1 +1 )*(b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 +1 )*(b2_y2 - b2_y1 + 1)
    
    iou = inter_area / (b1_area + b2_area - inter_area)
    
    return iou


In [14]:
# Now to test our code
from __future__ import division
import time
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2
import argparse
import os
import os.path as osp
import pickle as pkl
import pandas as pd
import random

# Need to add argparse commands here if required

In [15]:
def load_classes(namesfile):
    fp = open(namesfile, "r")
    names = fp.read().split("\n")[:-1]
    return names

In [16]:
# Object detection is done according to classes For COCO dataset
num_classes = 80 
classes = load_classes("data/coco.names")

In [17]:
print classes

['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [18]:
# Load the network
model = Darknet("cfg/yolov3.cfg")
model.load_weights("yolov3.weights")

model.cuda()
model.eval()


Darknet(
  (module_list): ModuleList(
    (0): Sequential(
      (conv_0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch_norm_0): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True)
      (leaky_0): LeakyReLU(0.1, inplace)
    )
    (1): Sequential(
      (conv_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (batch_norm_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      (leaky_1): LeakyReLU(0.1, inplace)
    )
    (2): Sequential(
      (conv_2): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (batch_norm_2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True)
      (leaky_2): LeakyReLU(0.1, inplace)
    )
    (3): Sequential(
      (conv_3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch_norm_3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      (leaky_3): LeakyReLU(0.1, inplace)
    )
    (4): Sequential(
      

In [19]:
import os, sys

# Open a file
path = "../val/c0"
dirs = os.listdir( path )
imlist2 = []
# This would print all the files and directories
for file in dirs:
    imlist2.append(file)
imlist = []
for files in imlist2:
    imlist.append(osp.join(osp.realpath('../val/c0'), files))

In [20]:
# Make the outputs directory
if not os.path.exists('../test_outputs/c0'):
    os.makedirs('../test_outputs/c0')

In [21]:
# Load the images
#loaded_ims = [cv2.imread(x) for x in imlist]
loaded_ims2 = [cv2.imread(x) for x in imlist]

In [22]:
#print (loaded_ims[0].shape)
print (loaded_ims2[0].shape)

(480, 640, 3)


# May need to write code for resizing image to maintain aspect ratio

In [23]:
# Takes OpenCV images and converts it to the input of our network
def prep_image(img, inp_dim):
    """
    Prepare image for inputting to the neural network.
    
    Returns a Variable
    """
    img = cv2.resize(img, (inp_dim, inp_dim))
    img = img[:,:,::-1].transpose((2,0,1)).copy()
    img = torch.from_numpy(img).float().div(255.0).unsqueeze(0)
    return img

In [24]:
# Maintain a list containing the dimensions of the original images
inp_dim = 416
# PyTorch Variables for images
im_batches = list(map(prep_image, loaded_ims2, [inp_dim for x in range(len(imlist))]))

# List containing dimensions of original images
#im_dim_list = [(x.shape[1], x.shape[0]) for x in loaded_ims2]
#im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)

#im_dim_list = im_dim_list.cuda()


In [25]:
# Make batches for sending  for testing
batch_size = 1
leftover = 0 
#if (len(im_dim_list) % batch_size):
#    leftover = 1
    
if batch_size != 1:
    num_batches = len(imlist) // batch_size +leftover
    im_batches = [torch.cat((im_batches[i*batch_size : min((i+1)* batch_size, len(im_batches))])) for i in range(num_batches)]
    


In [26]:
# Do the detection
confidence = 0.5
nms_thresh = 0.4
write = 0
for i, batch in enumerate(im_batches):
    # load the image
    batch = batch.cuda()
    model = model.cuda()
    prediction = model(Variable(batch, volatile = True),CUDA = 1)
    prediction = write_results(prediction, confidence , num_classes, nms_conf = nms_thresh)
    if type(prediction) ==  int:
        for im_num, image in enumerate(imlist[i*batch_size: min((i +1)*batch_size, len(imlist))]):
            im_id = i*batch_size + im_num
        continue
    
    # transform the attribute from index in batch to index in imlist
    prediction[:,0] += i*batch_size
    
    if not write:
        output = prediction
        write =1
    else:
        output = torch.cat((output, prediction))
        
    for im_num, image in enumerate(imlist[i*batch_size:min((i + 1)*batch_size, len(imlist))]):
        im_id = i*batch_size + im_num
        objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
        
    
    torch.cuda.synchronize()
    

In [27]:
# We check whether there has been has a single detection or not
try:
    output
except NameError:
    print ("NO detections were made")

In [28]:
# We have skipped the part of clamping the output to be within the image
# We have skipped the part to account for resolution


In [29]:
def crop(x,results):
    
    c1 = tuple(x[1:3].int())
    c2 = tuple(x[3:5].int())
    (width,height)=(abs(c1[0]-c2[0]),abs(c1[1]-c2[1]))
    #prinpt(width,height)
    img = results[int(x[0])]
    #print (type(img))
    #if int(x[7]) == 0:
    crop_img = img[c1[0]:c1[0]+width+100, c1[1]:c1[1]+height+100,:]
    #crop_img_new = cv2.resize(crop_img, (416,416,3), interpolation = cv2.INTER_CUBIC)
    
    #else:
        #crop_img = img
    
    if crop_img.shape[0]==0 or crop_img.shape[1]==0:
        resized = img
    elif len(crop_img)==0:
        resized = img
    else:
        #print crop_img.shape
        resized = cv2.resize(crop_img, (224,224), interpolation = cv2.INTER_AREA)
    return resized

In [30]:

#import pdb
y = [] #put images
dict_image_person={}
count=0
for img in output:
    if(int(img[0]) not in dict_image_person):
        dict_image_person[int(img[0])]=count
    else:
        if(int(img[7])==0):
            dict_image_person[int(img[0])]=count
    count+=1
        
count=0
for img in output:
    if((dict_image_person[int(img[0])])==count):
        y+=[crop(img,loaded_ims2)]
    count+=1
    

In [32]:
print len(y),len(loaded_ims2)
print len(output)

269 269
872


In [33]:
args_det = '../test_outputs/c0'

In [34]:
det_names = pd.Series(imlist).apply(lambda x: "{}/det_{}".format(args_det,x.split("/")[-1]))

In [35]:
print (len(det_names))

269


In [36]:
list(map(cv2.imwrite,det_names, y))

[True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,