ayooshkathuria · realyanyang · May 23, 2019 · May 23, 2019
diff --git a/detect.py b/detect.py
@@ -108,6 +108,7 @@ def arg_parse():
     start = 0
 
     CUDA = torch.cuda.is_available()
+    # CUDA = False   (uncomment to force CPU inference even when a GPU is available)
 
     num_classes = 80
     classes = load_classes('data/coco.names') 
@@ -123,7 +124,7 @@ def arg_parse():
     assert inp_dim % 32 == 0 
     assert inp_dim > 32
 
-    #If there's a GPU availible, put the model on GPU
+    #If there's a GPU available, put the model on GPU
     if CUDA:
         model.cuda()
 
@@ -136,22 +137,24 @@ def arg_parse():
     try:
         imlist = [osp.join(osp.realpath('.'), images, img) for img in os.listdir(images) if os.path.splitext(img)[1] == '.png' or os.path.splitext(img)[1] =='.jpeg' or os.path.splitext(img)[1] =='.jpg']
     except NotADirectoryError:
-        imlist = []
-        imlist.append(osp.join(osp.realpath('.'), images))
+        imlist = [osp.join(osp.realpath('.'), images)]
     except FileNotFoundError:
         print ("No file or directory with the name {}".format(images))
         exit()
-
+    if osp.isfile(args.det):   # only an existing regular file is invalid; "./det" or "out.v2" are fine directories
+        print('Please input a det directory not a file')
+        exit(1)
     if not os.path.exists(args.det):
         os.makedirs(args.det)
-        
+
     load_batch = time.time()
 
+    # batches is a list of (img_, orig_im, dim) tuples, one per image; img_ is the reshaped copy
     batches = list(map(prep_image, imlist, [inp_dim for x in range(len(imlist))]))
     im_batches = [x[0] for x in batches]
     orig_ims = [x[1] for x in batches]
     im_dim_list = [x[2] for x in batches]
-    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)
+    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)   # each dim is (width, height); repeat(1,2) gives (w, h, w, h) per row
 
 
 
@@ -174,7 +177,7 @@ def arg_parse():
 
 
     write = False
-    model(get_test_input(inp_dim, CUDA), CUDA)
+    # model(get_test_input(inp_dim, CUDA), CUDA)   (forward pass on the test input, currently disabled)
 
     start_det_loop = time.time()
 
@@ -195,7 +198,7 @@ def arg_parse():
         # B x (bbox cord x no. of anchors) x grid_w x grid_h --> B x bbox x (all the boxes) 
         # Put every proposed box as a row.
         with torch.no_grad():
-            prediction = model(Variable(batch), CUDA)
+            prediction = model(Variable(batch), CUDA)   # e.g. shape (1, 10647, 85) for a single 416x416 input with 80 classes
 
 #        prediction = prediction[:,scale_indices]
 
@@ -208,7 +211,7 @@ def arg_parse():
         #clubbing these ops in one loop instead of two. 
         #loops are slower than vectorised operations. 
 
-        prediction = write_results(prediction, confidence, num_classes, nms = True, nms_conf = nms_thesh)
+        prediction = write_results(prediction, confidence, num_classes, nms=True, nms_conf=nms_thesh)
 
 
         if type(prediction) == int:
@@ -282,26 +285,44 @@ def arg_parse():
     draw = time.time()
 
 
-    def write(x, batches, results):
-        c1 = tuple(x[1:3].int())
-        c2 = tuple(x[3:5].int())
-        img = results[int(x[0])]
-        cls = int(x[-1])
-        label = "{0}".format(classes[cls])
-        color = random.choice(colors)
-        cv2.rectangle(img, c1, c2,color, 1)
-        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
-        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
-        cv2.rectangle(img, c1, c2,color, -1)
-        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1)
+    def write(output_info, results):
+        """Draw every detection for one image onto that image, in place.
+
+        output_info: 2-D tensor of detection rows that all share one image
+            index in column 0; columns 1-4 are the box corners (x1, y1,
+            x2, y2) and the last column is the class index.
+        results: sequence of images, indexed by column 0 of output_info.
+
+        Returns the image that was drawn on (the same object held in results).
+        """
+        img = results[int(output_info[0][0])]
+        # If everything after the index column sums to zero there is nothing to draw.
+        if torch.sum(output_info[:, 1:]) == 0:
+            return img
+        for x in output_info:
+            # Convert to plain Python ints so OpenCV receives native integer points.
+            c1 = tuple(x[1:3].int().tolist())
+            c2 = tuple(x[3:5].int().tolist())
+            cls = int(x[-1])
+            label = "{0}".format(classes[cls])
+            color = random.choice(colors)
+            cv2.rectangle(img, c1, c2, color, 1)
+            t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
+            c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
+            cv2.rectangle(img, c1, c2, color, -1)
+            cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1)
         return img
 
 
-    list(map(lambda x: write(x, im_batches, orig_ims), output))
-
-    det_names = pd.Series(imlist).apply(lambda x: "{}/det_{}".format(args.det,x.split("/")[-1]))
-
-    list(map(cv2.imwrite, det_names, orig_ims))
+    # Draw per image; write() mutates the arrays in orig_ims, so saving
+    # orig_ims by position keeps every file name paired with its own image,
+    # including images that have no detection rows at all.
+    for num in torch.unique(output[:, 0]):
+        write(output[output[:, 0] == num], orig_ims)
+
+    det_names = [osp.join(args.det, osp.split(img)[-1]) for img in imlist]
+    for output_file_name, img in zip(det_names, orig_ims):
+        cv2.imwrite(output_file_name, img)
 
     end = time.time()
 

diff --git a/util.py b/util.py
@@ -22,7 +22,7 @@ def convert2cpu(matrix):
     else:
         return matrix
 
-def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA = True):
+def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA=True):
     batch_size = prediction.size(0)
     stride =  inp_dim // prediction.size(2)
     grid_size = inp_dim // stride
@@ -96,7 +96,7 @@ def unique(tensor):
     tensor_res.copy_(unique_tensor)
     return tensor_res
 
-def write_results(prediction, confidence, num_classes, nms = True, nms_conf = 0.4):
+def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
     conf_mask = (prediction[:,:,4] > confidence).float().unsqueeze(2)
     prediction = prediction*conf_mask
 
@@ -112,13 +112,13 @@ def write_results(prediction, confidence, num_classes, nms = True, nms_conf = 0.
     box_a[:,:,1] = (prediction[:,:,1] - prediction[:,:,3]/2)
     box_a[:,:,2] = (prediction[:,:,0] + prediction[:,:,2]/2) 
     box_a[:,:,3] = (prediction[:,:,1] + prediction[:,:,3]/2)
-    prediction[:,:,:4] = box_a[:,:,:4]
+    prediction[:,:,:4] = box_a[:,:,:4]    # (center x, center y, w, h) --> corners (x1, y1, x2, y2)
 
 
 
     batch_size = prediction.size(0)
 
-    output = prediction.new(1, prediction.size(2) + 1)
+    output = prediction.new_zeros(1, 8)     # all-zero placeholder on prediction's device/dtype, for when nothing is detected
     write = False
 
 
@@ -131,7 +131,7 @@ def write_results(prediction, confidence, num_classes, nms = True, nms_conf = 0.
         #Get the class having maximum score, and the index of that class
         #Get rid of num_classes softmax scores 
         #Add the class index and the class score of class having maximum score
-        max_conf, max_conf_score = torch.max(image_pred[:,5:5+ num_classes], 1)
+        max_conf, max_conf_score = torch.max(image_pred[:,5:5+ num_classes], 1)   # values, indexes
         max_conf = max_conf.float().unsqueeze(1)
         max_conf_score = max_conf_score.float().unsqueeze(1)
         seq = (image_pred[:,:5], max_conf, max_conf_score)