# Intro to Object Detection Colab

Welcome to the object detection colab! This demo will take you through the steps of running an "out-of-the-box" detection model in SavedModel format on a collection of images.



Imports

In [1]:
import io
import os
import scipy.misc
import numpy as np
import six
import time
import pathlib
import pickle

from six import BytesIO

import matplotlib
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont

import tensorflow as tf
from object_detection.utils import visualization_utils as viz_utils

%matplotlib inline

In [2]:
def load_image_into_numpy_array(path):
  """Load an image from file into a numpy array.

  Puts image into numpy array to feed into tensorflow graph.
  Note that by convention we put it into a numpy array with shape
  (height, width, channels), where channels=3 for RGB.

  The decoded image is always converted to RGB first, so grayscale, palette
  and RGBA files also produce a 3-channel array instead of failing on the
  reshape.

  Args:
    path: a file path (this can be local or on colossus)

  Returns:
    uint8 numpy array with shape (img_height, img_width, 3)
  """
  # Read through a context manager so the file handle is closed promptly.
  with tf.io.gfile.GFile(path, 'rb') as image_file:
    img_data = image_file.read()
  # Force three channels regardless of the mode stored in the file.
  image = Image.open(BytesIO(img_data)).convert('RGB')
  # np.array on an RGB PIL image already yields (height, width, 3); this
  # avoids materialising a Python sequence of per-pixel tuples via getdata().
  return np.array(image, dtype=np.uint8)

# Load the COCO Label Map
# Maps a class id to a {'id': ..., 'name': ...} record. The ids are not
# contiguous: some numbers (for example 12, 26, 29 and 30) have no entry.
category_index = {
    class_id: {'id': class_id, 'name': class_name}
    for class_id, class_name in [
        (1, 'person'), (2, 'bicycle'), (3, 'car'), (4, 'motorcycle'),
        (5, 'airplane'), (6, 'bus'), (7, 'train'), (8, 'truck'),
        (9, 'boat'), (10, 'traffic light'), (11, 'fire hydrant'),
        (13, 'stop sign'), (14, 'parking meter'), (15, 'bench'),
        (16, 'bird'), (17, 'cat'), (18, 'dog'), (19, 'horse'),
        (20, 'sheep'), (21, 'cow'), (22, 'elephant'), (23, 'bear'),
        (24, 'zebra'), (25, 'giraffe'),
        (27, 'backpack'), (28, 'umbrella'),
        (31, 'handbag'), (32, 'tie'), (33, 'suitcase'), (34, 'frisbee'),
        (35, 'skis'), (36, 'snowboard'), (37, 'sports ball'), (38, 'kite'),
        (39, 'baseball bat'), (40, 'baseball glove'), (41, 'skateboard'),
        (42, 'surfboard'), (43, 'tennis racket'), (44, 'bottle'),
        (46, 'wine glass'), (47, 'cup'), (48, 'fork'), (49, 'knife'),
        (50, 'spoon'), (51, 'bowl'), (52, 'banana'), (53, 'apple'),
        (54, 'sandwich'), (55, 'orange'), (56, 'broccoli'), (57, 'carrot'),
        (58, 'hot dog'), (59, 'pizza'), (60, 'donut'), (61, 'cake'),
        (62, 'chair'), (63, 'couch'), (64, 'potted plant'), (65, 'bed'),
        (67, 'dining table'),
        (70, 'toilet'),
        (72, 'tv'), (73, 'laptop'), (74, 'mouse'), (75, 'remote'),
        (76, 'keyboard'), (77, 'cell phone'), (78, 'microwave'),
        (79, 'oven'), (80, 'toaster'), (81, 'sink'), (82, 'refrigerator'),
        (84, 'book'), (85, 'clock'), (86, 'vase'), (87, 'scissors'),
        (88, 'teddy bear'), (89, 'hair drier'), (90, 'toothbrush'),
    ]
}

In [3]:
def compute_iou(box1, box2, X, Y):
    """Scale two boxes and return the scaled first box together with their IoU.

    Each box is a 4-item sequence laid out as (ymin, xmin, ymax, xmax).
    Items 0 and 2 are multiplied by ``X`` and items 1 and 3 by ``Y``, so in
    this function "X" refers to the first (vertical) coordinate of the box
    layout and "Y" to the second (horizontal) one. The original author noted
    that this naming is swapped and left it as is.

    Args:
      box1: first box, (ymin, xmin, ymax, xmax).
      box2: second box, same layout as ``box1``.
      X: scale factor applied to items 0 and 2 of both boxes.
      Y: scale factor applied to items 1 and 3 of both boxes.

    Returns:
      A 5-tuple: the four scaled components of ``box1`` followed by the
      intersection-over-union of the two scaled boxes.
    """
    a_lo_x, a_lo_y, a_hi_x, a_hi_y = box1
    b_lo_x, b_lo_y, b_hi_x, b_hi_y = box2

    # Bring both boxes into the scaled coordinate system.
    a_lo_x, a_hi_x = X*a_lo_x, X*a_hi_x
    b_lo_x, b_hi_x = X*b_lo_x, X*b_hi_x
    a_lo_y, a_hi_y = Y*a_lo_y, Y*a_hi_y
    b_lo_y, b_hi_y = Y*b_lo_y, Y*b_hi_y

    # Overlap extent along each axis: the largest lower bound to the smallest
    # upper bound, clamped at zero so disjoint boxes give an empty overlap.
    overlap_x = max(0, min(a_hi_x, b_hi_x) - max(a_lo_x, b_lo_x))
    overlap_y = max(0, min(a_hi_y, b_hi_y) - max(a_lo_y, b_lo_y))
    overlap_area = overlap_x * overlap_y

    # Sum of both box areas; the overlap is counted twice in this sum.
    area_sum = (a_hi_x - a_lo_x) * (a_hi_y - a_lo_y) + (b_hi_x - b_lo_x) * (b_hi_y - b_lo_y)

    # Union = area_sum - overlap; the epsilon guards against division by zero.
    iou = overlap_area / (area_sum - overlap_area + 1e-6)

    return a_lo_x, a_lo_y, a_hi_x, a_hi_y, iou

In [4]:
def result_over_score_thersh(detections,score_thersh=0.5):
    """Keep only the detections of the first image whose score beats a threshold.

    Selection uses a boolean mask over the scores, so the result is correct
    whether or not the detector returns its scores sorted. For scores sorted
    in descending order this gives the same rows as taking a leading slice.

    Args:
      detections: mapping with 'detection_boxes', 'detection_classes' and
        'detection_scores' entries. Each entry is indexed with [0] (first
        image of the batch) and must then expose ``.numpy()``.
      score_thersh: detections with score strictly greater than this value
        are kept.

    Returns:
      dict with keys 'boxes', 'classes' (cast to int32) and 'scores', each a
      numpy array restricted to the kept detections, in their original order.
    """
    boxes = detections['detection_boxes'][0].numpy()
    classes = detections['detection_classes'][0].numpy().astype(np.int32)
    scores = detections['detection_scores'][0].numpy()

    # Mask instead of "count then slice the first N": slicing silently picks
    # the wrong rows whenever the scores are not sorted in descending order.
    keep = scores > score_thersh

    truth_detections = {'boxes': boxes[keep],
                        'classes': classes[keep],
                        'scores': scores[keep]}
    return truth_detections
# detections_result = result_over_score_thersh(detections)

In [5]:
import os
# Expose only GPU index 1 to this process. An earlier assignment of
# "0,1,2,3" on the preceding line was overwritten immediately and therefore
# had no effect; set that value here instead if all four GPUs are wanted.
# NOTE(review): the variable is expected to be read when the GPU runtime is
# first initialised, so this cell should run before any GPU work - confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [6]:
# @title Choose the model to use, then evaluate the cell.
# Short display name -> model directory name (used further down to build the
# SavedModel path). The 'ground_truth' entry is presumably the model whose
# detections serve as the reference for the others - verify against usage.
MODELS = {'ssd_mo_320': 'ssd_mobilenet_v2_320x320_coco17_tpu-8',
          'rcnn_res_1024': 'faster_rcnn_resnet50_v1_1024x1024_coco17_tpu-8',
          'ground_truth': 'faster_rcnn_inception_resnet_v2_1024x1024_coco17_tpu-8'}

# The @param option list must match the keys of MODELS; the previous list
# offered names that are not in the dict and would raise KeyError if chosen.
model_display_name = 'rcnn_res_1024' # @param ['ssd_mo_320', 'rcnn_res_1024', 'ground_truth']
model_name = MODELS[model_display_name]

In [7]:
# Display the model directory name that was selected via model_display_name.
model_name

'faster_rcnn_resnet50_v1_1024x1024_coco17_tpu-8'

In [8]:
# NOTE(review): both values are absolute paths on one specific machine and
# need to be adjusted before running elsewhere.
# Base directory of the local checkout.
path = "/home/hezhaoliang/github/"
# Directory holding the SavedModel of the selected model.
model_path = f"/home/hezhaoliang/.keras/models/{model_name}/saved_model/"

In [9]:
# Load the SavedModel from model_path and report how long that took.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be distorted by wall-clock adjustments as with time.time().
start_time = time.perf_counter()
tf.keras.backend.clear_session()
detect_fn = tf.saved_model.load(model_path)
end_time = time.perf_counter()
elapsed_time = end_time - start_time
print('Elapsed time: ' + str(elapsed_time) + 's')

Elapsed time: 12.667657613754272s


In [15]:
# Run the loaded detector over a numbered sequence of frames, keep the
# detections above the score threshold for every frame, and persist them.
resolution = "1024x1024" # "640x640" "320x320"
# image_dir = path+'models/research/object_detection/test_images'
image_dir = "/home/hezhaoliang/PerConfigure/dataset/youtube/demo1_10s_15_"+resolution
result_path = "/home/hezhaoliang/PerConfigure/results/"+model_display_name #"detections_results"
elapsed = []             # per-image inference time in seconds
detections_results = []  # one {'boxes', 'classes', 'scores'} dict per image
image_number = 153       # frames are read as image-1.jpg ... image-<image_number>.jpg

# Make sure the output directory exists before the (long) inference loop, so
# that the results are not lost to a failing open() at the very end.
os.makedirs(os.path.dirname(result_path), exist_ok=True)

for i in range(image_number):
#   image_path = os.path.join(image_dir, 'image' + str(i + 1) + '.jpg')
  image_path = os.path.join(image_dir, 'image-' + str(i + 1) + '.jpg')
  image_np = load_image_into_numpy_array(image_path)
  # Add a leading batch axis: the detector is called on a batch of one image.
  input_tensor = np.expand_dims(image_np, 0)
  # Time only the detector call; perf_counter() is monotonic, unlike time.time().
  start_time = time.perf_counter()
  detections = detect_fn(input_tensor)
  end_time = time.perf_counter()
  elapsed.append(end_time - start_time)
  detections_result =  result_over_score_thersh(detections)
  detections_results.append(detections_result)
#   plt.rcParams['figure.figsize'] = [42, 21]
#   label_id_offset = 1
#   image_np_with_detections = image_np.copy()
#   viz_utils.visualize_boxes_and_labels_on_image_array(
#         image_np_with_detections,
#         detections['detection_boxes'][0].numpy(),
#         detections['detection_classes'][0].numpy().astype(np.int32),
#         detections['detection_scores'][0].numpy(),
#         category_index,
#         use_normalized_coordinates=True,
#         max_boxes_to_draw=200,
#         min_score_thresh=.30,
#         agnostic_mode=False)
#   plt.subplot(image_number, 1, i+1)
#   plt.imshow(image_np_with_detections)

# Persist the per-image detections (a pickled list, despite the file suffix).
with open(result_path+'.defaultdict', 'wb') as f:
    pickle.dump(detections_results, f)
# Read the file straight back as a check of the round trip.
# NOTE(review): `ground_truths` holds the results of whichever model is
# currently selected, not necessarily those of the 'ground_truth' model.
with open(result_path+'.defaultdict', 'rb') as f:
    ground_truths = pickle.load(f)

# The first measurement is left out of the mean (presumably because the first
# call includes one-off warm-up cost). With a single image there is nothing
# else to average, so fall back to that one value instead of dividing by zero.
timed = elapsed[1:] if len(elapsed) > 1 else elapsed
mean_elapsed = sum(timed) / float(len(timed)) if timed else float('nan')
print('Elapsed time: ' + str(mean_elapsed) + ' second per image')

Elapsed time: 0.12782645225524902 second per image


In [18]:
# Number of per-image detection records read back from the pickle file.
# NOTE(review): the saved output below may predate the current image_number; re-run to refresh.
len(ground_truths)

3

In [16]:
# Display the reloaded per-image detections (dicts with 'boxes', 'classes', 'scores').
ground_truths

[{'boxes': array([[0.33864772, 0.3281252 , 0.41157007, 0.39268097],
         [0.05120193, 0.61047405, 0.10545065, 0.6483168 ],
         [0.39458755, 0.76962364, 0.5020651 , 0.83852494],
         [0.4706258 , 0.2053618 , 0.5842914 , 0.2928001 ],
         [0.39287895, 0.76422435, 0.50092006, 0.8353104 ],
         [0.4703747 , 0.20697436, 0.5837997 , 0.2935365 ]], dtype=float32),
  'classes': array([3, 3, 8, 8, 3, 3], dtype=int32),
  'scores': array([0.97308314, 0.69923055, 0.64112085, 0.62551105, 0.53636813,
         0.5237527 ], dtype=float32)},
 {'boxes': array([[3.3651888e-01, 3.3459693e-01, 4.1179997e-01, 3.9732474e-01],
         [3.9854392e-01, 7.6783913e-01, 5.0485849e-01, 8.3768934e-01],
         [6.0779071e-01, 3.3180043e-04, 7.8417325e-01, 2.8219119e-02],
         [4.6821800e-01, 2.1095462e-01, 5.8050388e-01, 2.9819962e-01],
         [5.0162226e-02, 6.1104304e-01, 1.0217616e-01, 6.5027541e-01],
         [4.6854100e-01, 2.1065286e-01, 5.8210415e-01, 2.9533920e-01]],
        dtype

In [17]:
# Per-image inference times in seconds, in processing order.
elapsed

[0.13752174377441406, 0.1271519660949707, 0.12850093841552734]