In [0]:
# 下载 TensorFlow Detection API 的 models
!git clone https://github.com/tensorflow/models.git

In [0]:
# 下载 protoc 3.6.1 版本
!wget https://github.com/google/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip

# 解压
!unzip protoc-3.6.1-linux-x86_64.zip

In [0]:
# 将 protoc 命令拷贝到系统默认命令执行目录
!cp bin/protoc /bin/protoc

# 查看 protoc 的版本号
!protoc --version

In [0]:
# 切换目录
import os
os.chdir("models/research/") 

In [0]:
# 将 TensorFlow Object Detection API 下的 object_detection 的所有 .proto 文件编译生成 .py 文件，以供 python 使用
!protoc object_detection/protos/*.proto --python_out=.

In [0]:
# 将 slim 目录 添加到 环境变量的 PYTHONPATH 下
%env PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim

env: PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim


In [0]:
# 查看 PYTHONPATH 的环境变量
!printenv | grep PYTHONPATH

PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim


In [0]:
# 导出相关的模块

import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# 这是为了在Jupyter Notebook中显示图像
%matplotlib inline

print("tensorflow {}".format(tf.__version__))

tensorflow 1.10.0


In [0]:
# 将上层目录导入进来，为了执行下面的模块导入
sys.path.append("..")
from object_detection.utils import ops as utils_ops

# 导入 Object Detection 的 utils 模块 
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

In [0]:
# 模型的名称和下载地址拼接
MODEL_NAME = 'ssd_mobilenet_v1_coco_2018_01_28'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# 模型下载解压后的目录里，冻结的graph，此 graph 保存了预训练网络的架构，这在对象检测时是经常这么用的
PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'

# mscoco_label_map.pbtxt 保存了类别和索引的映射关系
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')

NUM_CLASSES = 90

In [0]:
# 模型下载
opener = urllib.request.URLopener()
opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)

# 下载后解压
tar_file = tarfile.open(MODEL_FILE)
for file in tar_file.getmembers():
    file_name = os.path.basename(file.name)
    if 'frozen_inference_graph.pb' in file_name:
        tar_file.extract(file, os.getcwd())

In [0]:
# 加载已冻结的预训练模型到内存中
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

In [0]:
# 加载类别与索引的映射关系
# 标签映射将索引映射到类别名称，因此当我们的卷积网络预测5时，我们知道这对应于飞机。 
# 这里我们使用内部 util 函数，但任何返回字典的映射索引到适当的字符串标签都是可以的
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

In [0]:
i = 0
for k, v in category_index.items():
    print("索引：{} 对应的类别：{}".format(k, v))
    i += 1
    if i == 10:
        break

索引：1 对应的类别：{'id': 1, 'name': u'person'}
索引：2 对应的类别：{'id': 2, 'name': u'bicycle'}
索引：3 对应的类别：{'id': 3, 'name': u'car'}
索引：4 对应的类别：{'id': 4, 'name': u'motorcycle'}
索引：5 对应的类别：{'id': 5, 'name': u'airplane'}
索引：6 对应的类别：{'id': 6, 'name': u'bus'}
索引：7 对应的类别：{'id': 7, 'name': u'train'}
索引：8 对应的类别：{'id': 8, 'name': u'truck'}
索引：9 对应的类别：{'id': 9, 'name': u'boat'}
索引：10 对应的类别：{'id': 10, 'name': u'traffic light'}


In [0]:
# 定义函数，将输入的图片转换成 Numpy 三维数组
def load_image_into_numpy_array(image):
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

In [0]:
# 定义单张图片的检测函数
def run_inference_for_single_image(image, graph):
    # 获取计算图
    with graph.as_default():
        # 开启一个 TensorFlow 会话
        with tf.Session() as sess:
            # 获取输入和输出的张量的句柄
            ops = tf.get_default_graph().get_operations()
            # 获取所有的张量的名称
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in [ 'num_detections', 'detection_boxes', 'detection_scores',
                         'detection_classes', 'detection_masks' ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                      tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(tensor_name)
                        
            if 'detection_masks' in tensor_dict:
                # 下面的过程仅仅是针对单张图片的处理
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])

                # 需要使用 reframe 将蒙版从框坐标转换为图像坐标并适合图像大小
                real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[0], image.shape[1])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)

                # 通过添加批量维度来遵循惯例
                tensor_dict['detection_masks'] = tf.expand_dims(detection_masks_reframed, 0)
                
            image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

            # 运行对象推理检测，这里就是真正的检测
            output_dict = sess.run(tensor_dict, feed_dict={image_tensor: np.expand_dims(image, 0)})

            # 所有输出都是float32 numpy数组，因此需要适当地转换类型
            # num_detections 表示检测框的个数
            output_dict['num_detections'] = int(output_dict['num_detections'][0])
            # detection_classes 表示每个框对应的检测类别 
            output_dict['detection_classes'] = output_dict['detection_classes'][0].astype(np.uint8)
            # detection_boxes 表示检测到检测框
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            # detection_scores 表示检测到的检测结果评分
            output_dict['detection_scores'] = output_dict['detection_scores'][0]

            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict['detection_masks'][0]
            return output_dict

In [0]:
# 为了简单性，我们暂且使用两张图，分别是
# image1.jpg
# image2.jpg
# 如果你想测试你自己的图片，就把图片拷贝到 test_images 目录
PATH_TO_TEST_IMAGES_DIR = 'test_images'
TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 5) ]

# 可视化显示结果时的输出大小，单位：英寸
IMAGE_SIZE = (22, 18)

In [0]:
for image_path in TEST_IMAGE_PATHS:
    image = Image.open(image_path)
    # 将读取到的图像转换成 numpy 多维数组
    image_np = load_image_into_numpy_array(image)
    # 由于模型需要的维度 shape 是 [1, None, None, 3]，所以我们需要扩展维度
    image_np_expanded = np.expand_dims(image_np, axis=0)
    # 运行对象检测
    output_dict = run_inference_for_single_image(image_np, detection_graph)
    # 可视化检测的结果
    vis_util.visualize_boxes_and_labels_on_image_array(
      image_np,
      output_dict['detection_boxes'],
      output_dict['detection_classes'],
      output_dict['detection_scores'],
      category_index,
      instance_masks=output_dict.get('detection_masks'),
      use_normalized_coordinates=True,
      line_thickness=8)
    plt.figure(figsize=IMAGE_SIZE)
    plt.imshow(image_np)