# [TPU Object Detection and Segmentation Framework](https://github.com/tensorflow/tpu/tree/master/models/official/detection)

In [1]:
# Select the TensorFlow 1.x runtime for this notebook session.
%tensorflow_version 1.x

TensorFlow 1.x selected.


In [2]:
# Confirm which TensorFlow version is active (the recorded run shows 1.15.2).
import tensorflow
print(tensorflow.__version__)

1.15.2


In [3]:
#!apt-get install -y python-tk

In [4]:
# Install Python dependencies into the user site-packages (`--user`).
# NOTE(review): versions are not pinned, so a re-run may resolve different
# releases than the recorded run.
!pip install --user Cython matplotlib opencv-python-headless pyyaml Pillow

Collecting opencv-python-headless
[?25l  Downloading https://files.pythonhosted.org/packages/b6/2a/496e06fd289c01dc21b11970be1261c87ce1cc22d5340c14b516160822a7/opencv_python_headless-4.4.0.42-cp36-cp36m-manylinux2014_x86_64.whl (36.6MB)
[K     |████████████████████████████████| 36.6MB 82kB/s 
Installing collected packages: opencv-python-headless
Successfully installed opencv-python-headless-4.4.0.42


In [5]:
# Install pycocotools from the `PythonAPI` subdirectory of the cocoapi
# repository; inference.py below imports `pycocotools.mask`.
!pip install --user 'git+https://github.com/cocodataset/cocoapi#egg=pycocotools&subdirectory=PythonAPI'



In [None]:
# Clone the TensorFlow TPU models repository into ./tpu.
!git clone https://github.com/tensorflow/tpu/

Cloning into 'tpu'...


In [None]:
# Enter the repository cloned by the previous cell.
%cd tpu

In [None]:
# With no arguments, `%env` lists every environment variable of the kernel.
# NOTE(review): this looks like a debugging aid; clear its output before
# sharing the notebook if the environment may hold secrets.
%env

In [None]:
# Put /content/tpu/models on PYTHONPATH for subprocesses started from the
# notebook. The paths are hard-coded to a TensorFlow 1.15.2 / Python 3.6 layout.
%env PYTHONPATH=/tensorflow-1.15.2/python3.6:/env/python:/content/tpu/models

In [None]:
# Overwrite PATH with a fixed list that ends with the `detection` and
# `efficientnet` model directories of the cloned repository.
%env PATH=/tensorflow-1.15.2/python3.6/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin:/opt/bin:/content/tpu/models/official/detection:/content/tpu/models/official/efficientnet

# [Fashionpedia: Ontology, Segmentation, and an Attribute Localization Dataset](https://github.com/tensorflow/tpu/tree/master/models/official/detection/projects/fashionpedia)

### [Fashionpedia Dataset](https://github.com/cvdfoundation/fashionpedia)

In [None]:
%cd /content/tpu/models/official/detection/projects/fashionpedia

In [None]:
%cd dataset

In [None]:
# -nc (no-clobber): skip the download when the archive is already present, so
# re-running the cell does not fetch a second copy named train2020.zip.1.
!wget -nc https://s3.amazonaws.com/ifashionist-dataset/images/train2020.zip

In [None]:
# -n: never overwrite existing files, which avoids the interactive
#     "replace?" prompt when the cell is re-run.
# -q: do not print one line per extracted image.
!unzip -n -q train2020.zip

In [None]:
# -nc (no-clobber): skip the download when the archive is already present.
!wget -nc https://s3.amazonaws.com/ifashionist-dataset/images/val_test2020.zip

In [None]:
# -n: never overwrite existing files; -q: keep the cell output short.
!unzip -n -q val_test2020.zip

In [None]:
%cd ../

## Model Inference

### Use checkpoint

In [None]:
# MODEL="retinanet"
# IMAGE_SIZE=640
# CHECKPOINT_PATH="<path to the checkpoint>"
# PARAMS_OVERRIDE=""  # if any.
# LABEL_MAP_FILE="~/tpu/models/official/detection/datasets/coco_label_map.csv"
# IMAGE_FILE_PATTERN="<path to the JPEG image that you want to run inference on>"
# OUTPUT_HTML="./test.html"
# python ~/tpu/models/official/detection/inference.py \
#   --model="${MODEL?}" \
#   --image_size=${IMAGE_SIZE?} \
#   --checkpoint_path="${CHECKPOINT_PATH?}" \
#   --label_map_file="${LABEL_MAP_FILE?}" \
#   --image_file_pattern="${IMAGE_FILE_PATTERN?}" \
#   --output_html="${OUTPUT_HTML?}" \
#   --max_boxes_to_draw=10 \
#   --min_score_threshold=0.05


### [Checkpoint](https://github.com/tensorflow/tpu/tree/master/models/official/detection/projects/fashionpedia)

In [None]:
# SpineNet-49 checkpoint.
# -nc (no-clobber): skip the download when the archive already exists, so a
# re-run does not create fashionpedia-spinenet-49.tar.gz.1.
!wget -nc https://storage.googleapis.com/cloud-tpu-checkpoints/detection/projects/fashionpedia/fashionpedia-spinenet-49.tar.gz

In [None]:
!tar zxvf fashionpedia-spinenet-49.tar.gz

In [None]:
# ResNet-50 FPN checkpoint (the one used by the inference cell below).
!wget -nc https://storage.googleapis.com/cloud-tpu-checkpoints/detection/projects/fashionpedia/fashionpedia-r50-fpn.tar.gz

In [None]:
!tar zxvf fashionpedia-r50-fpn.tar.gz

In [None]:
# ResNet-101 FPN checkpoint.
!wget -nc https://storage.googleapis.com/cloud-tpu-checkpoints/detection/projects/fashionpedia/fashionpedia-r101-fpn.tar.gz

In [None]:
!tar zxvf fashionpedia-r101-fpn.tar.gz

In [None]:
%%writefile inference.py
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# pylint: disable=line-too-long
r"""A stand-alone binary to run model inference and visualize results.

It currently only supports models of type `attribute_mask_rcnn`. It only
supports running on CPU/GPU with batch size 1.
"""
# pylint: enable=line-too-long

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
sys.path.insert(0, "/content/tpu/models/official/detection")
sys.path.insert(1, "/content/tpu/models/official/efficientnet")

import base64
import csv
import io

from absl import flags
from absl import logging

import numpy as np
from PIL import Image
from pycocotools import mask as mask_api
import tensorflow.compat.v1 as tf

from dataloader import mode_keys
from projects.fashionpedia.configs import factory as config_factory
from projects.fashionpedia.modeling import factory as model_factory
from utils import box_utils
from utils import input_utils
from utils import mask_utils
from utils.object_detection import visualization_utils
from hyperparameters import params_dict


FLAGS = flags.FLAGS

# --- Model and configuration -------------------------------------------------
flags.DEFINE_string(
    name='model',
    default='attribute_mask_rcnn',
    help='Support `attribute_mask_rcnn`.')
flags.DEFINE_integer(
    name='image_size',
    default=640,
    help='The image size.')
flags.DEFINE_string(
    name='checkpoint_path',
    default='',
    help='The path to the checkpoint file.')
flags.DEFINE_string(
    name='config_file',
    default='',
    help='The config file template.')
flags.DEFINE_string(
    name='params_override',
    default='',
    help=('The YAML file/string that specifies the parameters override in '
          'addition to the `config_file`.'))

# --- Label map ---------------------------------------------------------------
flags.DEFINE_string(
    name='label_map_file',
    default='',
    help='The label map file. See --label_map_format for the definition.')
flags.DEFINE_string(
    name='label_map_format',
    default='csv',
    help=('The format of the label map file. Currently only support `csv` '
          'where the format of each row is: `id:name`.'))

# --- Inputs and outputs ------------------------------------------------------
flags.DEFINE_string(
    name='image_file_pattern',
    default='',
    help='The glob that specifies the image file pattern.')
flags.DEFINE_string(
    name='output_html',
    default='/tmp/test.html',
    help='The output HTML file that includes images with rendered detections.')
flags.DEFINE_string(
    name='output_file',
    default='/tmp/res.npy',
    help='The output npy file that includes model output.')

# --- Visualization -----------------------------------------------------------
flags.DEFINE_integer(
    name='max_boxes_to_draw',
    default=10,
    help='The maximum number of boxes to draw.')
flags.DEFINE_float(
    name='min_score_threshold',
    default=0.05,
    help='The minimum score thresholds in order to draw boxes.')


def _load_label_map(label_map_file, label_map_format):
  """Parses the label map file into a dict keyed by integer class id.

  Args:
    label_map_file: Path of the label map file; it is opened with `tf.gfile`.
    label_map_format: Format of the file. Only `csv` is supported, where each
      row is `id:name`.

  Returns:
    A dict mapping each integer id to `{'id': id, 'name': name}`.

  Raises:
    ValueError: If the format is not `csv`, or if a row does not consist of
      exactly two `:`-separated fields.
  """
  if label_map_format != 'csv':
    raise ValueError(
        'Unsupported label map format: {}.'.format(label_map_format))

  label_map_dict = {}
  with tf.gfile.Open(label_map_file, 'r') as csv_file:
    for row in csv.reader(csv_file, delimiter=':'):
      if len(row) != 2:
        raise ValueError('Each row of the csv label map file must be in '
                         '`id:name` format.')
      id_index = int(row[0])
      label_map_dict[id_index] = {
          'id': id_index,
          'name': row[1],
      }
  return label_map_dict


def _render_html(images_with_detections):
  """Builds an HTML page that embeds each image as a base64 JPEG data URI.

  Args:
    images_with_detections: Iterable of numpy image arrays; each one is cast to
      uint8 before being encoded.

  Returns:
    The HTML document as a string.
  """
  image_strs = []
  for image_array in images_with_detections:
    formatted_image = Image.fromarray(image_array.astype(np.uint8))
    with io.BytesIO() as stream:
      formatted_image.save(stream, format='JPEG')
      data_uri = base64.b64encode(stream.getvalue()).decode('utf-8')
    # Attributes are separated by whitespace only; a comma here is invalid.
    image_strs.append(
        '<img src="data:image/jpeg;base64,{}" height=800>'.format(data_uri))
  return '<html>' + ' '.join(image_strs) + '</html>'


def main(unused_argv):
  """Runs inference on every matching image and saves the results.

  Builds the model selected by `--model`, restores `--checkpoint_path`, runs
  each file matching `--image_file_pattern` through the graph one at a time,
  then writes an HTML page with the rendered detections to `--output_html` and
  the raw per-image outputs to `--output_file`.

  Args:
    unused_argv: Positional command-line arguments; ignored.

  Raises:
    ValueError: If the label map format is unsupported or the label map file
      is malformed.
  """
  del unused_argv
  # Load the label map.
  print(' - Loading the label map...')
  label_map_dict = _load_label_map(
      FLAGS.label_map_file, FLAGS.label_map_format)

  params = config_factory.config_generator(FLAGS.model)
  if FLAGS.config_file:
    params = params_dict.override_params_dict(
        params, FLAGS.config_file, is_strict=True)
  params = params_dict.override_params_dict(
      params, FLAGS.params_override, is_strict=True)
  params.override({
      'architecture': {
          'use_bfloat16': False,  # The inference runs on CPU/GPU.
      },
  }, is_strict=True)
  params.validate()
  params.lock()

  model = model_factory.model_generator(params)

  res = []
  image_with_detections_list = []
  with tf.Graph().as_default():
    # The graph takes the encoded image bytes and decodes them itself.
    image_input = tf.placeholder(shape=(), dtype=tf.string)
    image = tf.io.decode_image(image_input, channels=3)
    image.set_shape([None, None, 3])

    image = input_utils.normalize_image(image)
    image_size = [FLAGS.image_size, FLAGS.image_size]
    image, image_info = input_utils.resize_and_crop_image(
        image,
        image_size,
        image_size,
        aug_scale_min=1.0,
        aug_scale_max=1.0)
    image.set_shape([image_size[0], image_size[1], 3])

    # batching.
    images = tf.reshape(image, [1, image_size[0], image_size[1], 3])
    images_info = tf.expand_dims(image_info, axis=0)

    # model inference
    outputs = model.build_outputs(
        images, {'image_info': images_info}, mode=mode_keys.PREDICT)

    # Divide the boxes by row 2 of `image_info`, tiled to four columns.
    # NOTE(review): presumably that row is the (y, x) resize scale, so this
    # maps boxes back to original-image coordinates; confirm against
    # `input_utils.resize_and_crop_image`.
    outputs['detection_boxes'] = (
        outputs['detection_boxes'] / tf.tile(images_info[:, 2:3, :], [1, 1, 2]))

    predictions = outputs

    # Create a saver in order to load the pre-trained checkpoint.
    saver = tf.train.Saver()

    with tf.Session() as sess:
      print(' - Loading the checkpoint...')
      saver.restore(sess, FLAGS.checkpoint_path)

      image_files = tf.gfile.Glob(FLAGS.image_file_pattern)
      if not image_files:
        logging.warning(
            'No image matches the pattern: %s', FLAGS.image_file_pattern)
      for i, image_file in enumerate(image_files):
        print(' - Processing image %d...' % i)

        with tf.gfile.GFile(image_file, 'rb') as f:
          image_bytes = f.read()

        # Decode from the bytes that were already read through tf.gfile, so
        # the visualization copy works for every path that tf.gfile can read.
        pil_image = Image.open(io.BytesIO(image_bytes))
        pil_image = pil_image.convert('RGB')  # needed for 4-channel images.
        width, height = pil_image.size
        # np.array() copies, so the visualization code gets a writable
        # (height, width, 3) uint8 array.
        np_image = np.array(pil_image, dtype=np.uint8)

        predictions_np = sess.run(
            predictions, feed_dict={image_input: image_bytes})

        num_detections = int(predictions_np['num_detections'][0])
        np_boxes = predictions_np['detection_boxes'][0, :num_detections]
        np_scores = predictions_np['detection_scores'][0, :num_detections]
        np_classes = predictions_np['detection_classes'][0, :num_detections]
        np_classes = np_classes.astype(np.int32)
        np_attributes = predictions_np['detection_attributes'][
            0, :num_detections, :]

        # Reset both per image so that a model without mask outputs neither
        # raises NameError nor reuses the masks of the previous image.
        np_masks = None
        encoded_masks = None
        if 'detection_masks' in predictions_np:
          instance_masks = predictions_np['detection_masks'][0, :num_detections]
          np_masks = mask_utils.paste_instance_masks(
              instance_masks, box_utils.yxyx_to_xywh(np_boxes), height, width)
          encoded_masks = [
              mask_api.encode(np.asfortranarray(np_mask))
              for np_mask in list(np_masks)]

        res.append({
            'image_file': image_file,
            'boxes': np_boxes,
            'classes': np_classes,
            'scores': np_scores,
            'attributes': np_attributes,
            'masks': encoded_masks,
        })

        image_with_detections = (
            visualization_utils.visualize_boxes_and_labels_on_image_array(
                np_image,
                np_boxes,
                np_classes,
                np_scores,
                label_map_dict,
                instance_masks=np_masks,
                use_normalized_coordinates=False,
                max_boxes_to_draw=FLAGS.max_boxes_to_draw,
                min_score_thresh=FLAGS.min_score_threshold))
        image_with_detections_list.append(image_with_detections)

  print(' - Saving the outputs...')
  with tf.gfile.GFile(FLAGS.output_html, 'w') as f:
    f.write(_render_html(image_with_detections_list))
  np.save(FLAGS.output_file, res)


if __name__ == '__main__':
  # NOTE(review): every flag below is defined with a non-None default ('' or a
  # path). If `mark_flag_as_required` only rejects None values, these marks
  # never trigger -- confirm against the absl documentation.
  for required_flag in (
      'model',
      'checkpoint_path',
      'label_map_file',
      'image_file_pattern',
      'output_html',
  ):
    flags.mark_flag_as_required(required_flag)
  logging.set_verbosity(logging.INFO)
  tf.app.run(main)


In [None]:
# Pin the cloned repository to commit d35e485.
# NOTE(review): this runs after inference.py was written into the working
# tree by the previous cell; confirm the checkout does not conflict with it.
!git checkout d35e485

In [None]:
%%bash
# Run the inference.py written above on a single training image.
# `${VAR?}` makes the shell abort if a variable is unset.
MODEL="attribute_mask_rcnn"
IMAGE_SIZE=640
CHECKPOINT_PATH="./fashionpedia-r50-fpn/model.ckpt"
#CHECKPOINT_PATH="./fashionpedia-r101-fpn/model.ckpt"
#CHECKPOINT_PATH="./fashionpedia-spinenet-49/model.ckpt"
PARAMS_OVERRIDE=""  # if any.
LABEL_MAP_FILE="./dataset/fashionpedia_label_map.csv"
#IMAGE_FILE_PATTERN="./dataset/test/ab23d0c1ccdadcac2da79af78298ea8d.jpg"
IMAGE_FILE_PATTERN="./dataset/train/4789aab7dffdf2ebe4ea809275df5e26.jpg"
OUTPUT_HTML="./test.html"
# PARAMS_OVERRIDE is now forwarded to the script; the empty string equals the
# flag's default, so the behavior is unchanged until a value is set above.
python ./inference.py \
  --model="${MODEL?}" \
  --image_size=${IMAGE_SIZE?} \
  --checkpoint_path="${CHECKPOINT_PATH?}" \
  --params_override="${PARAMS_OVERRIDE?}" \
  --label_map_file="${LABEL_MAP_FILE?}" \
  --image_file_pattern="${IMAGE_FILE_PATTERN?}" \
  --output_html="${OUTPUT_HTML?}" \
  --max_boxes_to_draw=10 \
  --min_score_threshold=0.05

In [None]:
# Show the input image that was passed to inference.py via IMAGE_FILE_PATTERN.
from IPython.display import Image,display_jpeg
display_jpeg(Image('./dataset/train/4789aab7dffdf2ebe4ea809275df5e26.jpg'))

In [None]:
# Render the HTML file written by inference.py (--output_html) inline; the
# HTML object is the cell's last expression, so the notebook displays it.
import IPython
IPython.display.HTML(filename='./test.html')

### Use SavedModel

In [None]:
# SAVED_MODEL_DIR="<path to the SavedModel>"
# LABEL_MAP_FILE="~/tpu/models/official/detection/datasets/coco_label_map.csv"
# IMAGE_FILE_PATTERN="<path to the JPEG image that you want to run inference on>"
# OUTPUT_HTML="./test.html"
# python ~/tpu/models/official/detection/inference_saved_model.py \
#   --saved_model_dir="${SAVED_MODEL_DIR?}" \
#   --label_map_file="${LABEL_MAP_FILE?}" \
#   --image_file_pattern="${IMAGE_FILE_PATTERN?}" \
#   --output_html="${OUTPUT_HTML?}" \
#   --max_boxes_to_draw=10 \
#   --min_score_threshold=0.05
