# EfficientDet Tutorial: inference, eval, and training 



<table align="left"><td>
  <a target="_blank"  href="https://github.com/google/automl/blob/master/efficientdet/tf2/tutorial.ipynb">
    <img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on github
  </a>
</td><td>
  <a target="_blank"  href="https://colab.sandbox.google.com/github/google/automl/blob/master/efficientdet/tf2/tutorial.ipynb">
    <img width=32px src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
</td></table>

# 0. Install and view graph.

## 0.1 Install package and download source code/image.



In [None]:
%%capture
#@title
import os
import sys
import tensorflow as tf

# Download source code.
if "efficientdet" not in os.getcwd():
  !git clone --depth 1 https://github.com/google/automl
  os.chdir('automl/efficientdet')
  sys.path.append('.')
  !pip install -r requirements.txt
  !pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
else:
  !git pull

In [None]:
# Model to run. The name is interpolated into the checkpoint URL by download()
# and passed as --model_name to the commands in the rest of this notebook.
MODEL = 'efficientdet-lite0'  #@param

def download(m):
  if m not in os.listdir():
    if m.find('lite'):
      !wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientdet/coco/{m}.tgz
      !tar zxf {m}.tgz
    else:
      !wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientdet/coco/{m}.tar.gz
      !tar zxf {m}.tar.gz
  ckpt_path = os.path.join(os.getcwd(), m)
  return ckpt_path

# Download checkpoint.
ckpt_path = download(MODEL)
print('Use model in {}'.format(ckpt_path))

# Prepare image and visualization settings.
image_url =  'https://user-images.githubusercontent.com/11736571/77320690-099af300-6d37-11ea-9d86-24f14dc2d540.png'#@param
image_name = 'img.png' #@param
!wget {image_url} -O img.png
import os
img_path = os.path.join(os.getcwd(), 'img.png')

min_score_thresh = 0.35  #@param
max_boxes_to_draw = 200  #@param
line_thickness =   2#@param

import PIL
# Get the largest of height/width and round to 128.
image_size = max(PIL.Image.open(img_path).size)

## 0.2 View graph in TensorBoard

In [None]:
!python -m model_inspect --model_name={MODEL} --logdir=logs &> /dev/null
%load_ext tensorboard
%tensorboard --logdir logs

# 1. Inference

## 1.1 Benchmark network latency
There are two types of latency:
network latency and end-to-end latency.

*   network latency: from the first conv op to the network class and box prediction.
*   end-to-end latency: from image preprocessing, network, to the final postprocessing to generate an annotated new image.


In [None]:
# benchmark network latency
!python -m tf2.inspector --mode=benchmark --model_name={MODEL} --hparams="mixed_precision=true" --only_network

# With colab + Tesla T4 GPU, here are the batch size 1 latency summary:
# D0 (AP=33.5):  14.9ms,  FPS = 67.2   (batch size 8 FPS=)
# D1 (AP=39.6):  22.7ms,  FPS = 44.1   (batch size 8 FPS=)
# D2 (AP=43.0):  27.9ms,  FPS = 35.8   (batch size 8 FPS=)
# D3 (AP=45.8):  48.1ms,  FPS = 20.8   (batch size 8 FPS=)
# D4 (AP=49.4):  81.9ms,  FPS = 12.2   (batch size 8 FPS=)

## 1.2 Benchmark end-to-end latency

In [None]:
# Benchmark end-to-end latency (: preprocess + network + posprocess).
#
# With colab + Tesla T4 GPU, here are the batch size 1 latency summary:
# D0 (AP=33.5): 22.7ms,  FPS = 43.1   (batch size 4, FPS=)
# D1 (AP=39.6): 34.3ms,  FPS = 29.2   (batch size 4, FPS=)
# D2 (AP=43.0): 42.5ms,  FPS = 23.5   (batch size 4, FPS=)
# D3 (AP=45.8): 64.8ms,  FPS = 15.4   (batch size 4, FPS=)
# D4 (AP=49.4): 93.7ms,  FPS = 10.7   (batch size 4, FPS=)

batch_size = 1  # @param

saved_model_dir = 'savedmodel'
!rm -rf {saved_model_dir}

!python -m tf2.inspector --mode=export --model_name={MODEL} \
  --model_dir={ckpt_path} --saved_model_dir={saved_model_dir} \
  --batch_size={batch_size}  --hparams="mixed_precision=true"
!python -m tf2.inspector --mode=benchmark --model_name={MODEL} \
  --saved_model_dir={saved_model_dir} \
  --batch_size=1  --hparams="mixed_precision=true" --input_image=testdata/img1.jpg


## 1.3 Inference images.

---



In [None]:
# first export a saved model.
saved_model_dir = 'savedmodel'
!rm -rf {saved_model_dir}
!python -m tf2.inspector --mode=export --model_name={MODEL} \
  --model_dir={ckpt_path} --saved_model_dir={saved_model_dir}

# Then run saved_model_infer to do inference.
# Notably: batch_size, image_size must be the same as when it is exported.
serve_image_out = 'serve_image_out'
!mkdir {serve_image_out}

!python -m tf2.inspector --mode=infer \
  --saved_model_dir={saved_model_dir} \
  --model_name={MODEL}  --input_image=testdata/img1.jpg  \
  --output_image_dir={serve_image_out}

In [None]:
from IPython import display

# Render 0.jpg from the inference output directory inline.
display.display(
    display.Image(os.path.join(serve_image_out, '0.jpg')))

In [None]:
# In case you need to specify different image size or batch size or #boxes, then
# you need to export a new saved model and run the inferernce.

serve_image_out = 'serve_image_out'
!mkdir {serve_image_out}
saved_model_dir = 'savedmodel'
!rm -rf {saved_model_dir}

# Step 1: export model
!python -m tf2.inspector --mode=export \
  --model_name={MODEL} --model_dir={MODEL} \
  --hparams="image_size=1920x1280" --saved_model_dir={saved_model_dir}

# Step 2: do inference with saved model.
!python -m tf2.inspector --mode=infer \
  --model_name={MODEL} --saved_model_dir={saved_model_dir} \
  --input_image=img.png --output_image_dir={serve_image_out} \

from IPython import display
display.display(display.Image(os.path.join(serve_image_out, '0.jpg')))

## 1.4 Inference video

In [None]:
# step 0: download video
video_url = 'https://storage.googleapis.com/cloud-tpu-checkpoints/efficientdet/data/video480p.mov'  # @param
!wget {video_url} -O input.mov

# Step 1: export model
saved_model_dir = 'savedmodel'
!rm -rf {saved_model_dir}

!python -m tf2.inspector --mode=export \
  --model_name={MODEL} --model_dir={MODEL} \
  --saved_model_dir={saved_model_dir} --hparams="mixed_precision=true"

# Step 2: do inference with saved model using saved_model_video
!python -m tf2.inspector --mode=video \
  --model_name={MODEL} \
  --saved_model_dir={saved_model_dir} --hparams="mixed_precision=true" \
  --input_video=input.mov --output_video=output.mov
# Then you can view the output.mov

# 2. TFlite

## 2.1 Prepare the COCO validation set.

In [None]:
if 'val2017' not in os.listdir():
  !wget http://images.cocodataset.org/zips/val2017.zip
  !wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
  !unzip -q val2017.zip
  !unzip annotations_trainval2017.zip

  !mkdir tfrecord
  !python -m dataset.create_coco_tfrecord \
      --image_dir=val2017 \
      --object_annotations_file=annotations/instances_val2017.json \
      --output_file_prefix=tfrecord/val \
      --num_shards=32

## 2.2 TFlite export INT8 model

In [None]:
# In case you need to specify different image size or batch size or #boxes, then
# you need to export a new saved model and run the inferernce.

serve_image_out = 'serve_image_out'
!mkdir {serve_image_out}
saved_model_dir = 'savedmodel'
!rm -rf {saved_model_dir}

# # Step 1: export model
!python -m tf2.inspector --mode=export --file_pattern=tfrecord/*.tfrecord \
  --model_name={MODEL} --model_dir={MODEL} --num_calibration_steps=100 \
  --saved_model_dir={saved_model_dir} --use_xla --tflite=INT8

# Step 2: do inference with saved model.
!python -m tf2.inspector --mode=infer  --use_xla \
  --model_name={MODEL} --saved_model_dir={saved_model_dir}/int8.tflite \
  --input_image=testdata/img1.jpg --output_image_dir={serve_image_out}

from IPython import display
display.display(display.Image(os.path.join(serve_image_out, '0.jpg')))

In [None]:
# Evalute on validation set (takes about 10 mins for efficientdet-d0)
!python -m tf2.eval_tflite  \
    --model_name={MODEL}  --tflite_path={saved_model_dir}/int8.tflite   \
    --val_file_pattern=tfrecord/val* \
    --val_json_file=annotations/instances_val2017.json --eval_samples=100

## 2.3 Compile EdgeTPU model (Optional)

In [None]:
 # install edgetpu compiler
 !curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
 !echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | sudo tee /etc/apt/sources.list.d/coral-edgetpu.list
 !sudo apt-get update
 !sudo apt-get install edgetpu-compiler

The EdgeTPU has 8MB of SRAM for caching model parameters ([more info](https://coral.ai/docs/edgetpu/compiler/#parameter-data-caching)). This means that for models that are larger than 8MB, inference time will be increased in order to transfer over model parameters. One way to avoid this is [Model Pipelining](https://coral.ai/docs/edgetpu/pipeline/) - splitting the model into segments that can have a dedicated EdgeTPU. This can significantly improve latency.

The below table can be used as a reference for the number of Edge TPUs to use - the larger models will not compile for a single TPU as the intermediate tensors can't fit in on-chip memory.

| Model architecture | Minimum TPUs | Recommended TPUs |
|--------------------|-------|-------|
| EfficientDet-Lite0 | 1     | 1     |
| EfficientDet-Lite1 | 1     | 1     |
| EfficientDet-Lite2 | 1     | 2     |
| EfficientDet-Lite3 | 2     | 2     |
| EfficientDet-Lite4 | 2     | 3     |

In [None]:
NUMBER_OF_TPUS = 1
!edgetpu_compiler {saved_model_dir}/int8.tflite --num_segments=$NUMBER_OF_TPUS

# 3. COCO evaluation

## 3.1 COCO evaluation on validation set.

In [None]:
if 'val2017' not in os.listdir():
  !wget http://images.cocodataset.org/zips/val2017.zip
  !wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
  !unzip -q val2017.zip
  !unzip annotations_trainval2017.zip

  !mkdir tfrecord
  !python -m dataset.create_coco_tfrecord \
      --image_dir=val2017 \
      --object_annotations_file=annotations/instances_val2017.json \
      --output_file_prefix=tfrecord/val \
      --num_shards=32

In [None]:
# Evalute on validation set (takes about 10 mins for efficientdet-d0)
!python -m tf2.eval  \
    --model_name={MODEL}  --model_dir={ckpt_path}  \
    --val_file_pattern=tfrecord/val*  \
    --val_json_file=annotations/instances_val2017.json

# 4. Training EfficientDets on PASCAL.

## 4.1 Prepare data

In [None]:
# Get pascal voc 2012 trainval data
import os
if 'VOCdevkit' not in os.listdir():
  !wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
  !tar xf VOCtrainval_06-Nov-2007.tar

  !mkdir tfrecord
  !python -m dataset.create_pascal_tfrecord  \
    --data_dir=VOCdevkit --year=VOC2007  --output_path=tfrecord/pascal

# Pascal has 5717 train images with 100 shards epoch, here we use a single shard
# for demo, but users should use all shards pascal-*-of-00100.tfrecord.
file_pattern = 'pascal-00000-of-00100.tfrecord'  # @param
images_per_epoch = 57 * len(tf.io.gfile.glob('tfrecord/' + file_pattern))
images_per_epoch = images_per_epoch // 8 * 8  # round to 64.
print('images_per_epoch = {}'.format(images_per_epoch))

## 4.2 Train Pascal VOC 2007 from ImageNet checkpoint for Backbone.

In [None]:
# Train efficientdet from scratch with backbone checkpoint.
backbone_name = {
    'efficientdet-d0': 'efficientnet-b0',
    'efficientdet-d1': 'efficientnet-b1',
    'efficientdet-d2': 'efficientnet-b2',
    'efficientdet-d3': 'efficientnet-b3',
    'efficientdet-d4': 'efficientnet-b4',
    'efficientdet-d5': 'efficientnet-b5',
    'efficientdet-d6': 'efficientnet-b6',
    'efficientdet-d7': 'efficientnet-b6',
    'efficientdet-lite0': 'efficientnet-lite0',
    'efficientdet-lite1': 'efficientnet-lite1',
    'efficientdet-lite2': 'efficientnet-lite2',
    'efficientdet-lite3': 'efficientnet-lite3',
    'efficientdet-lite3x': 'efficientnet-lite3',
    'efficientdet-lite4': 'efficientnet-lite4',
}[MODEL]


# generating train tfrecord is large, so we skip the execution here.
import os
if backbone_name not in os.listdir():
  if backbone_name.find('lite'):
    !wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/lite/{backbone_name}.tar.gz
  else:
    !wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/ckptsaug/{backbone_name}.tar.gz
  !tar xf {backbone_name}.tar.gz

!mkdir /tmp/model_dir
# key option: use --backbone_ckpt rather than --ckpt.
# Don't use ema since we only train a few steps.
!python -m tf2.train --mode=traineval \
    --train_file_pattern=tfrecord/{file_pattern} \
    --val_file_pattern=tfrecord/{file_pattern} \
    --model_name={MODEL} \
    --model_dir=/tmp/model_dir/{MODEL}-scratch  \
    --pretrained_ckpt={backbone_name} \
    --batch_size=16 \
    --eval_samples={images_per_epoch}  \
    --num_examples_per_epoch={images_per_epoch}  --num_epochs=1  \
    --hparams="num_classes=20,moving_average_decay=0,mixed_precision=true"

## 4.3 Train Pascal VOC 2007 from COCO checkpoint for the whole net.

In [None]:
# generating train tfrecord is large, so we skip the execution here.
import os
if MODEL not in os.listdir():
  download(MODEL)

!mkdir /tmp/model_dir/
# key option: use --ckpt rather than --backbone_ckpt.
!python -m tf2.train --mode=traineval \
    --train_file_pattern=tfrecord/{file_pattern} \
    --val_file_pattern=tfrecord/{file_pattern} \
    --model_name={MODEL} \
    --model_dir=/tmp/model_dir/{MODEL}-finetune \
    --pretrained_ckpt={MODEL} \
    --batch_size=16 \
    --eval_samples={images_per_epoch}  \
    --num_examples_per_epoch={images_per_epoch}  --num_epochs=1  \
    --hparams="num_classes=20,moving_average_decay=0,mixed_precision=true"

## 4.4 View tensorboard for loss and accuracy.


In [None]:
%load_ext tensorboard
%tensorboard --logdir /tmp/model_dir/
# Notably, this is just a demo with almost zero accuracy due to very limited
# training steps, but we can see finetuning has smaller loss than training
# from scratch at the begining.

# 5. Export to ONNX


In [None]:
# Install tf2onnx, which the conversion cell below runs as `python -m tf2onnx.convert`.
!pip install tf2onnx

In [None]:
!python -m tf2.inspector --mode=export --model_name={MODEL} --model_dir={MODEL} --saved_model_dir={saved_model_dir} --hparams="nms_configs.method='hard', nms_configs.iou_thresh=0.5, nms_configs.sigma=0.0"

In [None]:
!python -m tf2onnx.convert --saved-model={saved_model_dir} --output={saved_model_dir}/model.onnx  --opset=11