## Setup (installing the TensorFlow Object Detection API)

In [0]:
# Download and install tensorflow object detection api

%matplotlib inline
%cd /content
!git clone https://github.com/tensorflow/models.git
%cd /content/models/research
!wget -O protobuf.zip https://github.com/google/protobuf/releases/download/v3.0.0/protoc-3.0.0-linux-x86_64.zip
!unzip protobuf.zip
!./bin/protoc object_detection/protos/*.proto --python_out=.


# Run test to verify installation
!python object_detection/builders/model_builder_test.py


In [0]:
import os

# Directories that the training / evaluation / export scripts launched by later
# cells need on their import path.
_OD_API_PATHS = [
    "/content/models",
    "/content/models/research",
    "/content/models/research/slim",
]

# Join with an explicit separator so the first new entry is not glued onto the last
# existing one, tolerate PYTHONPATH being unset, and skip entries that are already
# present so re-running the cell does not keep growing the variable.
_current_pythonpath = os.environ.get('PYTHONPATH', '')
_pythonpath_entries = _current_pythonpath.split(os.pathsep) if _current_pythonpath else []
for _od_path in _OD_API_PATHS:
  if _od_path not in _pythonpath_entries:
    _pythonpath_entries.append(_od_path)
os.environ['PYTHONPATH'] = os.pathsep.join(_pythonpath_entries)

# Show the resulting value (child processes started with `!` inherit it).
print(os.environ['PYTHONPATH'])

## Download training/testing data

In [0]:
# Given a filename and fileid, downloads the file from google drive to the given path.
# Also takes care of navigating through virus scan popups on gdrive for large files

def download(filename, fileid, path):
  """Download a Google Drive file into `path` using curl.

  Args:
    filename: name the downloaded file is saved under (relative to `path`).
    fileid: Google Drive file id, placed in the `id=` query parameter.
    path: directory to download into.

  Side effects:
    Changes the notebook's working directory to `path` (via %cd) and leaves it
    there. A curl cookie jar named `cookie` is written to `path` and not removed.
  """
  %cd {path}
  # First request: only used to obtain cookies (-c ./cookie); the response body is
  # not saved to a file.
  str_to_send_1 = "\"https://drive.google.com/uc?export=download&id=" + fileid + "\""
  !curl -c ./cookie -s -L {str_to_send_1}
  
  # Second request: same URL plus a `confirm` value taken by awk from the cookie file
  # (last field of the lines matching "download"); the response is written to `filename`.
  str_to_send_2 = "\"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=" + fileid + "\""
  !curl -Lb ./cookie {str_to_send_2} -o {filename}


In [0]:
# Downloads a model from the object detection model zoo.

def downloadModel(filename, path):
  """Download and unpack a model archive from download.tensorflow.org.

  Args:
    filename: archive name (a .tar.gz) under
      http://download.tensorflow.org/models/object_detection/.
    path: directory to download and extract into.

  Side effects:
    Changes the notebook's working directory to `path` (via %cd) and leaves it
    there. The archive is extracted in place and then deleted.
  """
  %cd {path}
  !curl -L -o {filename} http://download.tensorflow.org/models/object_detection/{filename}
  !tar -zxvf {filename}
  !rm -rf {filename}

In [0]:
# remove any previous content
%cd /content
!rm -rf /content/training
!mkdir training
!mkdir training/data
!mkdir training/models
!mkdir training/models/model

data_path = "/content/training/data"
model_path = "/content/training/models/model"
models_path = "/content/training/models"

download("train.record","1Semkf1nofrDujdvRbT5u1vXbpqnVuDv-", data_path)
download("test.record","1EN03jqqPbi8d8gPGRmtjhXQwjPMJKiIg", data_path)
download("pipeline.config","1InN2NhU-q7hozbJnRNAUx8DIkvbjPvCm", model_path)
download("labelMap.pbtxt","1KBupijpNNMPf9ItPtEcZen3lfx-EqyY7", data_path)
downloadModel("ssd_mobilenet_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz", models_path)

## Start a TensorBoard instance using ngrok

In [0]:
# setting up tensorboard
%cd /content
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip ngrok-stable-linux-amd64.zip
LOG_DIR = '/content/training/models/model'
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)
get_ipython().system_raw('./ngrok http 6006 &')
!curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

## Pipeline config

In [0]:
%%writefile /content/training/models/model/pipeline.config
# Pipeline configuration for the TensorFlow Object Detection API (protobuf text format).
# Writing this cell replaces the pipeline.config that the download cell fetched to the
# same path.
#
# model: SSD detector using the "ssd_mobilenet_v2" feature extractor.
model {
  ssd {
    # NOTE(review): presumably must equal the number of entries in labelMap.pbtxt
    # (the inference cell also uses NUM_CLASSES = 62) -- confirm.
    num_classes: 62
    image_resizer {
      # Fixed 300x300 input; the tflite_convert cell uses the matching
      # 1,300,300,3 input shape.
      fixed_shape_resizer {
        height: 300
        width: 300
      }
    }
    feature_extractor {
      type: "ssd_mobilenet_v2"
      depth_multiplier: 1.0
      min_depth: 16
      conv_hyperparams {
        regularizer {
          l2_regularizer {
            weight: 3.99999989895e-05
          }
        }
        initializer {
          truncated_normal_initializer {
            mean: 0.0
            stddev: 0.0299999993294
          }
        }
        activation: RELU_6
        batch_norm {
          decay: 0.999700009823
          center: true
          scale: true
          epsilon: 0.0010000000475
          train: true
        }
      }
    }
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    # Box predictor: same conv hyperparameters as the feature extractor above.
    box_predictor {
      convolutional_box_predictor {
        conv_hyperparams {
          regularizer {
            l2_regularizer {
              weight: 3.99999989895e-05
            }
          }
          initializer {
            truncated_normal_initializer {
              mean: 0.0
              stddev: 0.0299999993294
            }
          }
          activation: RELU_6
          batch_norm {
            decay: 0.999700009823
            center: true
            scale: true
            epsilon: 0.0010000000475
            train: true
          }
        }
        min_depth: 0
        max_depth: 0
        num_layers_before_predictor: 0
        use_dropout: false
        dropout_keep_probability: 0.800000011921
        kernel_size: 1
        box_code_size: 4
        apply_sigmoid_to_scores: false
      }
    }
    # Anchors: 6 layers, scales from ~0.2 to ~0.95, five aspect ratios.
    anchor_generator {
      ssd_anchor_generator {
        num_layers: 6
        min_scale: 0.20000000298
        max_scale: 0.949999988079
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        aspect_ratios: 3.0
        aspect_ratios: 0.333299994469
      }
    }
    post_processing {
      batch_non_max_suppression {
        score_threshold: 0.300000011921
        iou_threshold: 0.600000023842
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
    normalize_loss_by_num_matches: true
    # Loss: smooth-L1 localization + sigmoid classification, equally weighted,
    # with hard example mining on the classification loss.
    loss {
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_loss {
        weighted_sigmoid {
        }
      }
      hard_example_miner {
        num_hard_examples: 3000
        iou_threshold: 0.990000009537
        loss_type: CLASSIFICATION
        max_negatives_per_positive: 3
        min_negatives_per_image: 3
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
  }
}
# train_config: batch size 16, flip + SSD random-crop augmentation, RMSProp with an
# exponentially decaying learning rate, fine-tuning from an existing detection checkpoint.
train_config {
  batch_size: 16
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    ssd_random_crop {
    }
  }
  keep_checkpoint_every_n_hours: 24.0
  optimizer {
    rms_prop_optimizer {
      learning_rate {
        exponential_decay_learning_rate {
          initial_learning_rate: 0.00079999997979
          decay_steps: 10000
          decay_factor: 0.949999988079
        }
      }
      momentum_optimizer_value: 0.899999976158
      decay: 0.899999976158
      epsilon: 1.0
    }
  }
  gradient_clipping_by_norm: 10.0
  # NOTE(review): no visible cell in this notebook places a checkpoint under
  # ssd_oid_model/ (the only model downloaded is the ssd_mobilenet_v1_fpn archive,
  # unpacked under /content/training/models/) -- confirm this path exists, and that
  # the checkpoint matches the ssd_mobilenet_v2 feature extractor, before training.
  fine_tune_checkpoint: "/content/training/models/ssd_oid_model/model.ckpt"
  from_detection_checkpoint: true
  load_all_detection_checkpoint_vars: true
  # NOTE(review): the export cells reference a checkpoint at step 9849; verify which
  # checkpoint step a run with this num_steps actually leaves behind.
  num_steps: 10000
}
# train_input_reader: training examples and label map, both fetched by the download cell
# into /content/training/data.
train_input_reader {
  label_map_path: "/content/training/data/labelMap.pbtxt"
  tf_record_input_reader {
    input_path: "/content/training/data/train.record"
  }
}
# eval_config: evaluation settings (Open Images V2 detection metrics).
# NOTE(review): num_examples presumably reflects the size of test.record -- confirm,
# especially given sample_1_of_n_examples in eval_input_reader.
eval_config {
  num_examples: 4529
  metrics_set: "oid_V2_detection_metrics"
  use_moving_averages: false
  eval_interval_secs: 1000
}
# eval_input_reader: evaluation examples from test.record, read unshuffled by a single
# reader, using the same label map as training.
eval_input_reader {
  label_map_path: "/content/training/data/labelMap.pbtxt"
  shuffle: false
  num_readers: 1
  tf_record_input_reader {
    input_path: "/content/training/data/test.record"
  }
  # NOTE(review): presumably evaluates on only every 10th example -- confirm intended.
  sample_1_of_n_examples: 10
}
# graph_rewriter: 8-bit weight/activation quantization, matching the QUANTIZED_UINT8
# inference type requested in the tflite_convert cell.
# NOTE(review): `delay` is presumably the number of training steps before quantization
# is switched on -- confirm against the Object Detection API docs.
graph_rewriter {
  quantization {
    delay: 1000
    weight_bits: 8
    activation_bits: 8
  }
}


Writing /content/training/models/model/pipeline.config


## Actual training and evaluation of model

In [0]:
# Train model using the legacy train.py entry point.
# --train_dir is the same directory TensorBoard was pointed at (LOG_DIR) and the one
# holding pipeline.config.
!python /content/models/research/object_detection/legacy/train.py \
  --logtostderr \
  --train_dir=/content/training/models/model/ \
  --pipeline_config_path=/content/training/models/model/pipeline.config

In [0]:
# Evaluate model using the legacy eval.py entry point.
# Checkpoints are read from, and evaluation output is written to, the same model
# directory used for training.
!python /content/models/research/object_detection/legacy/eval.py \
  --logtostderr \
  --checkpoint_dir=/content/training/models/model/ \
  --eval_dir=/content/training/models/model/ \
  --pipeline_config_path=/content/training/models/model/pipeline.config

In [0]:
!rm -rf /content/training/models/model
!mkdir /content/training/models/model

In [0]:
# Train and eval model using model_main.py (alternative to the legacy train.py / eval.py
# cells above). Reads pipeline.config from, and uses as --model_dir, the same model
# directory.
!python /content/models/research/object_detection/model_main.py \
  --pipeline_config_path=/content/training/models/model/pipeline.config \
  --model_dir=/content/training/models/model/ \
  --alsologtostderr

## Save model

In [0]:
!rm -rf /content/training/models/savedModel
# Save model
INPUT_TYPE="image_tensor"
INPUT_SHAPE=1,300,300,3
PIPELINE_CONFIG_PATH="/content/training/models/model/pipeline.config"
TRAINED_CKPT_PREFIX="/content/training/models/model/model.ckpt-9849"
EXPORT_DIR="/content/training/models/savedModel"
!python /content/models/research/object_detection/export_inference_graph.py \
  --input_type={INPUT_TYPE} \
  --pipeline_config_path={PIPELINE_CONFIG_PATH} \
  --trained_checkpoint_prefix={TRAINED_CKPT_PREFIX} \
  --output_directory={EXPORT_DIR}

## Run inference on the trained model

In [0]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import glob
import pdb

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt

from PIL import Image
import base64
from io import BytesIO


%cd /content/models/research
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

In [0]:
# What model to download.
# NOTE(review): MODEL_NAME, MODEL_FILE and DOWNLOAD_BASE are only referenced by the
# commented-out download code further down; the graph is loaded from PATH_TO_CKPT.
# MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'

# A model with higher accuracy; feel free to use a different model.
MODEL_NAME = 'faster_rcnn_inception_resnet_v2_atrous_coco_2017_11_08'

MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Path to frozen detection graph. This is the actual model that is used for the object detection.
# NOTE(review): the export cell writes to /content/training/models/savedModel, and the
# ssd_oid_model directory is only created by a later cell -- confirm this is the graph
# you intend to run and that the file exists before running this cell.
PATH_TO_CKPT = '/content/training/models/ssd_oid_model/frozen_inference_graph.pb'

# Label map used to attach the correct class name to each box.
PATH_TO_LABELS = '/content/training/data/labelMap.pbtxt'

# Same value as num_classes in pipeline.config.
NUM_CLASSES = 62

# Disabled: download MODEL_FILE and extract its frozen_inference_graph.pb.
# opener = urllib.request.URLopener()
# opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
# tar_file = tarfile.open(MODEL_FILE)
# for file in tar_file.getmembers():
#   file_name = os.path.basename(file.name)
#   if 'frozen_inference_graph.pb' in file_name:
#     tar_file.extract(file, os.getcwd())
    
# Deserialize the frozen GraphDef and import it (with no name prefix) into a fresh graph.
detection_graph = tf.Graph()
with detection_graph.as_default():
  od_graph_def = tf.GraphDef()
  with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
    serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')
    
# Build the category index that the visualization call in the inference cell uses.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

In [0]:
def load_image_into_numpy_array(image):
  """Convert a PIL image into an (height, width, 3) uint8 numpy array.

  The image is converted to RGB first, so grayscale, palette and RGBA images
  produce three channels as well instead of failing in the reshape.

  Args:
    image: a PIL image (anything providing `size`, `convert` and `getdata`).

  Returns:
    A new numpy array of shape (height, width, 3) and dtype uint8.
  """
  # PIL reports size as (width, height); numpy image arrays are (rows, cols, channels).
  (im_width, im_height) = image.size
  rgb_image = image.convert('RGB')
  return np.array(rgb_image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

In [0]:
!mkdir /content/training/models/ssd_oid_model

In [0]:
# Run inference on every .jpg found in PATH_TO_TEST_IMAGES_DIR.
# To test with your own images, copy them into that directory (or append their paths
# to TEST_IMAGE_PATHS).
PATH_TO_TEST_IMAGES_DIR = '/content/test_images'
# glob already returns paths that include the directory, so they are used as-is;
# sorting makes the processing order deterministic across runs.
TEST_IMAGE_PATHS = sorted(glob.glob(os.path.join(PATH_TO_TEST_IMAGES_DIR, '*.jpg')))


# Size, in inches, of the output images.
IMAGE_SIZE = (4, 2)

In [0]:
# Run the detector on every test image and plot the image with its detections drawn on.
with detection_graph.as_default():
  with tf.Session(graph=detection_graph) as sess:
    # Look up the input and output tensors of detection_graph by name.
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Each box represents a part of the image where a particular object was detected.
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represents the level of confidence for the corresponding detection.
    # Score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    for image_path in TEST_IMAGE_PATHS:
      # Only the pixel array is needed, so the image file is closed as soon as it
      # has been converted.
      with Image.open(image_path) as image:
        # The array based representation of the image is fed to the model and is
        # also the canvas the boxes and labels are drawn on below.
        image_np = load_image_into_numpy_array(image)

      # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
      image_np_expanded = np.expand_dims(image_np, axis=0)
      # Actual detection.
      (boxes, scores, classes, num) = sess.run(
          [detection_boxes, detection_scores, detection_classes, num_detections],
          feed_dict={image_tensor: image_np_expanded})
      # Visualization of the results of a detection (drops the batch dimension first).
      vis_util.visualize_boxes_and_labels_on_image_array(
          image_np,
          np.squeeze(boxes),
          np.squeeze(classes).astype(np.int32),
          np.squeeze(scores),
          category_index,
          use_normalized_coordinates=True,
          line_thickness=8)
      plt.figure(figsize=IMAGE_SIZE)
      plt.imshow(image_np)

Output hidden; open in https://colab.research.google.com to view.

## TFLite Conversion

In [0]:
INPUT_TYPE="image_tensor"
INPUT_SHAPE=1,300,300,3
PIPELINE_CONFIG_PATH="/content/training/models/model/pipeline.config"
TRAINED_CKPT_PREFIX="/content/training/models/model/model.ckpt-9849"
EXPORT_DIR="/content/training/models/savedModel"

!rm -rf {EXPORT_DIR}
!python /content/models/research/object_detection/export_tflite_ssd_graph.py \
--pipeline_config_path={PIPELINE_CONFIG_PATH} \
--trained_checkpoint_prefix={TRAINED_CKPT_PREFIX} \
--output_directory={EXPORT_DIR} \
--add_postprocessing_op=true

In [0]:
# Convert the exported tflite_graph.pb into a quantized /content/model.tflite.
# - input shape 1,300,300,3 matches the fixed_shape_resizer in pipeline.config
# - the four output arrays are the outputs of the TFLite_Detection_PostProcess op
# - NOTE(review): mean/std of 128 presumably map uint8 input to roughly [-1, 1)
#   via (value - mean) / std -- confirm against the tflite_convert documentation
# - NOTE(review): --allow_custom_ops is presumably required because
#   TFLite_Detection_PostProcess is a custom op -- confirm
!tflite_convert \
  --output_file=/content/model.tflite \
  --graph_def_file=/content/training/models/savedModel/tflite_graph.pb \
  --input_shapes=1,300,300,3 \
  --input_arrays=normalized_input_image_tensor \
  --output_arrays='TFLite_Detection_PostProcess','TFLite_Detection_PostProcess:1','TFLite_Detection_PostProcess:2','TFLite_Detection_PostProcess:3'  \
  --inference_type=QUANTIZED_UINT8 \
  --mean_values=128 \
  --std_dev_values=128 \
  --change_concat_input_ranges=false \
  --allow_custom_ops