In [0]:
%tensorflow_version 1.x

In [0]:
!pip install 'tensorflow==1.14.0'

In [0]:
import tensorflow
tensorflow.__version__

In [0]:
import numpy
numpy.__version__

In [0]:
!pip install numpy===1.16.4

In [0]:
import os

# ------------------------------------------------------------------------------
# Settings a user is expected to tune.
# ------------------------------------------------------------------------------
# Value handed to the training command as --num_train_steps.
NUM_TRAIN_STEPS = 5000

# Name of the pre-trained checkpoint archive and of the matching sample config.
# Alternative pair (quantized SSD MobileNet v1):
#   MODEL_TYPE = 'ssd_mobilenet_v1_quantized_300x300_coco14_sync_2018_07_18'
#   CONFIG_TYPE = 'ssd_mobilenet_v1_quantized_300x300_coco14_sync'
MODEL_TYPE = 'ssd_mobilenet_v2_coco_2018_03_29'
CONFIG_TYPE = 'ssd_mobilenet_v2_coco'

# ------------------------------------------------------------------------------
# Fixed locations on the Colab VM.
# ------------------------------------------------------------------------------
GOOGLE_DRIVE_MOUNT = '/content/drive'
CHECKPOINT_PATH = '/content/checkpoint'
OUTPUT_PATH = '/content/output'
EXPORTED_PATH = '/content/exported'
# The data folder lives inside the mounted Drive.
DATA_PATH = GOOGLE_DRIVE_MOUNT + '/My Drive/data'

In [0]:
import os

!cd /content
!git clone --depth=1 https://github.com/tensorflow/models.git
!pip install --no-deps tensorflowjs==1.4.0

%cd /content/models/research
!protoc object_detection/protos/*.proto --python_out=.

pwd = os.getcwd()
os.environ['PYTHONPATH'] += f':{pwd}:{pwd}/slim'

In [0]:
# Test Setup
# Runs the Object Detection API's model_builder_test.py script; the relative
# path assumes the working directory is still /content/models/research.
!python object_detection/builders/model_builder_test.py

In [0]:
from google.colab import drive
# Mount Google Drive at GOOGLE_DRIVE_MOUNT; the dataset paths used by later
# cells ('/content/drive/My Drive/...') live under this mount point.
drive.mount(GOOGLE_DRIVE_MOUNT)

# Generate TFRecords
The TensorFlow Object Detection API expects our data to be in the format of TFRecords.

The TFRecord format is a collection of serialized feature dicts, one for each image, looking something like this:
```
{
  'image/height': 1800,
  'image/width': 2400,
  'image/filename': 'image1.jpg',
  'image/source_id': 'image1.jpg',
  'image/encoded': ACTUAL_ENCODED_IMAGE_DATA_AS_BYTES,
  'image/format': 'jpeg',
  'image/object/bbox/xmin': [0.7255949630314233, 0.8845598428835489],
  'image/object/bbox/xmax': [0.9695875693160814, 1.0000000000000000],
  'image/object/bbox/ymin': [0.5820120073891626, 0.1829972290640394],
  'image/object/bbox/ymax': [1.0000000000000000, 0.9662484605911330],
  'image/object/class/text': (['Cat', 'Dog']),
  'image/object/class/label': ([1, 2])
}
```

In [0]:
# Outputs of the TFRecord generation step, all stored in the Drive data folder.
LABEL_MAP_PATH, TRAIN_RECORD_PATH, VAL_RECORD_PATH = (
    os.path.join(DATA_PATH, file_name)
    for file_name in ('label_map.pbtxt', 'train.record', 'val.record')
)

# Inputs under the OIDv4_ToolKit folder in Drive: the class list, the two CSV
# files and the image directory read by the following cells.
MY_DRIVE = '/content/drive/My Drive/'
classes_file = f'{MY_DRIVE}OIDv4_ToolKit/classes.txt'
class_descriptions_file = f'{MY_DRIVE}OIDv4_ToolKit/OID/csv_folder/class-descriptions-boxable.csv'
annotations_file = f'{MY_DRIVE}OIDv4_ToolKit/OID/csv_folder/train-annotations-bbox.csv'
images_dir = f'{MY_DRIVE}OIDv4_ToolKit/OID/Dataset/train'

In [0]:
# Write the class names in classes.txt, one name per row

#### classes.txt Example
Apple
Orange
Light switch

In [0]:
import os
import json

# Read the class names from classes.txt: one name per line, surrounding
# whitespace (including a stray '\r') removed, blank lines skipped. Duplicates
# are dropped while keeping first-seen order, so the ids stored in `classes`
# and the ids written to the label map can never disagree.
with open(classes_file) as f:
  class_names = list(dict.fromkeys(line.strip() for line in f if line.strip()))

# Map each class name to its 1-based id (ids must start at 1).
classes = {name: idx + 1 for idx, name in enumerate(class_names)}
print(f'Classes: {classes}')

# Create label_map.pbtxt with one `item { name, id }` entry per class.
os.makedirs(DATA_PATH, exist_ok=True)
with open(LABEL_MAP_PATH, 'w') as f:
  for label, label_id in classes.items():
    f.write('item {\n')
    f.write("\tname: '{}'\n".format(label))
    f.write('\tid: {}\n'.format(label_id))
    f.write('}\n')

In [0]:
import pandas as pd
# Re-read the class list so this cell works on its own. The file is closed
# explicitly; names are stripped and de-duplicated in first-seen order.
with open(classes_file) as f:
  class_names = list(dict.fromkeys(line.strip() for line in f if line.strip()))
classes = {name: idx + 1 for idx, name in enumerate(class_names)}
print(f'Classes: {classes}')

# The descriptions CSV is read without a header row. Column 0 holds the code
# found in the annotations' LabelName column, column 1 the name it maps to.
descriptions = pd.read_csv(class_descriptions_file, header=None)
class_descriptions = dict(zip(descriptions[0], descriptions[1]))

# Replace every LabelName code by its description (an unknown code raises
# KeyError), then group the rows by ImageID for per-image lookup.
annotations = pd.read_csv(annotations_file)
annotations['LabelName'] = annotations['LabelName'].map(lambda n: class_descriptions[n])
annotations = annotations.groupby('ImageID')

In [0]:
import os
import io
import json
import random

from PIL import Image
import PIL.Image
import tensorflow as tf

from object_detection.utils import dataset_util
from object_detection.utils import label_map_util


def create_tf_record(images, label_map, output_file):
  """Write one tf.train.Example per usable image to a TFRecord file.

  The image id is the file name up to '.jpg'. Its boxes are looked up in the
  module-level `annotations` groupby (keyed by ImageID) and restricted to the
  class names present in `label_map`.

  Args:
    images: iterable of JPEG file paths. Paths that share an image id collapse
      to a single entry (the last path wins).
    label_map: dict mapping class name -> integer label id.
    output_file: path of the TFRecord file to create.

  An image is skipped, with a message printed, when it cannot be read, is not
  a JPEG, has no annotation group, has no box of a wanted class, is smaller
  than 50 px on either side, or when its first box has a width/height ratio
  outside [0.2, 5].
  """
  images = {os.path.basename(path).split('.jpg')[0]: path for path in images}
  print(f'{len(images)} images')
  # Filter on the label map that was passed in rather than a notebook global.
  wanted_labels = list(label_map)
  count = 0
  writer = tf.python_io.TFRecordWriter(output_file)
  try:
    for image_id, path in images.items():
      count = count + 1
      try:
        # Read the file once; the same bytes are inspected and stored.
        # The built-in open() keeps unreadable files on the IOError path below.
        with open(path, 'rb') as fid:
          encoded_jpg = fid.read()

        # Decode the header with PIL to check the format and get the size.
        image = PIL.Image.open(io.BytesIO(encoded_jpg))
        if image.format != 'JPEG':
          raise ValueError('Image format not JPEG')
        img_width, img_height = image.size

        try:
          image_annotations = annotations.get_group(image_id)
        except KeyError:
          print(f'No annotations for {image_id}, ignoring.')
          continue

        wanted_boxes = image_annotations.loc[
            image_annotations['LabelName'].isin(wanted_labels)]
        xmins = wanted_boxes['XMin'].tolist()
        xmaxs = wanted_boxes['XMax'].tolist()
        ymins = wanted_boxes['YMin'].tolist()
        ymaxs = wanted_boxes['YMax'].tolist()
        label_names = wanted_boxes['LabelName'].tolist()
        classes_text = [name.encode('utf8') for name in label_names]
        classes_int = [label_map[name] for name in label_names]

        if img_width < 50 or img_height < 50:
          raise AssertionError('Wrong dimensions')
        if not xmins:
          raise IndexError('No boxes for the requested classes')

        # NOTE(review): only the first box is checked, and the ratio is taken
        # on the coordinates as stored in the CSV - confirm this is intended.
        first_box_width = xmaxs[0] - xmins[0]
        first_box_height = ymaxs[0] - ymins[0]
        if first_box_height <= 0:
          # A degenerate box would otherwise divide by zero.
          raise AssertionError('Wrong dimensions')
        ratio = first_box_width / first_box_height
        if ratio < 0.2 or ratio > 5.:
          raise AssertionError('Wrong dimensions')

        # Create the TFExample.
        tf_example = tf.train.Example(features=tf.train.Features(feature={
            'image/height': dataset_util.int64_feature(img_height),
            'image/width': dataset_util.int64_feature(img_width),
            'image/filename': dataset_util.bytes_feature(image_id.encode('utf8')),
            'image/source_id': dataset_util.bytes_feature(image_id.encode('utf8')),
            'image/encoded': dataset_util.bytes_feature(encoded_jpg),
            'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label': dataset_util.int64_list_feature(classes_int),
        }))

        writer.write(tf_example.SerializeToString())
        # A separator keeps consecutive progress numbers readable.
        print(count, end=' ')
      except ValueError:
        print('Invalid example, ignoring.')
      except AssertionError:
        print('Wrong Dimensions')
      except IndexError:
        print("Index Error")
      except IOError:
        print("Can't read example, ignoring.")
  finally:
    # Close the writer even if an unexpected error aborts the loop.
    writer.close()
  print(" done")

import glob

# glob returns paths in arbitrary, filesystem-dependent order, so sort them
# first; otherwise the seeded shuffle below is not reproducible.
# An image stored in several class folders is kept once (first path in sorted
# order), so the same image cannot end up in both the training and the
# validation split.
unique_images = {}
for image_path in sorted(glob.glob(images_dir + '/*/*.jpg')):
  unique_images.setdefault(os.path.basename(image_path), image_path)
images = list(unique_images.values())

# Load the label map we created.
label_map = label_map_util.get_label_map_dict(LABEL_MAP_PATH)

# Deterministic 90% / 10% train / validation split.
random.seed(42)
random.shuffle(images)
num_train = int(0.9 * len(images))
train_examples = images[:num_train]
val_examples = images[num_train:]
create_tf_record(train_examples, label_map, TRAIN_RECORD_PATH)
create_tf_record(val_examples, label_map, VAL_RECORD_PATH)

In [0]:
import os
import tarfile

import six.moves.urllib as urllib

download_base = 'http://download.tensorflow.org/models/object_detection/'
model = MODEL_TYPE + '.tar.gz'
tmp = '/content/checkpoint.tar.gz'

# Treat the checkpoint as present only when the directory actually holds
# `model.ckpt*` files; an empty directory left by a failed run no longer
# blocks the download.
# NOTE: files extracted for a previous MODEL_TYPE are still reused; delete
# CHECKPOINT_PATH by hand after changing MODEL_TYPE.
checkpoint_ready = os.path.isdir(CHECKPOINT_PATH) and any(
    name.startswith('model.ckpt') for name in os.listdir(CHECKPOINT_PATH))

if not checkpoint_ready:
  try:
    # Download the checkpoint (urlretrieve replaces the deprecated URLopener).
    urllib.request.urlretrieve(download_base + model, tmp)

    # Extract all the `model.ckpt` files, flattened into CHECKPOINT_PATH.
    with tarfile.open(tmp) as tar:
      for member in tar.getmembers():
        # Dropping the directory part puts every file directly in the target.
        member.name = os.path.basename(member.name)
        if 'model.ckpt' in member.name:
          tar.extract(member, path=CHECKPOINT_PATH)
  finally:
    # Remove the archive whether or not the download/extraction succeeded.
    if os.path.exists(tmp):
      os.remove(tmp)

In [0]:
import re

from google.protobuf import text_format

from object_detection.utils import config_util
from object_detection.utils import label_map_util

# Sample pipeline config shipped with the Object Detection API for CONFIG_TYPE.
# (Quantized v1 alternative: 'ssd_mobilenet_v1_quantized_300x300_coco14_sync'.)
sample_configs_dir = '/content/models/research/object_detection/samples/configs/'
pipeline_skeleton = f'{sample_configs_dir}{CONFIG_TYPE}.config'

configs = config_util.get_configs_from_pipeline_file(pipeline_skeleton)

# The number of classes comes from the label map written earlier.
label_map = label_map_util.get_label_map_dict(LABEL_MAP_PATH)
num_classes = len(label_map)
# Name of the model sub-message that is set in the sample config.
meta_arch = configs["model"].WhichOneof("model")

# Values that replace the sample config's defaults: class count, a small
# batch size, the record files, the fine-tune checkpoint and the label map.
override_dict = dict([
    (f'model.{meta_arch}.num_classes', num_classes),
    ('train_config.batch_size', 24),
    ('train_input_path', TRAIN_RECORD_PATH),
    ('eval_input_path', VAL_RECORD_PATH),
    ('train_config.fine_tune_checkpoint', os.path.join(CHECKPOINT_PATH, 'model.ckpt')),
    ('label_map_path', LABEL_MAP_PATH),
])

# Merge the overrides and save the result into /content.
configs = config_util.merge_external_params_with_configs(configs, kwargs_dict=override_dict)
pipeline_config = config_util.create_pipeline_proto_from_configs(configs)
config_util.save_pipeline_config(pipeline_config, '/content')

In [0]:
# Start from an empty OUTPUT_PATH, then run model_main with the pipeline config
# at /content/pipeline.config, NUM_TRAIN_STEPS training steps and 100
# evaluation steps. Checkpoints are written to OUTPUT_PATH (--model_dir).
!rm -rf $OUTPUT_PATH
!python -m object_detection.model_main \
    --pipeline_config_path=/content/pipeline.config \
    --model_dir=$OUTPUT_PATH \
    --num_train_steps=$NUM_TRAIN_STEPS \
    --num_eval_steps=100

In [0]:
# Runs model_main again on the same --model_dir, which is NOT cleared here,
# with --num_train_steps fixed at 200.
# NOTE(review): presumably this resumes from the latest checkpoint in
# OUTPUT_PATH; if that checkpoint is already past step 200 the run may do no
# further training - confirm the intended step count.
!python -m object_detection.model_main \
    --pipeline_config_path=/content/pipeline.config \
    --model_dir=$OUTPUT_PATH \
    --num_train_steps=200 \
    --num_eval_steps=100

In [0]:
import os
import re
import tensorflow
regex = re.compile(r"model\.ckpt-([0-9]+)\.index")
numbers = [int(regex.search(f).group(1)) for f in os.listdir(OUTPUT_PATH) if regex.search(f)]
TRAINED_CHECKPOINT_PREFIX = os.path.join(OUTPUT_PATH, 'model.ckpt-{}'.format(max(numbers)))

print(f'Using {TRAINED_CHECKPOINT_PREFIX}')

!rm -rf $EXPORTED_PATH
!python -m object_detection.export_inference_graph \
  --pipeline_config_path=/content/pipeline.config \
  --trained_checkpoint_prefix=$TRAINED_CHECKPOINT_PREFIX \
  --output_directory=$EXPORTED_PATH

In [0]:
LABEL_MAP_PATH    = '/content/drive/My Drive/data/label_map.pbtxt'
from IPython.display import display, Javascript, Image
from google.colab.output import eval_js
from base64 import b64decode

# Use JavaScript to take a photo.
# def take_photo(filename, quality=0.8):
#   js = Javascript('''
#     async function takePhoto(quality) {
#       const div = document.createElement('div');
#       const capture = document.createElement('button');
#       capture.textContent = 'Capture';
#       div.appendChild(capture);

#       const video = document.createElement('video');
#       video.style.display = 'block';
#       const stream = await navigator.mediaDevices.getUserMedia({video: true});

#       document.body.appendChild(div);
#       div.appendChild(video);
#       video.srcObject = stream;
#       await video.play();

#       // Resize the output to fit the video element.
#       google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

#       // Wait for Capture to be clicked.
#       await new Promise((resolve) => capture.onclick = resolve);

#       const canvas = document.createElement('canvas');
#       canvas.width = video.videoWidth;
#       canvas.height = video.videoHeight;
#       canvas.getContext('2d').drawImage(video, 0, 0);
#       stream.getVideoTracks()[0].stop();
#       div.remove();
#       return canvas.toDataURL('image/jpeg', quality);
#     }
#     ''')
#   display(js)
#   data = eval_js('takePhoto({})'.format(quality))
#   binary = b64decode(data.split(',')[1])
#   with open(filename, 'wb') as f:
#     f.write(binary)
#   return filename

# try:
#   take_photo('/content/photo.jpg')
# except Exception as err:
#   # Errors will be thrown if the user does not have a webcam or if they do not
#   # grant the page permission to access it.
#   print(str(err))

# Use the captured photo to make predictions
%matplotlib inline

import os
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
from PIL import Image as PImage
from object_detection.utils import visualization_utils as vis_util
from object_detection.utils import label_map_util

# Load the labels
category_index = label_map_util.create_category_index_from_labelmap(LABEL_MAP_PATH, use_display_name=True)

# Load the frozen inference graph exported earlier into its own tf.Graph.
path_to_frozen_graph = os.path.join(EXPORTED_PATH, 'frozen_inference_graph.pb')
detection_graph = tf.Graph()
with detection_graph.as_default():
  od_graph_def = tf.GraphDef()
  with tf.gfile.GFile(path_to_frozen_graph, 'rb') as fid:
    od_graph_def.ParseFromString(fid.read())
  tf.import_graph_def(od_graph_def, name='')

with detection_graph.as_default():
  with tf.Session(graph=detection_graph) as sess:
    # Input and output tensors of detection_graph.
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Each box represents a part of the image where a particular object was detected.
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score is the level of confidence for the corresponding detection.
    # The score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # Force three channels so grayscale / RGBA / palette images also work.
    image = PImage.open('/content/pic_004.jpg').convert('RGB')
    # (height, width, 3) uint8 copy; the boxes and labels are drawn on it.
    image_np = np.array(image, dtype=np.uint8)
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)

    # Actual detection. The result is deliberately not called `classes`, so
    # the class-name dict built by earlier cells is not overwritten.
    (boxes, scores, detected_classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: image_np_expanded})

    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(detected_classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=.09)

    # Look up each detected class id in the label map; ids missing from the
    # map give None instead of crashing the cell.
    preds = [category_index.get(int(class_id)) for class_id in detected_classes[0]]
    if preds and preds[0] is not None:
      print(preds[0]['name'], scores[0][0])
    else:
      print('Top detection has no entry in the label map')
    plt.figure(figsize=(12, 8))
    plt.imshow(image_np)

In [0]:
# Show the `serving_default` signature (tag set `serve`) of the SavedModel in
# /content/exported/saved_model.
!saved_model_cli show --dir /content/exported/saved_model --tag_set serve --signature_def serving_default

In [0]:
!tensorflowjs_converter 
  --input_format=tf_saved_model \
  --output_node_names='detection_boxes,detection_classes,detection_multiclass_scores,detection_scores,num_detections,raw_detection_boxes,raw_detection_scores' \
  --saved_model_tags=serve \
  --output_format=tfjs_graph_model \
  /content/exported/saved_model \
  /content/web_model

In [0]:
!tensorflowjs_converter \
  --input_format=tf_frozen_model \
  --output_format=tfjs_graph_model \
  --output_node_names='Postprocessor/ExpandDims_1,Postprocessor/Slice' \
  --quantization_bytes=1 \
  --skip_op_check \
  $EXPORTED_PATH/frozen_inference_graph.pb \
  /content/model_web

import json

from object_detection.utils.label_map_util import get_label_map_dict

label_map = get_label_map_dict(LABEL_MAP_PATH)
label_array = [k for k in sorted(label_map, key=label_map.get)]

with open(os.path.join('/content/model_web', 'labels.json'), 'w') as f:
  json.dump(label_array, f)

!cd /content/model_web && zip -r /content/model_web.zip *

In [0]:
from google.colab import files
# Offer /content/model_web.zip (frozen-graph TF.js model plus labels.json) as
# a browser download.
files.download('/content/model_web.zip') 

In [0]:
# Same frozen-graph -> TF.js conversion, with identical flags and output
# directory (/content/model_web), as the earlier conversion cell.
# NOTE(review): this looks like a leftover duplicate - confirm whether it is
# still needed.
!tensorflowjs_converter \
  --input_format=tf_frozen_model \
  --output_format=tfjs_graph_model \
  --output_node_names='Postprocessor/ExpandDims_1,Postprocessor/Slice' \
  --quantization_bytes=1 \
  --skip_op_check \
  $EXPORTED_PATH/frozen_inference_graph.pb \
  /content/model_web

In [0]:
# Zip /content/web_model, the output directory of the SavedModel conversion
# cell, into /content/web_model.zip.
!cd /content/web_model && zip -r /content/web_model.zip *