In [1]:
import os
import tensorflow as tf
import tensorflow_datasets as tfds

## One GPU strategy

In [2]:
# get available GPU
devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(devices[0], True)
gpu_name = "GPU:0"
print(devices[0])

# Only one gpu available to set to OneDeviceStrategy 
# Can be changed to MirroredStrategy if multiple GPU available
strategy = tf.distribute.OneDeviceStrategy(device=gpu_name)

PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


## Get VOC 2012 dataset

In [3]:
# get voc 2012 dataset
splits = ['train[:80%]', 'train[80%:90%]', 'train[90%:]']

(train_examples, validation_examples, test_examples), info = tfds.load('voc/2012', batch_size=32, with_info=True, split=splits)

[1mDownloading and preparing dataset voc/2012/4.0.0 (download: 3.59 GiB, generated: Unknown size, total: 3.59 GiB) to /root/tensorflow_datasets/voc/2012/4.0.0...[0m


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Completed...', max=1.0, style=Progre…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Size...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Extraction completed...', max=1.0, styl…









HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/voc/2012/4.0.0.incompleteU6BXE1/voc-test.tfrecord


HBox(children=(FloatProgress(value=0.0, max=10991.0), HTML(value='')))

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/voc/2012/4.0.0.incompleteU6BXE1/voc-train.tfrecord


HBox(children=(FloatProgress(value=0.0, max=5717.0), HTML(value='')))

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/voc/2012/4.0.0.incompleteU6BXE1/voc-validation.tfrecord


HBox(children=(FloatProgress(value=0.0, max=5823.0), HTML(value='')))

[1mDataset voc downloaded and prepared to /root/tensorflow_datasets/voc/2012/4.0.0. Subsequent calls will reuse this data.[0m


In [4]:
info.features

FeaturesDict({
    'image': Image(shape=(None, None, 3), dtype=tf.uint8),
    'image/filename': Text(shape=(), dtype=tf.string),
    'labels': Sequence(ClassLabel(shape=(), dtype=tf.int64, num_classes=20)),
    'labels_no_difficult': Sequence(ClassLabel(shape=(), dtype=tf.int64, num_classes=20)),
    'objects': Sequence({
        'bbox': BBoxFeature(shape=(4,), dtype=tf.float32),
        'is_difficult': tf.bool,
        'is_truncated': tf.bool,
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=20),
        'pose': ClassLabel(shape=(), dtype=tf.int64, num_classes=5),
    }),
})

In [5]:
num_examples = info.splits['train'].num_examples
num_classes = info.features['labels'].num_classes
print(f"Number of train examples: {num_examples}, number of labels: {num_classes}")

Number of train examples: 5717, number of labels: 20


## Preprocess VOC2012

In [6]:
# resize and normalize images
@tf.function
def format_image(tensor):
    images = tf.image.resize(tensor['image'], IMAGE_SIZE) / 255.0
    return images, tf.one_hot(tensor['objects']['label'], 20), tensor['objects']['bbox']

In [7]:
BUFFER_SIZE = num_examples
EPOCHS = 10
IMAGE_SIZE = (640, 640)

BATCH_SIZE_PER_REPLICA = 32
GLOBAL_BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync

In [8]:
# prepare batches
BATCH_SIZE = 32
train_batches = train_examples.shuffle(num_examples // 4).map(format_image).prefetch(1)
validation_batches = validation_examples.map(format_image)
test_batches = test_examples.map(format_image).batch(1)

In [9]:
for batch, labels, boxes in train_batches:
     break
print(f"Batch shape: {batch.shape}")
print(f"Labels shape: {labels.shape} and boxes shape: {boxes.shape}")

Batch shape: (32, 640, 640, 3)
Labels shape: (32, 10, 20) and boxes shape: (32, 10, 4)


## Distribute dataset over GPUs

In [10]:
# def distribute_datasets(strategy, train_batches, validation_batches, test_batches):
#     train_dist_dataset = strategy.experimental_distribute_dataset(train_batches)
#     val_dist_dataset = strategy.experimental_distribute_dataset(validation_batches)
#     test_dist_dataset = strategy.experimental_distribute_dataset(test_batches)

#     return train_dist_dataset, val_dist_dataset, test_dist_dataset

In [11]:
# train_dist_dataset, val_dist_dataset, test_dist_dataset = distribute_datasets(strategy, train_batches, validation_batches, test_batches)
# print(type(train_dist_dataset))

In [12]:
# for batch, labels, boxes in test_dist_dataset:
#     break
# print(f"Batch shape: {batch.shape}")
# print(f"Labels shape: {labels.shape} and boxes shape: {boxes.shape}")

## Model

In [13]:
import os
import pathlib

# Clone the tensorflow models repository if it doesn't already exist
if "models" in pathlib.Path.cwd().parts:
  while "models" in pathlib.Path.cwd().parts:
    os.chdir('..')
elif not pathlib.Path('models').exists():
  !git clone --depth 1 https://github.com/tensorflow/models

Cloning into 'models'...
remote: Enumerating objects: 2362, done.[K
remote: Counting objects: 100% (2362/2362), done.[K
remote: Compressing objects: 100% (2059/2059), done.[K
remote: Total 2362 (delta 577), reused 937 (delta 277), pack-reused 0[K
Receiving objects: 100% (2362/2362), 30.69 MiB | 9.90 MiB/s, done.
Resolving deltas: 100% (577/577), done.


In [14]:
# Install the Object Detection API
%%bash
cd models/research/
protoc object_detection/protos/*.proto --python_out=.
cp object_detection/packages/tf2/setup.py .
python -m pip install .

Processing /content/models/research
Collecting avro-python3
  Downloading https://files.pythonhosted.org/packages/3f/84/ef37f882a7d93674d6fe1aa6e99f18cf2f34e9b775952f3d85587c11c92e/avro-python3-1.10.1.tar.gz
Collecting apache-beam
  Downloading https://files.pythonhosted.org/packages/87/cc/a4b787f070e749836ffec7ed6e7df606b5a00ef5b8307cf2c7f7a6a14f1c/apache_beam-2.26.0-cp36-cp36m-manylinux2010_x86_64.whl (8.9MB)
Collecting tf-slim
  Downloading https://files.pythonhosted.org/packages/02/97/b0f4a64df018ca018cc035d44f2ef08f91e2e8aa67271f6f19633a015ff7/tf_slim-1.1.0-py2.py3-none-any.whl (352kB)
Collecting lvis
  Downloading https://files.pythonhosted.org/packages/72/b6/1992240ab48310b5360bfdd1d53163f43bb97d90dc5dc723c67d41c38e78/lvis-0.5.3-py3-none-any.whl
Collecting tf-models-official
  Downloading https://files.pythonhosted.org/packages/5b/33/91e5e90e3e96292717245d3fe87eb3b35b07c8a2113f2da7f482040facdb/tf_models_official-2.3.0-py2.py3-none-any.whl (840kB)
Collecting hdfs<3.0.0,>=2.1.0
  

ERROR: multiprocess 0.70.11.1 has requirement dill>=0.3.3, but you'll have dill 0.3.1.1 which is incompatible.
ERROR: google-colab 1.0.0 has requirement requests~=2.23.0, but you'll have requests 2.25.1 which is incompatible.
ERROR: datascience 0.10.6 has requirement folium==0.2.1, but you'll have folium 0.8.3 which is incompatible.
ERROR: apache-beam 2.26.0 has requirement avro-python3!=1.9.2,<1.10.0,>=1.8.1, but you'll have avro-python3 1.10.1 which is incompatible.


In [15]:
import matplotlib
import matplotlib.pyplot as plt

import os
import random
import io
import imageio
import glob
import scipy.misc
import numpy as np
from six import BytesIO
from PIL import Image, ImageDraw, ImageFont
from IPython.display import display, Javascript
from IPython.display import Image as IPyImage

import tensorflow as tf

from object_detection.utils import label_map_util
from object_detection.utils import config_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import colab_utils
from object_detection.builders import model_builder

%matplotlib inline

In [16]:
# Download the checkpoint and put it into models/research/object_detection/test_data/

!wget http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.tar.gz
!tar -xf ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.tar.gz
!mv ssd_resnet50_v1_fpn_640x640_coco17_tpu-8/checkpoint models/research/object_detection/test_data/

--2020-12-20 13:55:02--  http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.tar.gz
Resolving download.tensorflow.org (download.tensorflow.org)... 74.125.68.128, 2404:6800:4003:c02::80
Connecting to download.tensorflow.org (download.tensorflow.org)|74.125.68.128|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 244817203 (233M) [application/x-tar]
Saving to: ‘ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.tar.gz’


2020-12-20 13:55:05 (145 MB/s) - ‘ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.tar.gz’ saved [244817203/244817203]



In [17]:
tf.keras.backend.clear_session()

print('Building model and restoring weights for fine-tuning...', flush=True)
num_classes = 20
pipeline_config = 'models/research/object_detection/configs/tf2/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.config'
checkpoint_path = 'models/research/object_detection/test_data/checkpoint/ckpt-0'

# Load pipeline config and build a detection model.
#
# Since we are working off of a COCO architecture which predicts 90
# class slots by default, we override the `num_classes` field here to be just
# one (for our new rubber ducky class).
configs = config_util.get_configs_from_pipeline_file(pipeline_config)
model_config = configs['model']
model_config.ssd.num_classes = num_classes
model_config.ssd.freeze_batchnorm = True
detection_model = model_builder.build(
      model_config=model_config, is_training=True)

# Set up object-based checkpoint restore --- RetinaNet has two prediction
# `heads` --- one for classification, the other for box regression.  We will
# restore the box regression head but initialize the classification head
# from scratch (we show the omission below by commenting out the line that
# we would add if we wanted to restore both heads)
fake_box_predictor = tf.compat.v2.train.Checkpoint(
    _base_tower_layers_for_heads=detection_model._box_predictor._base_tower_layers_for_heads,
    # _prediction_heads=detection_model._box_predictor._prediction_heads,
    #    (i.e., the classification head that we *will not* restore)
    _box_prediction_head=detection_model._box_predictor._box_prediction_head,
    )
fake_model = tf.compat.v2.train.Checkpoint(
          _feature_extractor=detection_model._feature_extractor,
          _box_predictor=fake_box_predictor)
ckpt = tf.compat.v2.train.Checkpoint(model=fake_model)
ckpt.restore(checkpoint_path).expect_partial()

# Run model through a dummy image so that variables are created
image, shapes = detection_model.preprocess(tf.zeros([1, 640, 640, 3]))
prediction_dict = detection_model.predict(image, shapes)
_ = detection_model.postprocess(prediction_dict, shapes)
print('Weights restored!')

Building model and restoring weights for fine-tuning...
Weights restored!


In [18]:
tf.get_logger().setLevel('ERROR')
from tqdm import tqdm

In [19]:
tf.keras.backend.set_learning_phase(True)

# These parameters can be tuned; since our training set has 5 images
# it doesn't make sense to have a much larger batch size, though we could
# fit more examples in memory if we wanted to.
batch_size = 32
learning_rate = 0.01
num_batches = len(train_batches)

# Select variables in top layers to fine-tune.
trainable_variables = detection_model.trainable_variables
to_fine_tune = []
prefixes_to_train = [
  'WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalBoxHead',
  'WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalClassHead']
for var in trainable_variables:
  if any([var.name.startswith(prefix) for prefix in prefixes_to_train]):
    to_fine_tune.append(var)

# Set up forward + backward pass for a single train step.
def get_model_train_step_function(model, optimizer, vars_to_fine_tune):
  """Get a tf.function for training step."""

  # Use tf.function for a bit of speed.
  # Comment out the tf.function decorator if you want the inside of the
  # function to run eagerly.
  @tf.function
  def train_step_fn(image_tensors,
                    groundtruth_boxes_list,
                    groundtruth_classes_list):
    """A single training iteration.

    Args:
      image_tensors: A list of [1, height, width, 3] Tensor of type tf.float32.
        Note that the height and width can vary across images, as they are
        reshaped within this function to be 640x640.
      groundtruth_boxes_list: A list of Tensors of shape [N_i, 4] with type
        tf.float32 representing groundtruth boxes for each image in the batch.
      groundtruth_classes_list: A list of Tensors of shape [N_i, num_classes]
        with type tf.float32 representing groundtruth boxes for each image in
        the batch.

    Returns:
      A scalar tensor representing the total loss for the input batch.
    """
    shapes = tf.constant(batch_size * [[640, 640, 3]], dtype=tf.int32)
    model.provide_groundtruth(
        groundtruth_boxes_list=groundtruth_boxes_list,
        groundtruth_classes_list=groundtruth_classes_list)
    with tf.GradientTape() as tape:
      preprocessed_images = tf.concat(
          [detection_model.preprocess(image_tensor)[0]
           for image_tensor in image_tensors], axis=0)
      prediction_dict = model.predict(preprocessed_images, shapes)
      losses_dict = model.loss(prediction_dict, shapes)
      total_loss = losses_dict['Loss/localization_loss'] + losses_dict['Loss/classification_loss']
      gradients = tape.gradient(total_loss, vars_to_fine_tune)
      optimizer.apply_gradients(zip(gradients, vars_to_fine_tune))
    return total_loss

  return train_step_fn

optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9)
train_step_fn = get_model_train_step_function(
    detection_model, optimizer, to_fine_tune)

print('Start fine-tuning!', flush=True)
#for idx in range(num_batches):
for epoch in range(20):
  pbar = tqdm(total=num_batches, position=0, leave=True, bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} ')
  for idx, (image_tensors, gt_classes_list, gt_boxes_list) in enumerate(train_batches):
    image_tensors = tf.expand_dims(image_tensors, 1)
    image_tensors, gt_boxes_list, gt_classes_list = list(image_tensors.numpy()), list(gt_boxes_list.numpy()), list(gt_classes_list.numpy())
    # Training step (forward pass + backwards pass)
    total_loss = train_step_fn(image_tensors, gt_boxes_list, gt_classes_list)
    pbar.set_description("Training loss for step %s: %.4f" % (int(idx), float(total_loss)))
    pbar.update()

print('Done fine-tuning!')

Start fine-tuning!


Training loss for step 100: 1.4266:  71%|███████   | 101/143 

ResourceExhaustedError: ignored