### 1: Install Tensorflow Object Detection API

Follow the step by step official [installation guide](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md)

### 2: Find a dataset to train

For this demo, I'm using a handgun dataset (over 3000 pictures) you can [download here](https://sci2s.ugr.es/weapons-detection). For a better prediction, you'll have to gather a lot of data.

### 3: Label the desired objects in each picture

I'm using [RectLabel](https://rectlabel.com/) on macOS, but there are a lot of free alternatives such as [LabelImg](https://github.com/tzutalin/labelImg). Both do the job pretty well. The choice is up to you.

Tip: Before labeling pictures, split your dataset into two parts. The first, ~80% of your dataset, is going to be your training data, and the remaining part (~20%) is going to be your eval data. This way, you'll have two folders, **eval/** and **train/**, which will contain the pictures and their .xml PASCAL VOC representation.

```
training/
-- images/
---- eval/
------ weapon
-------- [files]
---- train/
------ weapon
-------- [files]
```

### 4: Generate training data (TFRecords)

It's time to generate TFRecords that serve as input data to the Tensorflow training model. The following script is going to generate **eval.record** and **train.record** by using pictures and their related .xml PASCAL VOC files.

In [4]:
import os
import xml.etree.ElementTree as ET

import tensorflow as tf
from object_detection.utils import dataset_util

# Root of the training workspace, given relative to the current working directory.
ROOT_FOLDER = '../training'
# Folder of evaluation pictures and their .xml files, and the TFRecord generated from it.
PATH_TEST = f"{ROOT_FOLDER}/images/eval/weapon/"
PATH_RECORD_TEST = f"{ROOT_FOLDER}/eval.record"
# Folder of training pictures and their .xml files, and the TFRecord generated from it.
PATH_TRAIN = f"{ROOT_FOLDER}/images/train/weapon/"
PATH_RECORD_TRAIN = f"{ROOT_FOLDER}/train.record"

# Value stored in the 'image/format' feature of every example.
IMAGE_FORMAT = b'jpg'


def class_text_to_int(row_label):
    """Return the integer class id for ``row_label``.

    Only the ``'weapon'`` class is known and maps to 1; any other value
    yields ``None``.
    """
    return 1 if row_label == 'weapon' else None


def xml_to_tf(path_input, path_output):
    """Build one TFRecord file from a folder of PASCAL VOC annotations.

    Every ``.xml`` file found in ``path_input`` is parsed, the picture named
    by its ``<filename>`` element is read from the same folder, and one
    ``tf.train.Example`` per annotation file is written to ``path_output``.
    Bounding boxes are stored normalized by the image width and height
    declared in the annotation.

    Args:
        path_input: Folder holding the pictures and their ``.xml`` files.
        path_output: Path of the TFRecord file to create.

    Raises:
        ValueError: If an annotated object has a class name that
            ``class_text_to_int`` does not know.
    """
    writer = tf.io.TFRecordWriter(path_output)
    try:
        for entry in os.listdir(path_input):
            if not entry.endswith(".xml"):
                continue

            # os.path.join works whether or not path_input ends with a separator.
            xml_path = os.path.join(path_input, entry)
            root = ET.parse(xml_path).getroot()

            # Elements are looked up by tag instead of by position: the number
            # and order of child elements is not guaranteed to be the same for
            # every annotation tool.
            filename = root.find('filename').text
            width = int(root.find('size/width').text)
            height = int(root.find('size/height').text)

            x_mins = []
            x_maxs = []
            y_mins = []
            y_maxs = []
            classes_text = []
            classes = []

            for member in root.findall('object'):
                name = member.find('name').text
                label_id = class_text_to_int(name)
                if label_id is None:
                    # Fail with a clear message instead of letting None reach
                    # the int64 feature below.
                    raise ValueError(
                        'Unknown class {!r} in {}'.format(name, xml_path))

                box = member.find('bndbox')
                x_min = int(box.find('xmin').text)
                y_min = int(box.find('ymin').text)
                x_max = int(box.find('xmax').text)
                y_max = int(box.find('ymax').text)

                x_mins.append(x_min / width)
                x_maxs.append(x_max / width)
                y_mins.append(y_min / height)
                y_maxs.append(y_max / height)
                classes_text.append(name.encode('utf8'))
                classes.append(label_id)

            with tf.io.gfile.GFile(os.path.join(path_input, filename), 'rb') as fid:
                encoded_jpg = fid.read()

            encoded_name = filename.encode('utf8')
            tf_example = tf.train.Example(features=tf.train.Features(feature={
                'image/height': dataset_util.int64_feature(height),
                'image/width': dataset_util.int64_feature(width),
                'image/filename': dataset_util.bytes_feature(encoded_name),
                'image/source_id': dataset_util.bytes_feature(encoded_name),
                'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                'image/format': dataset_util.bytes_feature(IMAGE_FORMAT),
                'image/object/bbox/xmin': dataset_util.float_list_feature(x_mins),
                'image/object/bbox/xmax': dataset_util.float_list_feature(x_maxs),
                'image/object/bbox/ymin': dataset_util.float_list_feature(y_mins),
                'image/object/bbox/ymax': dataset_util.float_list_feature(y_maxs),
                'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
                'image/object/class/label': dataset_util.int64_list_feature(classes),
            }))

            writer.write(tf_example.SerializeToString())
    finally:
        # Always release the output file, even when an annotation is malformed.
        writer.close()

    output_path = os.path.join(os.getcwd(), path_output)
    print('Successfully created the TFRecords: {}'.format(output_path))


# Generate eval.record and train.record from the two annotated picture folders.
xml_to_tf(PATH_TEST, PATH_RECORD_TEST)
xml_to_tf(PATH_TRAIN, PATH_RECORD_TRAIN)

Successfully created the TFRecords: /Users/techlead/PycharmProjects/demo/notebook/../training/eval.record
Successfully created the TFRecords: /Users/techlead/PycharmProjects/demo/notebook/../training/train.record


### 5: Create a label map and set the training config

The label map tells the trainer what each object is by defining a mapping of class names to class ID numbers.

Just create a **label_map.pbtxt** and add your classes. In this demo, I'm using one single class: **weapon**.

```
item {
    id: 1
    name: 'weapon'
}

item {
    id: 2
    name: 'another_class'
}

...
```

Then, you'll have to create a configuration pipeline. I'm using the existing [config for SSDLite MobileNet v2 COCO](https://github.com/tensorflow/models/blob/master/research/object_detection/samples/configs/ssdlite_mobilenet_v2_coco.config).

Also, grab the fine tune checkpoint for this model: [ssdlite_mobilenet_v2_coco.tar.gz](http://download.tensorflow.org/models/object_detection/ssdlite_mobilenet_v2_coco_2018_05_09.tar.gz)

Set `num_classes` to the number of classes you have (here, 1) and update all path placeholders to match your project:

> Note: my training data along with the label map and the pipeline config is stored on Google Cloud Storage, because we're going to train our model on **CLOUD AI VISION** service (ex CLOUD ML).
> Replace **{bucket_id}** with your own bucket id: **gs://{bucket_id}/training/xxx**

```
model {
  ssd {
    num_classes: 1
    image_resizer {
      fixed_shape_resizer {
        height: 300
        width: 270
        resize_method: AREA
      }
    }
    feature_extractor {
      type: "ssd_mobilenet_v2"
      depth_multiplier: 1.0
      min_depth: 16
      conv_hyperparams {
        regularizer {
          l2_regularizer {
            weight: 3.99999989895e-05
          }
        }
        initializer {
          truncated_normal_initializer {
            mean: 0.0
            stddev: 0.0299999993294
          }
        }
        activation: RELU_6
        batch_norm {
          decay: 0.999700009823
          center: true
          scale: true
          epsilon: 0.0010000000475
          train: true
        }
      }
      use_depthwise: true
    }
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    box_predictor {
      convolutional_box_predictor {
        conv_hyperparams {
          regularizer {
            l2_regularizer {
              weight: 3.99999989895e-05
            }
          }
          initializer {
            truncated_normal_initializer {
              mean: 0.0
              stddev: 0.0299999993294
            }
          }
          activation: RELU_6
          batch_norm {
            decay: 0.999700009823
            center: true
            scale: true
            epsilon: 0.0010000000475
            train: true
          }
        }
        min_depth: 0
        max_depth: 0
        num_layers_before_predictor: 0
        use_dropout: false
        dropout_keep_probability: 0.800000011921
        kernel_size: 3
        box_code_size: 4
        apply_sigmoid_to_scores: false
        use_depthwise: true
      }
    }
    anchor_generator {
      ssd_anchor_generator {
        num_layers: 6
        min_scale: 0.20000000298
        max_scale: 0.949999988079
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        aspect_ratios: 3.0
        aspect_ratios: 0.333299994469
      }
    }
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.6
        max_detections_per_class: 20
        max_total_detections: 20
      }
      score_converter: SIGMOID
    }
    normalize_loss_by_num_matches: true
    loss {
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_loss {
        weighted_sigmoid {
        }
      }
      hard_example_miner {
        num_hard_examples: 3000
        iou_threshold: 0.990000009537
        loss_type: CLASSIFICATION
        max_negatives_per_positive: 3
        min_negatives_per_image: 3
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
  }
}
train_config {
  batch_size: 24
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    ssd_random_crop {
    }
  }
  optimizer {
    rms_prop_optimizer {
      learning_rate {
        exponential_decay_learning_rate {
          initial_learning_rate: 0.00400000018999
          decay_steps: 800720
          decay_factor: 0.949999988079
        }
      }
      momentum_optimizer_value: 0.899999976158
      decay: 0.899999976158
      epsilon: 1.0
    }
  }
  fine_tune_checkpoint: "gs://{bucket_id}/training/model.ckpt"
  num_steps: 200000
  fine_tune_checkpoint_type: "detection"
}
train_input_reader {
  label_map_path: "gs://{bucket_id}/training/label_map.pbtxt"
  tf_record_input_reader {
    input_path: "gs://{bucket_id}/training/train.record"
  }
}
eval_config {
  num_examples: 8000
  max_evals: 10
  use_moving_averages: false
}
eval_input_reader {
  label_map_path: "gs://{bucket_id}/training/label_map.pbtxt"
  shuffle: false
  num_readers: 1
  tf_record_input_reader {
    input_path: "gs://{bucket_id}/training/eval.record"
  }
}
```

### 6: Check-in

Your GCS bucket should contain a **training/** folder holding these files:

- eval.record
- train.record
- label_map.pbtxt
- ssdlite_mobilenet_v2_coco.config
- model.ckpt.* (x3)

### 7: Set the training cluster configuration

Make a **cloud.yml** file and put this config in it. You can of course change the settings to fit your needs.

```
trainingInput:
  runtimeVersion: "1.12"
  scaleTier: CUSTOM
  masterType: standard_gpu
  workerCount: 5
  workerType: standard_gpu
  parameterServerCount: 3
  parameterServerType: standard
```

More info [here](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/running_on_cloud.md).

### 8: Run the training on AI VISION

In [6]:
#!/usr/bin/env bash

# Submit the training job; the job name is suffixed with the current date and time.
# This is a shell command: run it from a terminal, not as a Python cell.
# Replace {bucket_id} with your own bucket id before running.
# --packages uses relative paths — presumably run from the TensorFlow models
#   research/ folder after building the packages; confirm against the linked guide.
# --config points at the cloud.yml cluster configuration.
# Everything after the bare `--` is passed to object_detection.model_main.
gcloud ai-platform jobs submit training object_detection_`date +%m_%d_%Y_%H_%M_%S` \
    --runtime-version 1.12 \
    --job-dir gs://{bucket_id}/train \
    --packages dist/object_detection-0.1.tar.gz,slim/dist/slim-0.1.tar.gz,/tmp/pycocotools/pycocotools-2.0.tar.gz \
    --module-name object_detection.model_main \
    --region us-east1 \
    --config cloud.yml \
    -- \
    --model_dir gs://{bucket_id}/train \
    --pipeline_config_path gs://{bucket_id}/training/ssdlite_mobilenet_v2_coco.config

SyntaxError: invalid syntax (<ipython-input-6-48bb6b4f586b>, line 3)

This is going to take a while until you achieve a good loss depending on the volume of your dataset, the number of classes etc.

You can monitor the job by running `tensorboard --logdir=gs://{bucket_id}/train --host localhost`

### 9: Export Inference Graph 

To export the frozen inference graph (.pb), you'll have to download the most recent model.ckpt.* files from your GCS bucket.

Next, go to your Tensorflow **models/research/object_detection/** folder and run this command: `python export_inference_graph.py --input_type image_tensor --pipeline_config_path training/ssdlite_mobilenet_v2_coco.config --trained_checkpoint_prefix training/model.ckpt --output_directory inference_graph`

### 10: Use your model!

In [8]:
import cv2 as cv
import tensorflow as tf

# Placeholder: set this to the location of your exported frozen_inference_graph.pb.
FROZEN_INFERENCE_GRAPH_PATH = 'PATH/TO/THE/frozen_inference_graph.pb'


class Detector(object):
    """Run a frozen object-detection graph on images and display the detections.

    The graph is read once from ``FROZEN_INFERENCE_GRAPH_PATH`` when the
    detector is created and kept in a session for later ``detect`` calls.

    Args:
        precision: Score threshold; only detections scoring strictly above
            it are drawn.
    """

    def __init__(self, precision=0.3):
        self.precision = precision
        # tf.gfile.FastGFile is deprecated; tf.io.gfile.GFile is the
        # replacement.
        with tf.io.gfile.GFile(FROZEN_INFERENCE_GRAPH_PATH, 'rb') as f:
            self.graph_def = tf.compat.v1.GraphDef()
            self.graph_def.ParseFromString(f.read())

        # Import into a dedicated graph owned by this detector. Calling
        # graph.as_default() without `with` has no effect, so the nodes would
        # otherwise land in the process-wide default graph.
        graph = tf.Graph()
        with graph.as_default():
            tf.import_graph_def(self.graph_def, name='')
        self.sess = tf.compat.v1.Session(graph=graph)

    def detect(self, img):
        """Detect objects in ``img``, draw their boxes on it and show the result.

        Args:
            img: Image array indexed as (row, column, channel) with three
                channels. It is modified in place: a rectangle is drawn for
                every detection scoring above ``self.precision``.

        The annotated image is shown in a window and the call blocks until a
        key is pressed.
        """
        rows, cols = img.shape[0], img.shape[1]
        # cv.resize takes the target size as (width, height); the pipeline
        # config resizes to height 300 x width 270.
        inp = cv.resize(img, (270, 300))
        # Reverse the channel order (OpenCV images are BGR by default).
        inp = inp[:, :, [2, 1, 0]]

        # Run the model
        graph = self.sess.graph
        num_out, scores_out, boxes_out = self.sess.run([
            graph.get_tensor_by_name('num_detections:0'),
            graph.get_tensor_by_name('detection_scores:0'),
            graph.get_tensor_by_name('detection_boxes:0'),
        ], feed_dict={'image_tensor:0': inp.reshape(1, inp.shape[0], inp.shape[1], 3)})

        # Visualize detected bounding boxes.
        num_detections = int(num_out[0])
        for i in range(num_detections):
            score = float(scores_out[0][i])
            if score <= self.precision:
                continue

            # Box values are fractions of the image size, used here as
            # (top, left, bottom, right); scale them back to pixels.
            top, left, bottom, right = (float(v) for v in boxes_out[0][i])
            cv.rectangle(img,
                         (int(left * cols), int(top * rows)),
                         (int(right * cols), int(bottom * rows)),
                         (125, 255, 51),
                         thickness=2)

        cv.imshow('Detections', img)
        cv.waitKey(0)
        
# Load the frozen graph once, then run detection on an image.
# `...` is a placeholder: pass your own image array (e.g. one loaded with cv.imread).
detector = Detector()
detector.detect(...)

Instructions for updating:
Use tf.gfile.GFile.


NotFoundError: PATH/TO/THE/frozen_inference_graph.pb; No such file or directory