## Choosing a pre-trained model

In [None]:
# Some available models to train on
# (key, model_name, pipeline_file) for every base model this notebook knows about.
_MODEL_SPECS = [
    ('ssd_mobilenet_v2',
     'ssd_mobilenet_v2_coco_2018_03_29',
     'ssd_mobilenet_v2_coco.config'),
    ('ssd_inception_v2',
     'ssd_inception_v2_coco_2018_01_28',
     'ssd_inception_v2_coco.config'),
    ('ssd_resnet_50_fpn_coco',
     'ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03',
     'ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync.config'),
    ('faster_rcnn_inception_v2',
     'faster_rcnn_inception_v2_coco_2018_01_28',
     'faster_rcnn_inception_v2_pets.config'),
    ('rfcn_resnet101',
     'rfcn_resnet101_coco_2018_01_28',
     'rfcn_resnet101_pets.config'),
]

# Lookup table: short model key -> {'model_name': ..., 'pipeline_file': ...}.
MODELS_CONFIG = {
    key: {'model_name': model_name, 'pipeline_file': pipeline_file}
    for key, model_name, pipeline_file in _MODEL_SPECS
}

# Key into MODELS_CONFIG for the model used throughout the notebook.
selected_model = 'ssd_inception_v2'

## Installing Required Packages 

In [None]:
!pip install tensorflow-gpu==1.15.0 #downgrade

!apt-get install -qq protobuf-compiler python-pil python-lxml python-tk

!pip install -qq Cython contextlib2 pillow lxml matplotlib

!pip install -qq pycocotools

!pip install tf_slim

!pip install lvis

## Imports

In [None]:
from __future__ import division, print_function, absolute_import
import tensorflow.compat.v1 as tf
import os
import glob
import xml.etree.ElementTree as ET
import pandas as pd
import csv
import cv2

from collections import namedtuple, OrderedDict
import io
from PIL import Image

import urllib.request
import tarfile
import shutil

## Mounting to google drive

In [None]:
from google.colab import drive
 
drive.mount('/gdrive')
%cd /gdrive/'My Drive'

## Downloading and Organizing Images and Annotations

In [None]:
#Project Directory
!mkdir knife_detection

In [None]:
cd knife_detection

In [None]:
!git clone https://github.com/ari-dasci/OD-WeaponDetection.git 

In [None]:
!mkdir data

!mkdir data/images data/train_labels data/test_labels

In [None]:
!mv OD-WeaponDetection/Knife_detection/Images/* data/images
!mv OD-WeaponDetection/Knife_detection/annotations/* data/train_labels

# Move 400 randomly selected annotation files (`sort -R`) into the test split
!ls data/train_labels/* | sort -R | head -400 | xargs -I{} mv {} data/test_labels

In [None]:
!ls -1 data/train_labels/ | wc -l

In [None]:
!ls -1 data/test_labels/ | wc -l

## Preprocessing Images and Labels

In [None]:
%cd /gdrive/My Drive/knife_detection/data

images_extension = 'jpg'

def xml_to_csv(path):
    """Flatten the XML annotation files in `path` into one table of boxes.

    Fields are looked up by tag name (`filename`, `size/width`, `size/height`,
    `object/name`, `object/bndbox/{xmin,ymin,xmax,ymax}`) rather than by child
    position, so files whose elements are ordered differently are still read
    correctly.

    Args:
        path: Directory searched (non-recursively) for `*.xml` files.

    Returns:
        A tuple `(xml_df, classes_names)`:
        - `xml_df`: DataFrame with one row per `<object>` element and the
          columns filename, width, height, class, xmin, ymin, xmax, ymax.
        - `classes_names`: sorted list of the distinct class names seen.

    Raises:
        AttributeError: if an annotation with at least one `<object>` is
            missing one of the tags listed above.
        ValueError: if a size or coordinate value is not an integer literal.
    """
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    rows = []

    # Sort so the row order (and therefore the CSV) is reproducible across runs.
    for xml_file in sorted(glob.glob(path + '/*.xml')):
        root = ET.parse(xml_file).getroot()
        for member in root.findall('object'):
            box = member.find('bndbox')
            rows.append((
                root.find('filename').text,
                int(root.find('size/width').text),
                int(root.find('size/height').text),
                member.find('name').text,
                int(box.find('xmin').text),
                int(box.find('ymin').text),
                int(box.find('xmax').text),
                int(box.find('ymax').text),
            ))

    xml_df = pd.DataFrame(rows, columns=column_name)
    classes_names = sorted({row[3] for row in rows})
    return xml_df, classes_names

# Convert each split's XML annotations to `<split>.csv` and collect the class
# names from BOTH splits, so the label map does not depend on which split
# happened to be processed last.
all_classes = set()
for label_path in ['train_labels', 'test_labels']:
  xml_df, split_classes = xml_to_csv(label_path)
  xml_df.to_csv(f'{label_path}.csv', index=None)
  all_classes.update(split_classes)
  print(f'Successfully converted {label_path} xml to csv.')

# Sorted so class ids are assigned deterministically (ids start at 1).
classes = sorted(all_classes)

label_map_path = "label_map.pbtxt"

# One `item { ... }` entry per class, separated by blank lines.
pbtxt_content = "".join(
    "item {{\n    id: {0}\n    name: '{1}'\n    display_name: 'Knife'\n }}\n\n".format(i + 1, class_name)
    for i, class_name in enumerate(classes)
).strip()

with open(label_map_path, "w") as f:
    f.write(pbtxt_content)

In [None]:
!cat label_map.pbtxt

In [None]:
%cd /gdrive/My Drive/knife_detection/data

images_path = 'images'

# Sanity-check every annotation row against the actual image on disk:
# the image must be readable, the recorded width/height must match the decoded
# image, and no box coordinate may exceed the image bounds.
for CSV_FILE in ['train_labels.csv', 'test_labels.csv']:
    with open(CSV_FILE, 'r') as fid:
        print('[*] Checking file:', CSV_FILE)
        reader = csv.reader(fid, delimiter=',')
        next(reader, None)  # skip the header row
        cnt = 0
        error_cnt = 0
        for row in reader:
            cnt += 1
            name, width, height = row[0], int(row[1]), int(row[2])
            xmin, ymin, xmax, ymax = (int(value) for value in row[4:8])
            path = os.path.join(images_path, name)
            img = cv2.imread(path)
            if img is None:
                # An unreadable image is counted right away and reported by path.
                error_cnt += 1
                print('Could not read image', path)
                continue
            org_height, org_width = img.shape[:2]
            error = False
            if org_width != width:
                error = True
                print('Width mismatch for image: ', name, width, '!=', org_width)
            if org_height != height:
                error = True
                print('Height mismatch for image: ', name, height, '!=', org_height)
            if xmin > org_width:
                error = True
                print('XMIN > org_width for file', name)
            if xmax > org_width:
                error = True
                print('XMAX > org_width for file', name)
            if ymin > org_height:
                error = True
                print('YMIN > org_height for file', name)
            if ymax > org_height:
                error = True
                print('YMAX > org_height for file', name)
            if error:
                # Count the row in the same iteration that detected the problem,
                # so an error on the final row is not lost.
                error_cnt += 1
                print('Error for file: %s' % name)
                print()
        print()
        print('Checked %d files and realized %d errors' % (cnt, error_cnt))
        print("-----")

## Downloading Tensorflow model

In [None]:
# Downloads Tensorflow
%cd /gdrive/My Drive/knife_detection
!git clone --q https://github.com/tensorflow/models.git

In [None]:
%cd /gdrive/My Drive/knife_detection/models/research

!protoc object_detection/protos/*.proto --python_out=.

# Expose the Object Detection API and slim to child processes via PYTHONPATH.
# Built with .get() so an unset variable does not raise KeyError, and entries
# already present are skipped so re-running the cell does not append duplicates.
research_paths = [
    '/gdrive/My Drive/knife_detection/models/research/',
    '/gdrive/My Drive/knife_detection/models/research/slim/',
]
pythonpath_entries = [p for p in os.environ.get('PYTHONPATH', '').split(':') if p]
pythonpath_entries += [p for p in research_paths if p not in pythonpath_entries]
os.environ['PYTHONPATH'] = ':'.join(pythonpath_entries)

In [None]:
!python3 object_detection/builders/model_builder_test.py

## Create Tf record

In [None]:
from object_detection.utils import dataset_util
%cd /gdrive/My Drive/knife_detection/models/

DATA_BASE_PATH = '/gdrive/My Drive/knife_detection/data/'
image_dir = DATA_BASE_PATH +'images/'

def class_text_to_int(row_label):
    """Map a class name to its integer label id.

    Args:
        row_label: Class name read from the annotations.

    Returns:
        1 for 'knife'; None for any other label.
    """
    if row_label == 'knife':
        return 1
    # Explicit return: the original had a bare `None` expression here.
    return None


def split(df, group):
    """Group the rows of `df` by the `group` column.

    Args:
        df: DataFrame to partition.
        group: Column name passed to `DataFrame.groupby`.

    Returns:
        A list of `data(filename, object)` namedtuples, one per distinct group
        key, where `filename` is the key and `object` is the sub-frame of rows
        carrying that key.
    """
    record = namedtuple('data', ['filename', 'object'])
    grouped = df.groupby(group)
    result = []
    for key in grouped.groups:
        result.append(record(key, grouped.get_group(key)))
    return result

def create_tf_example(group, path):
    """Build one `tf.train.Example` for an image and all of its boxes.

    Args:
        group: Record with `.filename` (image file name) and `.object`
            (DataFrame of that image's rows with the columns xmin, ymin,
            xmax, ymax and class).
        path: Directory containing the image file.

    Returns:
        A `tf.train.Example` holding the encoded image bytes, its size, and
        the box coordinates divided by the decoded image width/height.
    """
    image_path = os.path.join(path, '{}'.format(group.filename))
    with tf.io.gfile.GFile(image_path, 'rb') as image_file:
        encoded_jpg = image_file.read()

    # The size comes from the decoded image itself, not from the CSV columns.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = group.filename.encode('utf8')
    rows = [row for _, row in group.object.iterrows()]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(
            [row['xmin'] / width for row in rows]),
        'image/object/bbox/xmax': dataset_util.float_list_feature(
            [row['xmax'] / width for row in rows]),
        'image/object/bbox/ymin': dataset_util.float_list_feature(
            [row['ymin'] / height for row in rows]),
        'image/object/bbox/ymax': dataset_util.float_list_feature(
            [row['ymax'] / height for row in rows]),
        'image/object/class/text': dataset_util.bytes_list_feature(
            [row['class'].encode('utf8') for row in rows]),
        'image/object/class/label': dataset_util.int64_list_feature(
            [class_text_to_int(row['class']) for row in rows]),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))

# Serialize each split's CSV annotations (plus the image bytes) into
# `<DATA_BASE_PATH><split>.record`.
# The loop variable is deliberately NOT named `csv`: that would rebind the
# imported `csv` module in the notebook namespace and break re-running the
# annotation-check cell.
for split_name in ['train_labels', 'test_labels']:
  record_path = DATA_BASE_PATH + split_name + '.record'
  examples = pd.read_csv(DATA_BASE_PATH + split_name + '.csv')
  grouped = split(examples, 'filename')
  # Context manager closes the writer even if an example fails to build.
  with tf.io.TFRecordWriter(record_path) as writer:
    for group in grouped:
        tf_example = create_tf_example(group, image_dir)
        writer.write(tf_example.SerializeToString())

  print('Successfully created the TFRecords: {}'.format(record_path))


In [None]:
# TFRecords are created
%cd /gdrive/My Drive/knife_detection/data
!ls -lX

## Downloading the Base Model

In [None]:
%cd /gdrive/My Drive/knife_detection/models/

DATA_BASE_PATH = '/gdrive/My Drive/knife_detection/data/'
image_dir = DATA_BASE_PATH +'images/'

In [None]:
%cd /gdrive/My Drive/knife_detection/models/research/
!mkdir pretrained_model
%cd pretrained_model
!wget http://download.tensorflow.org/models/object_detection/ssd_inception_v2_coco_2018_01_28.tar.gz
!tar -xzvf ssd_inception_v2_coco_2018_01_28.tar.gz

In [None]:
%cd /gdrive/My Drive/knife_detection/models/research/pretrained_model

MODEL = MODELS_CONFIG[selected_model]['model_name']

# the destination folder where the pretrained model is saved
fine_tune_dir = '/gdrive/My Drive/knife_detection/models/research/pretrained_model'

In [None]:
!echo {fine_tune_dir}
%cd {fine_tune_dir}
!ls -alh 

## Configuring Training Pipeline

In [None]:
# Directory with the sample pipeline configs.
# Raw string: the backslash before the space is part of the value (the path is
# later interpolated into a `!` shell command, where it escapes the space).
# In a normal string literal `\ ` is an invalid escape sequence.
CONFIG_BASE = r"/gdrive/My\ Drive/knife_detection/models/research/object_detection/samples/configs"

# Path to the selected model's config file.
pipeline_file = MODELS_CONFIG[selected_model]['pipeline_file']
model_pipline = os.path.join(CONFIG_BASE, pipeline_file)
model_pipline

In [None]:
%cd /gdrive/My\ Drive/knife_detection/models/research/object_detection/samples/configs/
!cat ssd_inception_v2_coco.config

In [None]:
#editing the configuration file to add the path for the TFRecords files, pbtxt,batch_size,num_steps,num_classes.
# any image augmentation, hyperparemeter tunning (drop out, batch normalization... etc) would be editted here

%%writefile ssd_inception_v2_coco.config
model {
  ssd {
    num_classes: 1 # number of classes to be detected
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    anchor_generator {
      ssd_anchor_generator {
        num_layers: 6
        min_scale: 0.2
        max_scale: 0.95
        aspect_ratios: 1.0
        aspect_ratios: 2.0
        aspect_ratios: 0.5
        aspect_ratios: 3.0
        aspect_ratios: 0.3333
        reduce_boxes_in_lowest_layer: true
      }
    }
    # all images will be resized to the below W x H.
    image_resizer { 
      fixed_shape_resizer {
        height: 300
        width: 300
      }
    }
    box_predictor {
      convolutional_box_predictor {
        min_depth: 0
        max_depth: 0
        num_layers_before_predictor: 0
        #use_dropout: false
        use_dropout: true # to counter over fitting. you can also try tweaking its probability below
        dropout_keep_probability: 0.8
        kernel_size: 1
        box_code_size: 4
        apply_sigmoid_to_scores: false
        conv_hyperparams {
          activation: RELU_6,
          regularizer {
            l2_regularizer {
            # weight: 0.00004
            weight: 0.001 # higher regularization to counter overfitting
          }
          }
          initializer {
            truncated_normal_initializer {
              stddev: 0.03
              mean: 0.0
            }
          }
          # batch_norm {
          #   train: true,
          #   scale: true,
          #   center: true,
          #   decay: 0.9997,
          #   epsilon: 0.001,
          # }
        }
      }
    }
    feature_extractor {
      type: 'ssd_inception_v2'
      min_depth: 16
      depth_multiplier: 1.0
      conv_hyperparams {
        activation: RELU_6,
        regularizer {
          l2_regularizer {
            # weight: 0.00004
            weight: 0.001 # higher regularization to counter overfitting
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          train: true,
          scale: true,
          center: true,
          decay: 0.9997,
          epsilon: 0.001,
        }
      }
      override_base_feature_extractor_hyperparams: true
    }
    loss {
      classification_loss {
        weighted_sigmoid {
            anchorwise_output: true
        }
      }
      localization_loss {
        weighted_smooth_l1 {
            anchorwise_output: true
        }
      }
      hard_example_miner {
        num_hard_examples: 3000 
        iou_threshold: 0.95
        loss_type: CLASSIFICATION
        max_negatives_per_positive: 3
        min_negatives_per_image: 3
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.6
        
        max_detections_per_class: 16
        max_total_detections: 16
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  batch_size: 16 # training batch size
  optimizer {
    rms_prop_optimizer: {
      learning_rate: {
        exponential_decay_learning_rate {
          initial_learning_rate: 0.003
          decay_steps: 800720
          decay_factor: 0.95
        }
      }
      momentum_optimizer_value: 0.9
      decay: 0.9
      epsilon: 1.0
    }
  }

  fine_tune_checkpoint: "/gdrive/My Drive/knife_detection/models/research/pretrained_model/model.ckpt" #the path to the pretrained model. 
  fine_tune_checkpoint_type:  "detection"
  num_steps: 2000000 
  

  # Data augmentation is configured here; you can remove options or add more.
  # They will help the model generalize but the training time will increase greatly by using more data augmentation.
  # Check this link to add more image augmentation: https://github.com/tensorflow/models/blob/master/research/object_detection/protos/preprocessor.proto
  
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_adjust_contrast {
    }
  }
  data_augmentation_options {
    ssd_random_crop {
    }
  }
}

train_input_reader: {
  tf_record_input_reader {
    #path to the training TFRecord
    input_path: "/gdrive/My Drive/knife_detection/data/train_labels.record"
  }
  #path to the label map 
  label_map_path: "/gdrive/My Drive/knife_detection/data/label_map.pbtxt"
}

eval_config: {
  # the number of images in your "testing" data (matches the 400 annotation files moved to data/test_labels earlier)
  num_examples: 400
  # the number of images to display in Tensorboard while training
  num_visualizations: 20

  # Note: The below line limits the evaluation process to 10 evaluations.
  # Remove the below line to evaluate indefinitely.
  #max_evals: 10
}

eval_input_reader: {
  tf_record_input_reader {
      
    #path to the testing TFRecord
    input_path: "/gdrive/My Drive/knife_detection/data/test_labels.record"
  }
  #path to the label map 
  label_map_path: "/gdrive/My Drive/knife_detection/data/label_map.pbtxt"
  shuffle: false
  num_readers: 1
}

In [None]:
# where the model will be saved at each checkpoint while training 
# NOTE(review): this is a relative path, so the `rm -rf` / `makedirs` below act
# on whatever the kernel's current working directory is when this cell runs,
# while the training command later receives the same relative value. If the
# working directory changes in between, the two refer to different folders —
# confirm the intended location.
model_dir = 'training/'

# Remove any existing `training/` under the current directory, then recreate it empty.
!rm -rf {model_dir}
os.makedirs(model_dir, exist_ok=True)

## Downloading and Configuring TensorBoard

In [None]:
# downloading ngrok to be able to access TensorBoard on Google Colab
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip -o ngrok-stable-linux-amd64.zip

In [None]:
# Directory holding the logs that are created while training.
# Raw string: the backslash escapes the space for the shell command built
# below; in a normal string literal `\ ` is an invalid escape sequence.
LOG_DIR = r'/gdrive/My\ Drive/knife_detection/models/research/object_detection/training/'

# Start TensorBoard on port 6006 and an ngrok tunnel to it, both in the
# background (trailing `&`).
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'.format(LOG_DIR)
)
get_ipython().system_raw('./ngrok http 6006 &')

In [None]:
#The link to tensorboard.
#works after the training starts.

### note: if you didnt get a link as output, rerun this cell and the one above
!curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

In [None]:
# !saved_model_cli show --dir /gdrive/My\ Drive/knife_detection/models/research/object_detection/training/export/Servo/1598887811 \
#   --tag_set serve --signature_def serving_default

%load_ext tensorboard
%tensorboard --logdir /gdrive/My\ Drive/knife_detection/models/research/object_detection/training/

## Start Training

In [None]:
%cd /gdrive/My\ Drive/knife_detection/models/research/object_detection

In [None]:
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
print(tf.__version__)
tf.test.gpu_device_name()
!nvcc --version

In [None]:
!python3 model_main.py \
    --pipeline_config_path={model_pipline}\
    --model_dir={model_dir} \
    --alsologtostderr \