<table width="100%" border="3">
  <tbody>
    <tr>
      <td width=120><img src="https://is4-ssl.mzstatic.com/image/thumb/Purple128/v4/9f/38/cb/9f38cb51-c606-6fd4-54b3-47bfdb06b712/source/256x256bb.jpg" alt="Aidea" width="120"/></td>
      <td align='left'><h1>TensorFlow 2 Object Detection</h1><BR><h2>D2-Train object detection model for MBSH</h2></td>
    </tr>
  </tbody>
</table>


# Mosquito Breeding Site Hunting for Dengue Fever Control

Dengue fever is an acute infectious disease transmitted by mosquitoes. In Taiwan, dengue outbreaks usually peak in summer. Mild clinical cases may present with symptoms such as fever, headache, and myalgia, while severe cases may involve severe fluid leakage, hemorrhagic symptoms, shock, organ failure, coma, and even death. The mortality rate can be 20% or higher if the patient does not receive proper treatment in time.

__IEEE ICIP 2019 Grand Challenge__ https://aidea-web.tw/icip
![alt MBSH database](https://aidea-web.tw/images/web/ICIP-2019-LOGO-s.png)


# Part 1: Configure the Colab environment to execute Object Detection API with TensorFlow 2

## Step 1: Mount google drive

In [None]:
# Mount Google Drive at /content/drive; later cells use paths under
# "/content/drive/My Drive/". force_remount=False is passed explicitly.
from google.colab import drive
drive.mount('/content/drive', force_remount=False)

## Step 5: Install packages

In [None]:
!pip install tf_slim
!pip install lvis
!pip install tensorflow-addons

## Step 6: Set up the working environment

In [None]:
# Set PYTHONPATH in the notebook's environment so that processes launched from
# it (e.g. `!python ...`) see the models repo root, research/ and research/slim.
%env PYTHONPATH=/env/python:/content/drive/My Drive/models:/content/drive/My Drive/models/research:/content/drive/My Drive/models/research/slim

In [None]:
import sys

# Make the Object Detection API importable inside this kernel.
# The same three directories are listed in PYTHONPATH above, but that variable
# only affects child processes; the running kernel needs sys.path updated.
# The membership check keeps the cell idempotent when it is re-run.
for _api_path in (
    '/content/drive/My Drive/models',
    '/content/drive/My Drive/models/research',
    '/content/drive/My Drive/models/research/slim',
):
    if _api_path not in sys.path:
        sys.path.append(_api_path)

In [None]:
# Work from models/research/ so that the relative paths used by later cells
# (object_detection/..., data/...) resolve.
from os import chdir
chdir("/content/drive/My Drive/models/research/")

## Step 7: Test Object Detection API


In [None]:
# Run the Object Detection API's TF2 model-builder test script to check the setup.
!python object_detection/builders/model_builder_tf2_test.py

# Part 2: (IPO-I) input data

## Step 1: Create tfrecords folder

In [None]:
%%bash
# Create data/ and data/tfrecords/ under models/research/.
# -p makes the cell safe to re-run: mkdir no longer fails when the
# directories already exist.
cd models/research/
mkdir -p data data/tfrecords

## Step 2: Download data from the cloud drive
If the download fails, you can download the file manually from the link below and upload it yourself.

https://drive.google.com/file/d/1_lh-QnjvPc4v9EDhisuwCTVZAUlKE6lR/view?usp=sharing

In [None]:
# Download the dataset archive from Google Drive by file id, then extract
# mosquito.zip into the current working directory.
!gdown --id 1_lh-QnjvPc4v9EDhisuwCTVZAUlKE6lR
!unzip mosquito.zip



In [None]:
# Print the current working directory.
!pwd

In [None]:
# List entries matching *cdc (presumably the extracted test_cdc / train_cdc folders).
!ls *cdc

In [None]:
%%bash
# Move the test_cdc and train_cdc folders into models/research/data.
mv test_cdc models/research/data
mv train_cdc models/research/data

In [None]:
# List models/research/data to check the result of the move.
!ls models/research/data

## Step 3: Convert annotations into a csv file

https://github.com/EdjeElectronics/TensorFlow-Object-Detection-API-Tutorial-Train-Multiple-Objects-Windows-10/blob/master/xml_to_csv.py

In [None]:
import os
import io
import pandas as pd
import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict

In [None]:
# Work from models/research/ so the relative data/... paths below resolve.
from os import chdir
chdir("/content/drive/My Drive/models/research/")

In [None]:
# Print the current working directory to confirm the chdir above.
!pwd

In [None]:
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET

# Flatten the training annotation XML files into one CSV row per annotated object.
xml_list = []
# glob returns files in arbitrary order; sorting makes the CSV reproducible.
for xml_file in sorted(glob.glob('data/train_cdc/train_annotations/*.xml')):
    root = ET.parse(xml_file).getroot()
    for member in root.findall('object'):
        # Children are read by position, not by tag name:
        #   size[0], size[1]   -> 'width', 'height' columns
        #   member[0]          -> 'class' column
        #   member[1][0..3]    -> 'xmin', 'ymin', 'xmax', 'ymax' columns
        # NOTE(review): this assumes every annotation file uses exactly this
        # child order -- confirm against the dataset.
        size = root.find('size')
        box = member[1]
        value = (root.find('filename').text,
                 int(size[0].text),
                 int(size[1].text),
                 member[0].text,
                 int(box[0].text),
                 int(box[1].text),
                 int(box[2].text),
                 int(box[3].text)
                 )
        xml_list.append(value)

column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
xml_df = pd.DataFrame(xml_list, columns=column_name)
# index=False: do not write the DataFrame row index as an extra column.
xml_df.to_csv('data/train_cdc/train_labels.csv', index=False)


## Step 4: Convert training data to TFRecord

Ref.
https://github.com/EdjeElectronics/TensorFlow-Object-Detection-API-Tutorial-Train-Multiple-Objects-Windows-10/blob/master/generate_tfrecord.py

In [None]:
# Work from models/research/ so the relative data/... paths below resolve.
from os import chdir
chdir("/content/drive/My Drive/models/research/")

In [None]:
# TO-DO replace this with label map
# Class name -> integer id. Ids start at 1 and follow the order of the label
# map shown in Part 3, Step 2.
_CLASS_IDS = {
    'aquarium': 1,          # aquarium / fish tank
    'bottle': 2,            # bottles, jars and cans (incl. discarded ones)
    'bowl': 3,              # bowls, basins
    'box': 4,               # boxes
    'bucket': 5,            # buckets
    'plastic_bag': 6,       # plastic bags, tarpaulin
    'plate': 7,             # plates
    'styrofoam': 8,         # styrofoam
    'tire': 9,              # discarded tires
    'toilet': 10,           # discarded toilets
    'tub': 11,              # bathtubs
    'washing_machine': 12,  # washing-machine tubs
    'water_tower': 13,      # water towers
}


def class_text_to_int(row_label):
    """Map a class name from the labels CSV to its integer class id.

    Args:
        row_label: class name, e.g. 'bucket'. Matching is exact and
            case-sensitive.

    Returns:
        The integer id (1-13) of the class.

    Raises:
        ValueError: if ``row_label`` is not a known class. The previous
            implementation fell through and returned None, which only failed
            later, inside the TFRecord feature, with an unrelated error.
    """
    try:
        return _CLASS_IDS[row_label]
    except (KeyError, TypeError):
        raise ValueError(
            "Unknown class label {!r}; expected one of {}".format(
                row_label, sorted(_CLASS_IDS))) from None

In [None]:
def split(df, group):
    """Group ``df`` by ``group`` and return one record per distinct key.

    Each record is a ``data`` namedtuple whose ``filename`` field holds the
    group key and whose ``object`` field holds the sub-DataFrame of rows
    sharing that key. Records follow the iteration order of the groupby's
    ``groups`` mapping.
    """
    record_type = namedtuple('data', ['filename', 'object'])
    grouped = df.groupby(group)
    records = []
    for key in grouped.groups:
        records.append(record_type(key, grouped.get_group(key)))
    return records


def create_tf_example(group, path):
    """Build a ``tf.train.Example`` for one image and all of its boxes.

    Args:
        group: record with a ``filename`` attribute and an ``object``
            DataFrame holding 'xmin', 'xmax', 'ymin', 'ymax' and 'class'
            columns (one row per box).
        path: directory that contains the image file.

    Returns:
        A ``tf.train.Example`` holding the encoded image bytes, the image
        size, and per-box coordinates divided by the image width/height,
        together with the class names and class ids.
    """
    image_path = os.path.join(path, '{}'.format(group.filename))
    with tf.io.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()

    # The size comes from the decoded image, not from the CSV columns.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    rows = [row for _, row in group.object.iterrows()]
    xmins = [row['xmin'] / width for row in rows]
    xmaxs = [row['xmax'] / width for row in rows]
    ymins = [row['ymin'] / height for row in rows]
    ymaxs = [row['ymax'] / height for row in rows]
    classes_text = [row['class'].encode('utf8') for row in rows]
    classes = [class_text_to_int(row['class']) for row in rows]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))


```
Usage:
  # From tensorflow/models/
  # Create train data:
  python generate_tfrecord.py --csv_input=images/train_labels.csv --image_dir=images/train --output_path=train.record
  # Create test data:
  python generate_tfrecord.py --csv_input=images/test_labels.csv  --image_dir=images/test --output_path=test.record
```

In [None]:
# List data/train_cdc/ (train_labels.csv is written there by the CSV conversion step).
!ls data/train_cdc/

In [None]:
# Serialize every training image with its boxes into a single TFRecord file.
output_path = "data/tfrecords/mbsh_train.tfrecord"
csv_path = "data/train_cdc/train_labels.csv"
image_dir = "data/train_cdc/train_images"

examples = pd.read_csv(csv_path)
grouped = split(examples, 'filename')  # one record per image file

# The context manager closes the writer even if building an example raises,
# so the output file handle is not leaked.
with tf.io.TFRecordWriter(output_path) as writer:
    for group in grouped:
        tf_example = create_tf_example(group, image_dir)
        writer.write(tf_example.SerializeToString())


In [None]:
# Long listing of data/tfrecords/ to check the TFRecord file and its size.
!ls -l data/tfrecords/

# Part 3: Preparation of Object Detection model 

## Step 1: Create Label map

In [None]:
# Create mbsh_label_map.pbtxt as a copy of the pet label map; it is edited in the next step.
!cp object_detection/data/pet_label_map.pbtxt object_detection/data/mbsh_label_map.pbtxt

## Step 2: Edit Label map
```
item {
  id: 1
  name: 'aquarium'
}
item {
  id: 2
  name: 'bottle'
}
item {
  id: 3
  name: 'bowl'
}
item {
  id: 4
  name: 'box'
}
item {
  id: 5
  name: 'bucket'
}
item {
  id: 6
  name: 'plastic_bag'
}
item {
  id: 7
  name: 'plate'
}
item {
  id: 8
  name: 'styrofoam'
}
item {
  id: 9
  name: 'tire'
}
item {
  id: 10
  name: 'toilet'
}
item {
  id: 11
  name: 'tub'
}
item {
  id: 12
  name: 'washing_machine'
}
item {
  id: 13
  name: 'water_tower'
}
```


In [None]:
# List object_detection/data/ to check that mbsh_label_map.pbtxt is present.
!ls object_detection/data/

## Step 3: Configure pipeline
https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/configuring_jobs.md
```
#line 3:
    num_classes: 13
#line 134:
    batch_size: 4
#line 151:
    learning_rate_base: 0.008
    total_steps: 100000
#line 161:
      fine_tune_checkpoint: "efficientdet_d0_coco17_tpu-32/checkpoint/ckpt-0"
#line 162:
        num_steps: 10000
#line 167:
          fine_tune_checkpoint_type: "detection"
#line 172:
        label_map_path: "object_detection/data/mbsh_label_map.pbtxt"
#line 174:
        input_path: "data/tfrecords/mbsh_train.tfrecord"
#
```


In [None]:
# Copy the EfficientDet-D0 pipeline config to data/mbsh_pipeline.config, the file used for training below.
!cp efficientdet_d0_coco17_tpu-32/pipeline.config data/mbsh_pipeline.config

In [None]:
# List data/ to check that mbsh_pipeline.config is present.
!ls data

# Part 4: IPO-P: Training (about 7-8 hours)

In [None]:
# Start training with the Object Detection API's TF2 entry point, using
# data/mbsh_pipeline.config as the pipeline config and model/ as --model_dir.
!python object_detection/model_main_tf2.py \
--alsologtostderr \
--pipeline_config_path=data/mbsh_pipeline.config \
--model_dir=model

# Part 5: IPO-P: Evaluation

## Step 1: Setup 

In [None]:
import tensorflow as tf

import matplotlib
import matplotlib.pyplot as plt

import io
import os
import pathlib
import random
import scipy.misc
import numpy as np
from six import BytesIO
from PIL import Image, ImageDraw, ImageFont

from object_detection.utils import label_map_util
from object_detection.utils import config_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder

%matplotlib inline

In [None]:
def load_image_into_numpy_array(path):
  """Load an image file into a uint8 numpy array.

  Args:
    path: path of the image file, readable through ``tf.io.gfile``.

  Returns:
    A uint8 numpy array of shape (height, width, 3); the image is converted
    to RGB before the conversion.
  """
  # Read through a context manager so the file handle is closed promptly.
  with tf.io.gfile.GFile(path, 'rb') as fid:
    img_data = fid.read()
  image = Image.open(BytesIO(img_data)).convert("RGB")
  # Direct conversion gives the same (height, width, 3) uint8 array as
  # getdata() + reshape without building an intermediate per-pixel sequence.
  return np.array(image, dtype=np.uint8)

def get_keypoint_tuples(eval_config):
  """Collect the keypoint edges of an eval config as (start, end) tuples.

  Args:
    eval_config: an eval config whose ``keypoint_edge`` entries expose
      ``start`` and ``end`` attributes.

  Returns:
    A list with one ``(start, end)`` tuple per keypoint edge, in the same
    order as ``eval_config.keypoint_edge``.
  """
  return [(edge.start, edge.end) for edge in eval_config.keypoint_edge]

## Step 2: Configure pipeline

In [None]:
import os

# Both paths are relative to the current working directory (models/research/).
pipeline_config = 'data/mbsh_pipeline.config'
# Fixed: the original line ended in a stray quote ('model''), a syntax error.
model_dir = 'model'

# Load the pipeline config; the detection model is built from it in a later cell.
configs = config_util.get_configs_from_pipeline_file(pipeline_config)

In [None]:
def get_model_detection_function(model):
  """Return a ``tf.function`` that runs detection with ``model``."""

  @tf.function
  def detect_fn(image):
    """Run preprocess, predict and postprocess on ``image``.

    Returns:
      A tuple ``(detections, prediction_dict, shapes)`` where ``shapes`` is
      the shape tensor from ``model.preprocess`` reshaped to one dimension.
    """
    preprocessed, true_shapes = model.preprocess(image)
    raw_predictions = model.predict(preprocessed, true_shapes)
    postprocessed = model.postprocess(raw_predictions, true_shapes)
    flat_shapes = tf.reshape(true_shapes, [-1])
    return postprocessed, raw_predictions, flat_shapes

  return detect_fn

In [None]:
# Build the detection model from the 'model' section of the loaded pipeline
# config with is_training=False, then wrap it in the detection function.
model_config = configs['model']
detection_model = model_builder.build(model_config=model_config, is_training=False)
detect_fn = get_model_detection_function(detection_model)

In [None]:
# Restore the most recent checkpoint found in model_dir.
# The original path 'ckpt-xxx' was a placeholder that never resolves to a real
# checkpoint. To evaluate a specific one, set ckpt_path by hand, e.g.
# ckpt_path = os.path.join(model_dir, 'ckpt-5').
ckpt_path = tf.train.latest_checkpoint(model_dir)
if ckpt_path is None:
    raise FileNotFoundError(
        f"No checkpoint found in '{model_dir}'; run the training step first.")
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
# expect_partial() silences warnings about checkpoint values this object does not use.
ckpt.restore(ckpt_path).expect_partial()

In [None]:
# Build the label lookup structures from the label map referenced by the
# eval input config of the pipeline.
# NOTE(review): confirm that the eval input reader section of
# data/mbsh_pipeline.config also points at the MBSH label map.
label_map_path = configs['eval_input_config'].label_map_path
label_map = label_map_util.load_labelmap(label_map_path)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=label_map_util.get_max_label_map_index(label_map),
    use_display_name=True)
category_index = label_map_util.create_category_index(categories)
label_map_dict = label_map_util.get_label_map_dict(label_map, use_display_name=True)

## Step 3: Predict with trained model


In [None]:
import os
import glob
import pandas as pd

# Detections scoring at least Threshold are kept.
Threshold = 0.3
# Index 0 is a filler so that list positions line up with the 1-based class ids.
classname = ['Nan', 'aquarium', 'bottle', 'bowl', 'box', 'bucket', 'plastic_bag',
             'plate', 'styrofoam', 'tire', 'toilet', 'tub', 'washing_machine',
             'water_tower']
# Added to the model's class indices to obtain the 1-based ids used above.
label_id_offset = 1
# Fixed: the glob pattern was missing its opening quote (syntax error).
files = sorted(glob.glob('data/train_cdc/train_images/*.jpg'))
blist = []
for img_file in files:
  fname = os.path.basename(img_file)
  print(f"file:{fname} ", end="")
  image_np = load_image_into_numpy_array(img_file)
  height, width = image_np.shape[:2]

  input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
  detections, predictions_dict, shapes = detect_fn(input_tensor)

  scores = detections['detection_scores'][0].numpy()
  label_ids = detections['detection_classes'][0].numpy().astype(int) + label_id_offset
  boxes = detections['detection_boxes'][0].numpy()

  # Count the leading detections whose score reaches Threshold (the loop stops
  # at the first lower score, i.e. it relies on scores being sorted descending).
  bcount = 0
  for score in scores:
    if score < Threshold:
      break
    bcount += 1
  # Fallback: when nothing reaches Threshold, still keep the top detection if
  # it scores above 0.15. The length check avoids indexing an empty array.
  if bcount == 0 and len(scores) > 0 and scores[0] > 0.15:
    bcount = 1
  print(f" count={bcount}")

  for i in range(bcount):
    score = scores[i]
    label_id = int(label_ids[i])
    label_name = classname[label_id]
    box = boxes[i]
    # Fixed: detection_boxes rows are [ymin, xmin, ymax, xmax], normalized to
    # [0, 1]. The original read them as [xmin, ymin, xmax, ymax], which swapped
    # the axes of every exported box.
    ymin = int(round(box[0] * height))
    xmin = int(round(box[1] * width))
    ymax = int(round(box[2] * height))
    xmax = int(round(box[3] * width))
    record = (fname, width, height, label_name, xmin, ymin, xmax, ymax)
    blist.append(record)


## Step 4: IPO-O: Output result


In [None]:
# Write the collected detections (one row per kept box) to
# data/labels_train_hat.csv, using the same column names as train_labels.csv.
column_name = ["filename", "width", "height", "class", "xmin", "ymin", "xmax", "ymax"]
df = pd.DataFrame(blist, columns=column_name)
df.to_csv(('data/labels_train_hat.csv'), index=None)