参考：  https://towardsdatascience.com/train-an-object-detector-using-tensorflow-2-object-detection-api-in-2021-a4fed450d1b9

# 事前準備
メニューの「ランタイム」→「ランタイムのタイプを変更」を選択し、ハードウェアアクセラレータを「GPU」に変更しておく。
TensorFlowのバージョンが2.xであることを確認しておく。
# GoogleDriveのマウント
GoogleDrive上にファイルを保存する場合は以下のセルを実行してカレントディレクトリを移動しておく。    
ただし、3GByte程度消費するので空き容量に注意。  
不要な場合はスキップする。    


In [None]:
# Mount Google Drive inside the Colab VM and switch into it, so that files
# created relative to the current directory by later cells end up on Drive.
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive

# ワークディレクトリの作成と移動

In [None]:
import sys
import os

# Create the project work directory (-p: no error if it already exists)
# and move into it.
!mkdir -p hand_detect
%cd hand_detect

# Absolute path of the work directory; later cells build paths from it and
# return here with `%cd $CUR_DIR`.
CUR_DIR = os.getcwd()

# object-detection モジュールのインストール
## gitリポジトリのclone

In [None]:
# Clone the TensorFlow models repository into the work directory; the
# following cells build and install object_detection from models/research.
%cd $CUR_DIR
!git clone https://github.com/tensorflow/models.git

## プロトコルバッファのコンパイル

In [None]:
# Compile the object_detection .proto definitions with protoc, writing the
# generated Python modules in place (--python_out=.).
%cd $CUR_DIR/models/research
!protoc object_detection/protos/*.proto --python_out=.

## モジュールのインストール

In [None]:
# Copy the TF2 setup.py to the research directory root and pip-install the
# package from there.
%cd $CUR_DIR/models/research
!cp object_detection/packages/tf2/setup.py . 
!python -m pip install .

## テスト

In [None]:
# Run the model-builder test script shipped with the repository
# (presumably as a sanity check of the installation above).
%cd $CUR_DIR/models/research
!python object_detection/builders/model_builder_tf2_test.py

# データセットのダウンロード
## gitリポジトリのclone

In [None]:
# Clone the detect_hands repository; later cells copy its helper scripts and
# use its label map.
%cd $CUR_DIR
!git clone https://github.com/aalpatya/detect_hands.git

##  データのダウンロードとcsvファイルの生成

In [None]:
!cp detect_hands/egohands_dataset_to_csv.py .
!python egohands_dataset_to_csv.py

## tf_recordsの生成

In [None]:
# Convert the train and test label CSVs into train.record / test.record
# using the generate_tfrecord.py script copied from detect_hands.
%cd $CUR_DIR
!cp detect_hands/generate_tfrecord.py .
!python generate_tfrecord.py --csv_input=images/train/train_labels.csv  --output_path=train.record
!python generate_tfrecord.py --csv_input=images/test/test_labels.csv  --output_path=test.record

# 元となるモデルのダウンロード

元になるモデルファイルは以下を参照  
https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md


In [None]:
# Download the base model archive into the work directory and unpack it.
# The pipeline.config and checkpoint used by later cells are read from the
# extracted ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8 directory.
%cd $CUR_DIR
!wget http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz
!tar -xzvf ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz

## pipeline.configの修正
学習処理に合わせて修正する

In [None]:
%cd $CUR_DIR

from object_detection.protos import pipeline_pb2
from google.protobuf import text_format
import tensorflow.compat.v1 as tf

import shutil

# Pipeline config of the downloaded base model (edited in place below)
CONFIG_FILE = CUR_DIR + "/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/pipeline.config"
# Backup copy of the config as it was before the first edit.
# NOTE(review): the ",org" suffix looks like a typo for ".org"; it is kept
# unchanged so that backups created by earlier runs are still recognised.
ORG_FILE    = CONFIG_FILE + ",org"
# Checkpoint to fine-tune from
CKPT_FILE   = CUR_DIR + "/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-0"
# Output directory for the trained model
TRAINED_DIR = CUR_DIR + "/output_training"
# Label map shared by the training and evaluation input readers
LABEL_MAP_PATH = CUR_DIR + "/detect_hands/model_data/ssd_mobilenet_v2_fpn_320/label_map.pbtxt"

# Back up the config once. Copying from Python instead of an unquoted shell
# `cp` keeps this working when the path contains spaces.
if not os.path.exists(ORG_FILE):
  shutil.copy(CONFIG_FILE, ORG_FILE)

# Parse the current config file into a pipeline protobuf message
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.gfile.GFile(CONFIG_FILE, "r") as f:
  proto_str = f.read()
text_format.Merge(proto_str, pipeline_config)

# Override the parameters for this training run
pipeline_config.model.ssd.num_classes = 1

train_config = pipeline_config.train_config
train_config.batch_size = 4
train_config.fine_tune_checkpoint = CKPT_FILE
train_config.fine_tune_checkpoint_type = "detection"

# Index 0 is overwritten, so the config must already list one input path.
train_reader = pipeline_config.train_input_reader
train_reader.label_map_path = LABEL_MAP_PATH
train_reader.tf_record_input_reader.input_path[0] = CUR_DIR + "/train.record"

eval_reader = pipeline_config.eval_input_reader[0]
eval_reader.label_map_path = LABEL_MAP_PATH
eval_reader.tf_record_input_reader.input_path[0] = CUR_DIR + "/test.record"

# Write the modified config back over the original file
pipeline_text = text_format.MessageToString(pipeline_config)
with tf.gfile.GFile(CONFIG_FILE, "wb") as f:
  f.write(pipeline_text)


# TensorBoardの起動

In [None]:
# Load the TensorBoard notebook extension and point it at the train/
# sub-directory of the training output directory.
%load_ext tensorboard
%tensorboard --logdir=$TRAINED_DIR/train

# 学習の実行
メインイベント  
ちょっと時間がかかるのでお茶でも飲んでてください(数時間のオーダー)

In [None]:
# Run the training script with the edited pipeline config; its model
# directory is TRAINED_DIR.
%cd $CUR_DIR/models/research/object_detection/

!python model_main_tf2.py \
--pipeline_config_path=$CONFIG_FILE \
--model_dir=$TRAINED_DIR \
--alsologtostderr

# モデルのエクスポート
生成したZIPファイルをダウンロードしてください。  

In [None]:
# Run the exporter on the checkpoints in TRAINED_DIR, writing its output to
# $CUR_DIR/inference (the inference cell loads inference/saved_model from there).
%cd $CUR_DIR/models/research/object_detection
!python exporter_main_v2.py \
--trained_checkpoint_dir=$TRAINED_DIR \
--pipeline_config_path=$CONFIG_FILE \
--output_directory $CUR_DIR/inference


In [None]:
# Package the exported model for download.
%cd $CUR_DIR
# Directory written by the export cell (relative to CUR_DIR)
EXPORT_DIR = "inference"

# Label map to ship alongside the exported model
LABEL_MAP = 'detect_hands/model_data/ssd_mobilenet_v2_fpn_320/label_map.pbtxt'

# Copy the label map into the export directory so the archive contains it
!cp $LABEL_MAP $EXPORT_DIR

# Name the archive with the current local timestamp, then zip the directory
import datetime
zip_filename = datetime.datetime.now().strftime('hand_detect_%Y%m%d_%H%M%S.zip')
!zip -r $zip_filename $EXPORT_DIR

# テスト

In [None]:
# テスト用画像ファイルのダウンロード
!wget https://cdn.amebaowndme.com/madrid-prd/madrid-web/images/sites/483796/1357355de6edbc4c4b54d22faf0b0756_ce052e9b134a9dbb047a8e17c890832a.jpg -O a.jpg
!wget https://cdn.amebaowndme.com/madrid-prd/madrid-web/images/sites/483796/564b6ca69e9022aa1977f335a148a05a_2d642c807aaf8f5b972a0a406903447d.jpg -O b.jpg


In [None]:
import os
import sys
import cv2

import numpy as np
import tensorflow as tf

from PIL import Image
from IPython.display import display

from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# Compatibility patches: give `utils.ops` the TF1 API and expose gfile under
# its old location.
utils_ops.tf = tf.compat.v1
tf.gfile = tf.io.gfile

# Label map -> category index used when drawing detection labels
PATH_TO_LABELS = CUR_DIR + '/detect_hands/model_data/ssd_mobilenet_v2_fpn_320/label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS,
    use_display_name=True,
)

# Test image files (relative to the current directory)
TEST_IMAGE_PATHS = ["a.jpg", "b.jpg"]

# Load the exported SavedModel
detection_model = tf.saved_model.load(CUR_DIR + "/inference/saved_model")

# Show the serving signature: first its inputs (a batch of 3-color uint8
# images is expected), then the dtypes and shapes of its outputs.
serving_signature = detection_model.signatures['serving_default']
print(serving_signature.inputs)
print(serving_signature.output_dtypes)
print(serving_signature.output_shapes)

# Inference function
def run_inference_for_single_image(model, image):
  """Run the detector on a single image and return its outputs as NumPy data.

  Args:
    model: Loaded model exposing a `signatures['serving_default']` callable.
    image: One image; anything `np.asarray` accepts. A batch axis is added
      here, so pass the image without one.

  Returns:
    dict with every model output stripped of its batch axis and trimmed to
    the first `num_detections` entries, plus:
      'num_detections': the detection count as a Python int,
      'detection_classes': cast to np.int64,
      'detection_masks_reframed': uint8 masks thresholded at 0.5, present
        only when the model outputs 'detection_masks'.
  """
  image = np.asarray(image)
  # The signature takes a batch of images, so prepend a batch axis.
  input_tensor = tf.convert_to_tensor(image)[tf.newaxis, ...]

  # Run inference
  model_fn = model.signatures['serving_default']
  output_dict = model_fn(input_tensor)

  # All outputs are batched tensors; num_detections is batched too.
  # Pick its first element explicitly: calling int() on the whole 1-element
  # tensor relies on NumPy's deprecated array-to-scalar conversion.
  num_detections = int(output_dict.pop('num_detections').numpy().ravel()[0])

  # Take index [0] to drop the batch axis and keep only the first
  # num_detections entries (assumes each remaining output has a detections
  # axis right after the batch axis).
  output_dict = {key: value[0, :num_detections].numpy()
                 for key, value in output_dict.items()}
  output_dict['num_detections'] = num_detections

  # detection_classes should be ints.
  output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)

  # Handle models with masks:
  if 'detection_masks' in output_dict:
    # Reframe the bbox masks to the image size.
    detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
        output_dict['detection_masks'], output_dict['detection_boxes'],
        image.shape[0], image.shape[1])
    # Binarise the masks at 0.5.
    detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
                                       tf.uint8)
    output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy()

  return output_dict

# Run inference and display the result
def show_inference(model, image_path):
  """Detect objects in one image file and display the annotated result.

  Args:
    model: Loaded detection model, passed on to
      `run_inference_for_single_image`.
    image_path: Path of the image file to open.
  """
  # Load the image as 3-channel RGB: the model is fed 3-color uint8 images,
  # while grayscale / RGBA / palette files would decode to another layout.
  # The context manager closes the file; np.array makes an independent copy.
  with Image.open(image_path) as image_file:
    image_np = np.array(image_file.convert("RGB"))

  # Run detection
  output_dict = run_inference_for_single_image(model, image_np)

  # Draw boxes and labels. The return value is ignored and image_np is
  # displayed below, so the drawing is expected to happen on image_np itself.
  vis_util.visualize_boxes_and_labels_on_image_array(
      image_np,
      output_dict['detection_boxes'],
      output_dict['detection_classes'],
      output_dict['detection_scores'],
      category_index,
      instance_masks=output_dict.get('detection_masks_reframed', None),
      use_normalized_coordinates=True,
      line_thickness=8)

  # Show the annotated image inline in the notebook
  display(Image.fromarray(image_np))
  # --- When running standalone (outside a notebook), display like this ---
  # new_image = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
  # cv2.imshow("Detection Results", new_image)  
  # cv2.waitKey(0)
  # cv2.destroyAllWindows()

# Run detection on every test image and display each result
for image_path in TEST_IMAGE_PATHS:
  show_inference(detection_model, image_path)

