# 행동 스티커 만들기

### 데이터셋 전처리

In [1]:
import csv
import io
import json
import os

os.environ["CUDA_VISIBLE_DEVICES"] = '-1' #CPU 사용
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

workdir = os.path.join(os.getenv('HOME'),'aiffel/mpii')
os.chdir(workdir)

from loguru import logger
from PIL import Image
import ray
import tensorflow as tf

#### json 파싱
앞서 다운 받은 `train.json`과 `validation.json`은 이미지에 담겨 있는 사람들의 pose keypoint 정보들을 가지고 있음. 이는 Pose Estimation을 위한 label로 삼을 수 있음.  
이 json파일들이 어떻게 구성되어 있는지 확인하기 위해 샘플로 annotation정보를 1개만 출력함. 

In [2]:
import json, os

json_file_path = os.getenv('HOME')+'/aiffel/mpii/mpii_human_pose_v1_u12_2/train.json'

with open(json_file_path) as train_json:
    train_annos = json.load(train_json)
    json_formatted_str = json.dumps(train_annos[0], indent=2) # json beautify
    print(json_formatted_str)

{
  "joints_vis": [
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1
  ],
  "joints": [
    [
      620.0,
      394.0
    ],
    [
      616.0,
      269.0
    ],
    [
      573.0,
      185.0
    ],
    [
      647.0,
      188.0
    ],
    [
      661.0,
      221.0
    ],
    [
      656.0,
      231.0
    ],
    [
      610.0,
      187.0
    ],
    [
      647.0,
      176.0
    ],
    [
      637.0201,
      189.8183
    ],
    [
      695.9799,
      108.1817
    ],
    [
      606.0,
      217.0
    ],
    [
      553.0,
      161.0
    ],
    [
      601.0,
      167.0
    ],
    [
      692.0,
      185.0
    ],
    [
      693.0,
      240.0
    ],
    [
      688.0,
      313.0
    ]
  ],
  "image": "015601864.jpg",
  "scale": 3.021046,
  "center": [
    594.0,
    257.0
  ]
}


**`joints`** 

`joints`가 label로 사용할 keypoint의 label임.  
이미지 형상과 사람의 포즈에 따라 모든 label이 이미지에 나타나지 않기 떄문에 `joints_vis`를 이용해서 실제로 사용할 수 있는 keypoint인지를 나타냄.  
`joints`의 순서는 다음과 같음.  
0 - 오른쪽 발목  
1 - 오른쪽 무릎  
2 - 오른쪽 엉덩이  
3 - 왼쪽 엉덩이  
4 - 왼쪽 무릎  
5 - 왼쪽 발목  
6 - 골반  
7 - 가슴(흉부)  
8 - 목  
9 - 머리 위  
10 - 오른쪽 손목  
11 - 오른쪽 팔꿈치  
12 - 오른쪽 어깨  
13 - 왼쪽 어깨  
14 - 왼쪽 팔꿈치  
15 - 왼쪽 손목  

**`scale`**  
높이 = scale * 200px  
scale정보가 coco dataset에는 scale 값 또한 2차원으로 주어져서 bbox scale이 나오지만 mpii는 높이만 나옴   


**`center`**  
사람의 중심점을 의미

In [3]:
# json annotation을 파싱하는 함수 
# image의 전체 path를 묶어 dict 타입의 label로 만듬. -> 이 label을 통해 학습 수행
def parse_one_annotation(anno, image_dir):
    filename = anno['image']
    joints = anno['joints']
    joints_visibility = anno['joints_vis']
    annotation = {
        'filename': filename,
        'filepath': os.path.join(image_dir, filename),
        'joints_visibility': joints_visibility,
        'joints': joints,
        'center': anno['center'],
        'scale' : anno['scale']
    }
    return annotation

### tfrecord 파일 만들기
일반적으로 학습 과정에서 gpu의 연산 속도보다 HDD I/O의 속도가 느리기 때문에 병목 현상이 발생하고 효율성이 떨어지는 것을 관찰할 수 있음.  
따라서 "학습 데이터를 어떻게 빠르게 읽는가"에 대한 고민이 생김.  

학습 속도를 향상시키기 위해서 data read(또는 prefetch) 또는 데이터 변환 단계에서 gpu학습과 병렬적으로 수행되도록 prefetch를 적용해야함.  
수행방법은 tf.data의 map함수를 이요하고 cache에 저장해두는 방법을 사용함.  

tf는 데이터셋을 tfrecord 형태로 표현함으로써 위 변환을 자동화 해줌.  
`tfrecord`는 binary record sequence를 저장하기 위한 형식으로, 내부적으로는 protocol buffer를 이용함.  

protobuf 는 크로스플랫폼에서 사용할 수 있는 직렬화 데이터 라이브러리라고 생각하면 됨.  데이터셋 크기가 크기 때문에 빠른 학습을 위해서 이 정보를 tfrecord 파일로 변환함.  

- annotation 을 total_shards 개수로 나눔(chunkify) (train : 64개, val : 8개)
- build_single_tfrecord 함수를 통해 tfrecord 로 저장
- 각 chunk 끼리 dependency 가 없기 때문에 병렬처리가 가능, ray를 사용


In [4]:
import ray

def build_tf_records(annotations, total_shards, split):
    chunks = chunkify(annotations, total_shards)
    futures = [
        # train_0001_of_0064.tfrecords
        build_single_tfrecord.remote(
            chunk, './tfrecords_mpii/{}_{}_of_{}.tfrecords'.format(
                split,
                str(i + 1).zfill(4),
                str(total_shards).zfill(4),
            )) for i, chunk in enumerate(chunks)
    ]
    ray.get(futures)

**annotation을 적절한 개수로 나누는 함수 `chunkify`** 
- l 은 annotation, n은 shard 개수
- shard 개수 단위로 annotation list 를 나누어서 새로운 list를 만듭니다.
- numpy array 라고 가정하면 (size, shard, anno_content) 정도의 shape을 가짐. 

**tfrecord 1개를 저장하는 함수 `build_single_tfrecord`**
- TFRecordWriter 를 이용해서 anno_list 를 shard 개수 단위로 작성함.
- generate_tfexample 함수를 사용함.
- [중요] write 할 때 string 으로 serialize 해야함.

**tf.example만드는 `generate_tfexample`**
- 우리가 정의한 json 의 python type의 값들을 tfexample 에 사용할 수 있는 값으로 변환함.
- image 파일은 byte 로 변환합니다. bitmap 으로 저장하게되면 파일용량이 상당히 커지기 때문에 만약 jpeg 타입이 아닌 경우 jpeg 으로 변환 후 content 로 불러서 저장함. (H,W,C)
- 각 label 값을 tf.train.Feature 로 저장합니다. 이 때 데이터 타입에 주의해야 함.
- 이미지는 byte 인코딩 된 값을 그대로 넣음.


### Ray
Ray는 파이썬을 위한 간단한 분산 어플리케이션 api임.  
참고자료 : [https://docs.ray.io/en/latest/](https://docs.ray.io/en/latest/)  

위 내용들을 모두 하나의 파일로 정리하면 다음과 같음.  

**tfrecords_mpii.py**

In [5]:
import csv
import io
import json
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from loguru import logger
from PIL import Image
import ray
import tensorflow as tf

num_train_shards = 64
num_val_shards = 8
ray.init()
tf.get_logger().setLevel('ERROR')


def chunkify(l, n):
    size = len(l) // n
    start = 0
    results = []
    for i in range(n - 1):
        results.append(l[start:start + size])
        start += size
    results.append(l[start:])
    return results


def _bytes_feature(value):
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy(
        )  # BytesList won't unpack a string from an EagerTensor.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def generate_tfexample(anno):
    filename = anno['filename']
    filepath = anno['filepath']
    with open(filepath, 'rb') as image_file:
        content = image_file.read()

    image = Image.open(filepath)
    if image.format != 'JPEG' or image.mode != 'RGB':
        image_rgb = image.convert('RGB')
        with io.BytesIO() as output:
            image_rgb.save(output, format="JPEG", quality=95)
            content = output.getvalue()

    width, height = image.size
    depth = 3

    c_x = int(anno['center'][0])
    c_y = int(anno['center'][1])
    scale = anno['scale']

    # x = [
    #     joint[0] / width if joint[0] >= 0 else joint[0]
    #     for joint in anno['joints']
    # ]
    # y = [
    #     joint[1] / height if joint[1] >= 0 else joint[0]
    #     for joint in anno['joints']
    # ]
    x = [
        int(joint[0]) if joint[0] >= 0 else int(joint[0]) 
        for joint in anno['joints']
    ]
    y = [
        int(joint[1]) if joint[1] >= 0 else int(joint[0]) 
        for joint in anno['joints']
    ]
    # 0 - invisible, 1 - occluded, 2 - visible
    v = [0 if joint_v == 0 else 2 for joint_v in anno['joints_visibility']]

    feature = {
        'image/height':
        tf.train.Feature(int64_list=tf.train.Int64List(value=[height])),
        'image/width':
        tf.train.Feature(int64_list=tf.train.Int64List(value=[width])),
        'image/depth':
        tf.train.Feature(int64_list=tf.train.Int64List(value=[depth])),
        'image/object/parts/x':
        tf.train.Feature(int64_list=tf.train.Int64List(value=x)),
        'image/object/parts/y':
        tf.train.Feature(int64_list=tf.train.Int64List(value=y)),
        'image/object/center/x': 
        tf.train.Feature(int64_list=tf.train.Int64List(value=[c_x])),
        'image/object/center/y': 
        tf.train.Feature(int64_list=tf.train.Int64List(value=[c_y])),
        'image/object/scale':
        tf.train.Feature(float_list=tf.train.FloatList(value=[scale])),
        # 'image/object/parts/x':
        # tf.train.Feature(float_list=tf.train.FloatList(value=x)),
        # 'image/object/parts/y':
        # tf.train.Feature(float_list=tf.train.FloatList(value=y)),
        'image/object/parts/v':
        tf.train.Feature(int64_list=tf.train.Int64List(value=v)),
        'image/encoded':
        _bytes_feature(content),
        'image/filename':
        _bytes_feature(filename.encode())
    }

    return tf.train.Example(features=tf.train.Features(feature=feature))


@ray.remote
def build_single_tfrecord(chunk, path):
    print('start to build tf records for ' + path)

    with tf.io.TFRecordWriter(path) as writer:
        for anno_list in chunk:
            tf_example = generate_tfexample(anno_list)
            writer.write(tf_example.SerializeToString())

    print('finished building tf records for ' + path)


def build_tf_records(annotations, total_shards, split):
    chunks = chunkify(annotations, total_shards)
    futures = [
        # train_0001_of_0064.tfrecords
        build_single_tfrecord.remote(
            chunk, './tfrecords_mpii/{}_{}_of_{}.tfrecords'.format(
                split,
                str(i + 1).zfill(4),
                str(total_shards).zfill(4),
            )) for i, chunk in enumerate(chunks)
    ]
    ray.get(futures)


def parse_one_annotation(anno, image_dir):
    filename = anno['image']
    joints = anno['joints']
    joints_visibility = anno['joints_vis']
    annotation = {
        'filename': filename,
        'filepath': os.path.join(image_dir, filename),
        'joints_visibility': joints_visibility,
        'joints': joints,
        'center': anno['center'],
        'scale' : anno['scale']
    }
    return annotation


def main():
    print('Start to parse annotations.')
    if not os.path.exists('./tfrecords_mpii'):
        os.makedirs('./tfrecords_mpii')

    with open(workdir + '/mpii_human_pose_v1_u12_2/train.json') as train_json:
        train_annos = json.load(train_json)
        train_annotations = [
            parse_one_annotation(anno, './images/')
            for anno in train_annos
        ]
        print('First train annotation: ', train_annotations[0])
        del (train_annos)

    with open(workdir + '/mpii_human_pose_v1_u12_2/validation.json') as val_json:
        val_annos = json.load(val_json)
        val_annotations = [
            parse_one_annotation(anno, './images/') for anno in val_annos
        ]
        print('First val annotation: ', val_annotations[0])
        del (val_annos)

    print('Start to build TF Records.')
    build_tf_records(train_annotations, num_train_shards, 'train')
    build_tf_records(val_annotations, num_val_shards, 'val')

    print('Successfully wrote {} annotations to TF Records.'.format(
        len(train_annotations) + len(val_annotations)))


if __name__ == '__main__':
    main()

2021-05-21 19:58:03,588	INFO services.py:1269 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


Start to parse annotations.
First train annotation:  {'filename': '015601864.jpg', 'filepath': './images/015601864.jpg', 'joints_visibility': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'joints': [[620.0, 394.0], [616.0, 269.0], [573.0, 185.0], [647.0, 188.0], [661.0, 221.0], [656.0, 231.0], [610.0, 187.0], [647.0, 176.0], [637.0201, 189.8183], [695.9799, 108.1817], [606.0, 217.0], [553.0, 161.0], [601.0, 167.0], [692.0, 185.0], [693.0, 240.0], [688.0, 313.0]], 'center': [594.0, 257.0], 'scale': 3.021046}
First val annotation:  {'filename': '005808361.jpg', 'filepath': './images/005808361.jpg', 'joints_visibility': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'joints': [[804.0, 711.0], [816.0, 510.0], [908.0, 438.0], [1040.0, 454.0], [906.0, 528.0], [883.0, 707.0], [974.0, 446.0], [985.0, 253.0], [982.7591, 235.9694], [962.2409, 80.0306], [869.0, 214.0], [798.0, 340.0], [902.0, 253.0], [1067.0, 253.0], [1167.0, 353.0], [1142.0, 478.0]], 'center': [966.0, 340.0], 'scale': 4.7

[2m[36m(pid=3354)[0m finished building tf records for ./tfrecords_mpii/train_0032_of_0064.tfrecords
[2m[36m(pid=3354)[0m start to build tf records for ./tfrecords_mpii/train_0043_of_0064.tfrecords
[2m[36m(pid=3356)[0m finished building tf records for ./tfrecords_mpii/train_0031_of_0064.tfrecords
[2m[36m(pid=3356)[0m start to build tf records for ./tfrecords_mpii/train_0044_of_0064.tfrecords
[2m[36m(pid=3351)[0m finished building tf records for ./tfrecords_mpii/train_0033_of_0064.tfrecords
[2m[36m(pid=3351)[0m start to build tf records for ./tfrecords_mpii/train_0045_of_0064.tfrecords
[2m[36m(pid=3360)[0m finished building tf records for ./tfrecords_mpii/train_0034_of_0064.tfrecords
[2m[36m(pid=3360)[0m start to build tf records for ./tfrecords_mpii/train_0046_of_0064.tfrecords
[2m[36m(pid=3355)[0m finished building tf records for ./tfrecords_mpii/train_0035_of_0064.tfrecords
[2m[36m(pid=3355)[0m start to build tf records for ./tfrecords_mpii/train_0047_of_

In [6]:
# 약 200mb 정도의 tfrecords들이 72개 만들어진 것을 확인할 수 있음. 
%ls | wc

[2m[36m(pid=3360)[0m finished building tf records for ./tfrecords_mpii/val_0006_of_0008.tfrecords
     17      17     391


### data label로 만들기
tfrecords 파일을 읽고 전처리를 할 수 있는 dataloader를 만듬.  

**`Preprocessor` class**  
####  `__call__()` 메소드  
`Preprocessor` 클래스 코드의 `__call__()` 메소드 내부에서 진행되는 주요 과정은 다음과 같음.  
- tfrecord 파일이기 때문에 병렬로 읽는 것은 tf 가 지원해주고 있음. `self.parse_tfexample()` 에 구현되어 있고 이 함수를 통해 tf.tensor 로 이루어진 dictionary 형태의 features를 얻을 수 있음.  
- 즉 image 는 `features['image/encoded']` 형태로 사용할 수 있고 tfrecord 를 저장할 때 jpeg encoding 된 값을 넣었으므로 `tf.io.decode_jpeg()`로 decoding 하여 tensor 형태의 이미지를 얻음.  
- `crop_roi()` 메소드를 이용해 해당 이미지를 학습하기 편하도록 몇가지 트릭을 적용함. 
- `make_heatmaps()` 메소드를 이용해 label을 heatmap 으로 나타냄.


####  `parse_tfexample` 메소드  

- tfrecord 파일 형식을 우리가 저장한 data type feature 에 맞게 parsing 함.   
- tf 가 자동으로 parsing 해주는 점은 아주 편하지만 feature description 을 정확하게 알고 있어야하는 단점이 있음.   
- 즉, tfrecord 에서 사용할 key 값들과 data type 을 모르면 tfrecord 파일을 사용하기 굉장히 어려움. (serialize 되어있으므로..)

####  `crop_roi` 메소드  

- 얻은 image 와 label 을 이용해서 적절한 학습형태로 변환

#### `make_heatmaps` 메소드  

- 우리가 알고 있는 것은 joints의 위치, center의 좌표, body height값임. 균일하게 학습하기 위해 body width도 적절히 정하는 것도 중요함.  

- 높이 정보와 keypoint 위치를 이용해서 정사각형 박스를 사용하는 것을 기본으로 디자인 함. 이와 관련해서는 여러 방법이 있을 수 있지만, 우리가 임의로 조정한 crop box가 이미지 바깥으로 나가지 않는지 예외처리 하는 것을 더 중요하게 봄.  

- (x,y)좌표로 되어있는 keypoint를 heatmap으로 변경시킴. 
- 16개의 점을 generate_2d_gaussian() 함수를 이용해서 64x64 의 map 에 표현함. 


####  `generate_2d_guassian` 메소드   

- sigma 값이 1 이고 window size 7 인 필터를 이용해서 만듬.  

위 내용들을 하나의 py 파일로 정리하면 다음과 같음.  

**preprocess.py**

In [7]:
import tensorflow as tf


class Preprocessor(object):
    def __init__(self,
                 image_shape=(256, 256, 3),
                 heatmap_shape=(64, 64, 16),
                 is_train=False):
        self.is_train = is_train
        self.image_shape = image_shape
        self.heatmap_shape = heatmap_shape

    def __call__(self, example):
        features = self.parse_tfexample(example)
        image = tf.io.decode_jpeg(features['image/encoded'])

        if self.is_train:
            random_margin = tf.random.uniform([1], 0.1, 0.3)[0]
            image, keypoint_x, keypoint_y = self.crop_roi(image, features, margin=random_margin)
            image = tf.image.resize(image, self.image_shape[0:2])
        else:
            image, keypoint_x, keypoint_y = self.crop_roi(image, features)
            image = tf.image.resize(image, self.image_shape[0:2])

        image = tf.cast(image, tf.float32) / 127.5 - 1
        heatmaps = self.make_heatmaps(features, keypoint_x, keypoint_y)

        # print (image.shape, heatmaps.shape, type(heatmaps))

        return image, heatmaps


    def crop_roi(self, image, features, margin=0.2):
        img_shape = tf.shape(image)
        img_height = img_shape[0]
        img_width = img_shape[1]
        img_depth = img_shape[2]

        keypoint_x = tf.cast(tf.sparse.to_dense(features['image/object/parts/x']), dtype=tf.int32)
        keypoint_y = tf.cast(tf.sparse.to_dense(features['image/object/parts/y']), dtype=tf.int32)
        center_x = features['image/object/center/x']
        center_y = features['image/object/center/y']
        body_height = features['image/object/scale'] * 200.0

        masked_keypoint_x = tf.boolean_mask(keypoint_x, keypoint_x > 0)
        masked_keypoint_y = tf.boolean_mask(keypoint_y, keypoint_y > 0)

        keypoint_xmin = tf.reduce_min(masked_keypoint_x)
        keypoint_xmax = tf.reduce_max(masked_keypoint_x)
        keypoint_ymin = tf.reduce_min(masked_keypoint_y)
        keypoint_ymax = tf.reduce_max(masked_keypoint_y)

        xmin = keypoint_xmin - tf.cast(body_height * margin, dtype=tf.int32)
        xmax = keypoint_xmax + tf.cast(body_height * margin, dtype=tf.int32)
        ymin = keypoint_ymin - tf.cast(body_height * margin, dtype=tf.int32)
        ymax = keypoint_ymax + tf.cast(body_height * margin, dtype=tf.int32)

        effective_xmin = xmin if xmin > 0 else 0
        effective_ymin = ymin if ymin > 0 else 0
        effective_xmax = xmax if xmax < img_width else img_width
        effective_ymax = ymax if ymax < img_height else img_height
        effective_height = effective_ymax - effective_ymin
        effective_width = effective_xmax - effective_xmin

        image = image[effective_ymin:effective_ymax, effective_xmin:effective_xmax, :]
        new_shape = tf.shape(image)
        new_height = new_shape[0]
        new_width = new_shape[1]

        effective_keypoint_x = (keypoint_x - effective_xmin) / new_width
        effective_keypoint_y = (keypoint_y - effective_ymin) / new_height

        return image, effective_keypoint_x, effective_keypoint_y


    def generate_2d_guassian(self, height, width, y0, x0, visibility=2, sigma=1, scale=12):
        """
        "The same technique as Tompson et al. is used for supervision. A MeanSquared Error (MSE) loss is
        applied comparing the predicted heatmap to a ground-truth heatmap consisting of a 2D gaussian
        (with standard deviation of 1 px) centered on the keypoint location."

        https://github.com/princeton-vl/pose-hg-train/blob/master/src/util/img.lua#L204
        """
        heatmap = tf.zeros((height, width))

        # this gaussian patch is 7x7, let's get four corners of it first
        xmin = x0 - 3 * sigma
        ymin = y0 - 3 * sigma
        xmax = x0 + 3 * sigma
        ymax = y0 + 3 * sigma
        # if the patch is out of image boundary we simply return nothing according to the source code
        # [1]"In these cases the joint is either truncated or severely occluded, so for
        # supervision a ground truth heatmap of all zeros is provided."
        if xmin >= width or ymin >= height or xmax < 0 or ymax <0 or visibility == 0:
            return heatmap

        size = 6 * sigma + 1
        x, y = tf.meshgrid(tf.range(0, 6*sigma+1, 1), tf.range(0, 6*sigma+1, 1), indexing='xy')

        # the center of the gaussian patch should be 1
        center_x = size // 2
        center_y = size // 2

        # generate this 7x7 gaussian patch
        gaussian_patch = tf.cast(tf.math.exp(-(tf.square(x - center_x) + tf.math.square(y - center_y)) / (tf.math.square(sigma) * 2)) * scale, dtype=tf.float32)

        # part of the patch could be out of the boundary, so we need to determine the valid range
        # if xmin = -2, it means the 2 left-most columns are invalid, which is max(0, -(-2)) = 2
        patch_xmin = tf.math.maximum(0, -xmin)
        patch_ymin = tf.math.maximum(0, -ymin)
        # if xmin = 59, xmax = 66, but our output is 64x64, then we should discard 2 right-most columns
        # which is min(64, 66) - 59 = 5, and column 6 and 7 are discarded
        patch_xmax = tf.math.minimum(xmax, width) - xmin
        patch_ymax = tf.math.minimum(ymax, height) - ymin

        # also, we need to determine where to put this patch in the whole heatmap
        heatmap_xmin = tf.math.maximum(0, xmin)
        heatmap_ymin = tf.math.maximum(0, ymin)
        heatmap_xmax = tf.math.minimum(xmax, width)
        heatmap_ymax = tf.math.minimum(ymax, height)

        # finally, insert this patch into the heatmap
        indices = tf.TensorArray(tf.int32, 1, dynamic_size=True)
        updates = tf.TensorArray(tf.float32, 1, dynamic_size=True)

        count = 0

        for j in tf.range(patch_ymin, patch_ymax):
            for i in tf.range(patch_xmin, patch_xmax):
                indices = indices.write(count, [heatmap_ymin+j, heatmap_xmin+i])
                updates = updates.write(count, gaussian_patch[j][i])
                count += 1

        heatmap = tf.tensor_scatter_nd_update(heatmap, indices.stack(), updates.stack())

        return heatmap


    def make_heatmaps(self, features, keypoint_x, keypoint_y):
        v = tf.cast(tf.sparse.to_dense(features['image/object/parts/v']), dtype=tf.float32)
        x = tf.cast(tf.math.round(keypoint_x * self.heatmap_shape[0]), dtype=tf.int32)
        y = tf.cast(tf.math.round(keypoint_y * self.heatmap_shape[1]), dtype=tf.int32)

        num_heatmap = self.heatmap_shape[2]
        heatmap_array = tf.TensorArray(tf.float32, 16)

        for i in range(num_heatmap):
            gaussian = self.generate_2d_guassian(self.heatmap_shape[1], self.heatmap_shape[0], y[i], x[i], v[i])
            heatmap_array = heatmap_array.write(i, gaussian)

        heatmaps = heatmap_array.stack()
        heatmaps = tf.transpose(heatmaps, perm=[1, 2, 0]) # change to (64, 64, 16)

        return heatmaps

    def parse_tfexample(self, example_proto):
        image_feature_description = {
            'image/height': tf.io.FixedLenFeature([], tf.int64),
            'image/width': tf.io.FixedLenFeature([], tf.int64),
            'image/depth': tf.io.FixedLenFeature([], tf.int64),
            'image/object/parts/x': tf.io.VarLenFeature(tf.int64),
            'image/object/parts/y': tf.io.VarLenFeature(tf.int64),
            'image/object/parts/v': tf.io.VarLenFeature(tf.int64),
            'image/object/center/x': tf.io.FixedLenFeature([], tf.int64),
            'image/object/center/y': tf.io.FixedLenFeature([], tf.int64),
            'image/object/scale': tf.io.FixedLenFeature([], tf.float32),
            'image/encoded': tf.io.FixedLenFeature([], tf.string),
            'image/filename': tf.io.FixedLenFeature([], tf.string),
        }
        return tf.io.parse_single_example(example_proto,
                                          image_feature_description)

### 모델 학습

#### Hourglass 모델 만들기

In [8]:
import tensorflow as tf

from tensorflow.keras.layers import Add, Concatenate, Lambda
from tensorflow.keras.layers import Input, Conv2D, ReLU, MaxPool2D
from tensorflow.keras.layers import UpSampling2D, ZeroPadding2D
from tensorflow.keras.layers import BatchNormalization

##### Residual block module

In [9]:
def BottleneckBlock(inputs, filters, strides=1, downsample=False, name=None):
    identity = inputs
    if downsample:
        identity = Conv2D(
            filters=filters,  # lift channels first
            kernel_size=1,
            strides=strides,
            padding='same',
            kernel_initializer='he_normal')(inputs)

    x = BatchNormalization(momentum=0.9)(inputs)
    x = ReLU()(x)
    x = Conv2D(
        filters=filters // 2,
        kernel_size=1,
        strides=1,
        padding='same',
        kernel_initializer='he_normal')(x)

    x = BatchNormalization(momentum=0.9)(x)
    x = ReLU()(x)
    x = Conv2D(
        filters=filters // 2,
        kernel_size=3,
        strides=strides,
        padding='same',
        kernel_initializer='he_normal')(x)

    x = BatchNormalization(momentum=0.9)(x)
    x = ReLU()(x)
    x = Conv2D(
        filters=filters,
        kernel_size=1,
        strides=1,
        padding='same',
        kernel_initializer='he_normal')(x)

    x = Add()([identity, x])
    return x

##### Hourglass module

In [10]:
def HourglassModule(inputs, order, filters, num_residual):
    """
    https://github.com/princeton-vl/pose-hg-train/blob/master/src/models/hg.lua#L3
    """
    # Upper branch
    up1 = BottleneckBlock(inputs, filters, downsample=False)

    for i in range(num_residual):
        up1 = BottleneckBlock(up1, filters, downsample=False)

    # Lower branch
    low1 = MaxPool2D(pool_size=2, strides=2)(inputs)
    for i in range(num_residual):
        low1 = BottleneckBlock(low1, filters, downsample=False)

    low2 = low1
    if order > 1:
        low2 = HourglassModule(low1, order - 1, filters, num_residual)
    else:
        for i in range(num_residual):
            low2 = BottleneckBlock(low2, filters, downsample=False)

    low3 = low2
    for i in range(num_residual):
        low3 = BottleneckBlock(low3, filters, downsample=False)

    up2 = UpSampling2D(size=2)(low3)

    return up2 + up1

##### intermediate output을 위한 linear layer

In [11]:
def LinearLayer(inputs, filters):
    x = Conv2D(
        filters=filters,
        kernel_size=1,
        strides=1,
        padding='same',
        kernel_initializer='he_normal')(inputs)
    x = BatchNormalization(momentum=0.9)(x)
    x = ReLU()(x)
    return x

#### Stacked Hourglass

In [12]:
def StackedHourglassNetwork(
        input_shape=(256, 256, 3), num_stack=4, num_residual=1,
        num_heatmap=16):
    """
    https://github.com/princeton-vl/pose-hg-train/blob/master/src/models/hg.lua#L33
    """
    inputs = Input(shape=input_shape)

    # initial processing of the image
    x = Conv2D(
        filters=64,
        kernel_size=7,
        strides=2,
        padding='same',
        kernel_initializer='he_normal')(inputs)
    x = BatchNormalization(momentum=0.9)(x)
    x = ReLU()(x)
    x = BottleneckBlock(x, 128, downsample=True)
    x = MaxPool2D(pool_size=2, strides=2)(x)
    x = BottleneckBlock(x, 128, downsample=False)
    x = BottleneckBlock(x, 256, downsample=True)

    ys = []
    for i in range(num_stack):
        x = HourglassModule(x, order=4, filters=256, num_residual=num_residual)
        for i in range(num_residual):
            x = BottleneckBlock(x, 256, downsample=False)

        # predict 256 channels like a fully connected layer.
        x = LinearLayer(x, 256)

        # predict final channels, which is also the number of predicted heatmap
        y = Conv2D(
            filters=num_heatmap,
            kernel_size=1,
            strides=1,
            padding='same',
            kernel_initializer='he_normal')(x)
        ys.append(y)

        # if it's not the last stack, we need to add predictions back
        if i < num_stack - 1:
            y_intermediate_1 = Conv2D(filters=256, kernel_size=1, strides=1)(x)
            y_intermediate_2 = Conv2D(filters=256, kernel_size=1, strides=1)(y)
            x = Add()([y_intermediate_1, y_intermediate_2])

    return tf.keras.Model(inputs, ys, name='stacked_hourglass')

In [13]:
model = StackedHourglassNetwork()

### 학습 엔진 만들기
학습 코드 `train.py`를 구현.  
지금까지 제작한 `*.py` 모듈들은 여기서 참조(import)되어 사용될 것임. 

In [14]:
import math
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from datetime import datetime

import click
import tensorflow as tf

from hourglass104 import StackedHourglassNetwork
from preprocess import Preprocessor

IMAGE_SHAPE = (256, 256, 3)
HEATMAP_SIZE = (64, 64)

#### `automatic_gpu_usage` 메소드    
- gpu memory growth 옵션을 조정

In [15]:
import math
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from datetime import datetime

import click
import tensorflow as tf

from hourglass104 import StackedHourglassNetwork
from preprocess import Preprocessor

IMAGE_SHAPE = (256, 256, 3)
HEATMAP_SIZE = (64, 64)

def automatic_gpu_usage() :
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            # Currently, memory growth needs to be the same across GPUs
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
        except RuntimeError as e:
            # Memory growth must be set before GPUs have been initialized
            print(e)

#### Trainer class
- loss : MSE (heatmap 을 pixel 단위 MSE 로 계산) → 실제 계산은 약간 달라, compute_loss() 에서 새로 구현함.
- strategy : 분산학습용 tf.strategy 임. 사용 가능한 GPU가 1개뿐이라면 사용하지 않음.
- optimizer : Adam

##### `lr_decay` 메소드  
- learning rate : decay step에 따라 1/10씩 작아지도록 설정.

##### `compute_loss` 메소드
- loss function 계산

##### `train_step` , `val_step` 메소드
이론대로라면 self.loss_object 를 사용해서 MSE 로 구현하는 것이 맞지만 사실 동일 weight MSE 는 수렴이 잘 되지 않음.   
예측해야하는 positive (joint 들) 의 비율이 negative (배경이라고 할 수 있겠죠?) 에 비해 상당히 적은 비율로 등장하기 때문.  

label이 배경이 아닌 경우(heatmap 값이 0보다 큰 경우)에 추가적인 weight를 줃도록 함.  
weight가 82인 이유는 heatmap 전체 크기인 64x64에서 gaussian point 등장 비율이 7x7 패치이기 때문에 64/7 = 9.1 -> 9x9로 계산

In [16]:
class Trainer(object):
    def __init__(self,
                 model,
                 epochs,
                 global_batch_size,
                 strategy,
                 initial_learning_rate,
                 version='0.0.1',
                 start_epoch=1,
                 tensorboard_dir='./logs'):
        self.start_epoch = start_epoch
        self.model = model
        self.epochs = epochs
        self.strategy = strategy
        self.global_batch_size = global_batch_size
        self.loss_object = tf.keras.losses.MeanSquaredError(
            reduction=tf.keras.losses.Reduction.NONE)
        # "we use rmsprop with a learning rate of 2.5e-4.""
        self.optimizer = tf.keras.optimizers.Adam(
            learning_rate=initial_learning_rate)
        self.model = model

        self.current_learning_rate = initial_learning_rate
        self.last_val_loss = math.inf
        self.lowest_val_loss = math.inf
        self.patience_count = 0
        self.max_patience = 10
        self.tensorboard_dir = tensorboard_dir
        self.best_model = None
        self.version = version

    def lr_decay(self):
        if self.patience_count >= self.max_patience:
            self.current_learning_rate /= 10.0
            self.patience_count = 0
        elif self.last_val_loss == self.lowest_val_loss:
            self.patience_count = 0
        self.patience_count += 1

        self.optimizer.learning_rate = self.current_learning_rate

    def lr_decay_step(self, epoch):
        if epoch == 25 or epoch == 50 or epoch == 75:
            self.current_learning_rate /= 10.0
        self.optimizer.learning_rate = self.current_learning_rate

    def compute_loss(self, labels, outputs):
        loss = 0
        for output in outputs:
            weights = tf.cast(labels > 0, dtype=tf.float32) * 81 + 1
            loss += tf.math.reduce_mean(
                tf.math.square(labels - output) * weights) * (
                    1. / self.global_batch_size)
        return loss

    def train_step(self, inputs):
        images, labels = inputs
        with tf.GradientTape() as tape:
            outputs = self.model(images, training=True)
            loss = self.compute_loss(labels, outputs)

        grads = tape.gradient(
            target=loss, sources=self.model.trainable_variables)
        self.optimizer.apply_gradients(
            zip(grads, self.model.trainable_variables))

        return loss

    def val_step(self, inputs):
        images, labels = inputs
        outputs = self.model(images, training=False)
        loss = self.compute_loss(labels, outputs)
        return loss

    def run(self, train_dist_dataset, val_dist_dataset):
        @tf.function
        def distributed_train_epoch(dataset):
            tf.print('Start distributed traininng...')
            total_loss = 0.0
            num_train_batches = 0.0
            for one_batch in dataset:
                per_replica_loss = self.strategy.experimental_run_v2(
                    self.train_step, args=(one_batch, ))
                batch_loss = self.strategy.reduce(
                    tf.distribute.ReduceOp.SUM, per_replica_loss, axis=None)
                total_loss += batch_loss
                num_train_batches += 1
                tf.print('Trained batch', num_train_batches, 'batch loss',
                         batch_loss, 'epoch total loss', total_loss / num_train_batches)
            return total_loss, num_train_batches

        @tf.function
        def distributed_val_epoch(dataset):
            total_loss = 0.0
            num_val_batches = 0.0
            for one_batch in dataset:
                per_replica_loss = self.strategy.experimental_run_v2(
                    self.val_step, args=(one_batch, ))
                num_val_batches += 1
                batch_loss = self.strategy.reduce(
                    tf.distribute.ReduceOp.SUM, per_replica_loss, axis=None)
                tf.print('Validated batch', num_val_batches, 'batch loss',
                         batch_loss)
                if not tf.math.is_nan(batch_loss):
                    # TODO: Find out why the last validation batch loss become NaN
                    total_loss += batch_loss
                else:
                    num_val_batches -= 1

            return total_loss, num_val_batches

        summary_writer = tf.summary.create_file_writer(self.tensorboard_dir)
        summary_writer.set_as_default()

        for epoch in range(self.start_epoch, self.epochs + 1):
            tf.summary.experimental.set_step(epoch)

            self.lr_decay()
            tf.summary.scalar('epoch learning rate',
                              self.current_learning_rate)

            print('Start epoch {} with learning rate {}'.format(
                epoch, self.current_learning_rate))

            train_total_loss, num_train_batches = distributed_train_epoch(
                train_dist_dataset)
            train_loss = train_total_loss / num_train_batches
            print('Epoch {} train loss {}'.format(epoch, train_loss))
            tf.summary.scalar('epoch train loss', train_loss)

            val_total_loss, num_val_batches = distributed_val_epoch(
                val_dist_dataset)
            val_loss = val_total_loss / num_val_batches
            print('Epoch {} val loss {}'.format(epoch, val_loss))
            tf.summary.scalar('epoch val loss', val_loss)

            # save model when reach a new lowest validation loss
            if val_loss < self.lowest_val_loss:
                self.save_model(epoch, val_loss)
                self.lowest_val_loss = val_loss
            self.last_val_loss = val_loss

        return self.best_model

    def save_model(self, epoch, loss):
        model_name = './models/model-v{}-epoch-{}-loss-{:.4f}.h5'.format(
            self.version, epoch, loss)
        self.model.save_weights(model_name)
        self.best_model = model_name
        print("Model {} saved.".format(model_name))



#### tf.dataset 만들기  
##### `create_dataset` 메소드  
tfrecord파일을 `tf.dataset`으로 만듬.

##### `train` 메소드  
train함수 구현

In [17]:
def create_dataset(tfrecords, batch_size, num_heatmap, is_train):
    preprocess = Preprocessor(
        IMAGE_SHAPE, (HEATMAP_SIZE[0], HEATMAP_SIZE[1], num_heatmap), is_train)

    dataset = tf.data.Dataset.list_files(tfrecords)
    dataset = tf.data.TFRecordDataset(dataset)
    dataset = dataset.map(
        preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    if is_train:
        dataset = dataset.shuffle(batch_size)

    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

    return dataset


def train(epochs, start_epoch, learning_rate, tensorboard_dir, checkpoint,
          num_heatmap, batch_size, train_tfrecords, val_tfrecords, version):
    strategy = tf.distribute.MirroredStrategy()
    global_batch_size = strategy.num_replicas_in_sync * batch_size
    train_dataset = create_dataset(
        train_tfrecords, global_batch_size, num_heatmap, is_train=True)
    val_dataset = create_dataset(
        val_tfrecords, global_batch_size, num_heatmap, is_train=False)

    if not os.path.exists(os.path.join('./models')):
        os.makedirs(os.path.join('./models/'))

    with strategy.scope():
        train_dist_dataset = strategy.experimental_distribute_dataset(
            train_dataset)
        val_dist_dataset = strategy.experimental_distribute_dataset(
            val_dataset)

        model = StackedHourglassNetwork(IMAGE_SHAPE, 4, 1, num_heatmap)
        if checkpoint and os.path.exists(checkpoint):
            model.load_weights(checkpoint)

        trainer = Trainer(
            model,
            epochs,
            global_batch_size,
            strategy,
            initial_learning_rate=learning_rate,
            start_epoch=start_epoch,
            version=version,
            tensorboard_dir=tensorboard_dir)

        print('Start training...')
        return trainer.run(train_dist_dataset, val_dist_dataset)


In [None]:
tfrecords_dir = os.getenv('HOME')+'/aiffel/mpii/tfrecords_mpii/'
train_tfrecords = os.path.join(tfrecords_dir, 'train*')
val_tfrecords = os.path.join(tfrecords_dir, 'val*')
epochs = 5
batch_size = 16
num_heatmap = 16
tensorboard_dir = './logs/'
learning_rate = 0.0007
start_epoch = 1

automatic_gpu_usage()

pretrained_path = None # './models_old/model-v0.0.2-epoch-15-loss-1.1013.h5'

history = train(epochs, start_epoch, learning_rate, tensorboard_dir, pretrained_path,
      num_heatmap, batch_size, train_tfrecords, val_tfrecords, '0.0.1')

Start training...
Start epoch 1 with learning rate 0.0007
Start distributed traininng...
Trained batch 1 batch loss 2.43954825 epoch total loss 2.43954825
Trained batch 2 batch loss 2.38080192 epoch total loss 2.41017509
Trained batch 3 batch loss 2.45632935 epoch total loss 2.42555976
Trained batch 4 batch loss 2.4508121 epoch total loss 2.43187284
Trained batch 5 batch loss 2.39805412 epoch total loss 2.42510915
Trained batch 6 batch loss 2.2803793 epoch total loss 2.40098739
Trained batch 7 batch loss 2.25232601 epoch total loss 2.37975
Trained batch 8 batch loss 2.059057 epoch total loss 2.33966351
Trained batch 9 batch loss 2.11920094 epoch total loss 2.31516767
Trained batch 10 batch loss 2.22806096 epoch total loss 2.30645704
Trained batch 11 batch loss 2.17170811 epoch total loss 2.2942071
Trained batch 12 batch loss 2.14521074 epoch total loss 2.28179073
Trained batch 13 batch loss 2.13886261 epoch total loss 2.2707963
Trained batch 14 batch loss 2.12208605 epoch total loss 2.

Trained batch 123 batch loss 1.66911769 epoch total loss 1.81632113
Trained batch 124 batch loss 1.73577356 epoch total loss 1.81567156
Trained batch 125 batch loss 1.72361565 epoch total loss 1.81493521
Trained batch 126 batch loss 1.70499074 epoch total loss 1.8140626
Trained batch 127 batch loss 1.75179744 epoch total loss 1.81357229
Trained batch 128 batch loss 1.71510959 epoch total loss 1.81280303
Trained batch 129 batch loss 1.70718145 epoch total loss 1.8119843
Trained batch 130 batch loss 1.74139476 epoch total loss 1.8114413
Trained batch 131 batch loss 1.76240945 epoch total loss 1.81106699
Trained batch 132 batch loss 1.79894304 epoch total loss 1.81097519
Trained batch 133 batch loss 1.64030933 epoch total loss 1.80969191
Trained batch 134 batch loss 1.69346511 epoch total loss 1.80882454
Trained batch 135 batch loss 1.70813131 epoch total loss 1.80807865
Trained batch 136 batch loss 1.7232101 epoch total loss 1.80745459
Trained batch 137 batch loss 1.63857353 epoch total 

Trained batch 245 batch loss 1.64032269 epoch total loss 1.72212851
Trained batch 246 batch loss 1.53555787 epoch total loss 1.7213701
Trained batch 247 batch loss 1.53688407 epoch total loss 1.72062314
Trained batch 248 batch loss 1.42947161 epoch total loss 1.71944916
Trained batch 249 batch loss 1.52767086 epoch total loss 1.71867907
Trained batch 250 batch loss 1.50514805 epoch total loss 1.71782494
Trained batch 251 batch loss 1.42488134 epoch total loss 1.71665788
Trained batch 252 batch loss 1.33387816 epoch total loss 1.71513903
Trained batch 253 batch loss 1.35057914 epoch total loss 1.71369803
Trained batch 254 batch loss 1.43498874 epoch total loss 1.71260083
Trained batch 255 batch loss 1.47236943 epoch total loss 1.71165884
Trained batch 256 batch loss 1.60798097 epoch total loss 1.71125376
Trained batch 257 batch loss 1.683079 epoch total loss 1.71114409
Trained batch 258 batch loss 1.70498145 epoch total loss 1.71112025
Trained batch 259 batch loss 1.71806836 epoch total

Trained batch 366 batch loss 1.48406672 epoch total loss 1.68339384
Trained batch 367 batch loss 1.59274232 epoch total loss 1.68314672
Trained batch 368 batch loss 1.6145401 epoch total loss 1.68296039
Trained batch 369 batch loss 1.59187508 epoch total loss 1.68271351
Trained batch 370 batch loss 1.67742348 epoch total loss 1.6826992
Trained batch 371 batch loss 1.43172038 epoch total loss 1.68202257
Trained batch 372 batch loss 1.4152565 epoch total loss 1.68130565
Trained batch 373 batch loss 1.42271698 epoch total loss 1.68061233
Trained batch 374 batch loss 1.52113795 epoch total loss 1.68018591
Trained batch 375 batch loss 1.72290301 epoch total loss 1.68029976
Trained batch 376 batch loss 1.767416 epoch total loss 1.68053138
Trained batch 377 batch loss 1.63780916 epoch total loss 1.68041813
Trained batch 378 batch loss 1.66615629 epoch total loss 1.68038034
Trained batch 379 batch loss 1.65994203 epoch total loss 1.68032634
Trained batch 380 batch loss 1.63844228 epoch total l

Trained batch 488 batch loss 1.57375264 epoch total loss 1.65370655
Trained batch 489 batch loss 1.50649095 epoch total loss 1.65340543
Trained batch 490 batch loss 1.5341723 epoch total loss 1.65316212
Trained batch 491 batch loss 1.58050144 epoch total loss 1.65301418
Trained batch 492 batch loss 1.62065887 epoch total loss 1.65294838
Trained batch 493 batch loss 1.66780055 epoch total loss 1.65297854
Trained batch 494 batch loss 1.65012407 epoch total loss 1.65297282
Trained batch 495 batch loss 1.55416179 epoch total loss 1.65277314
Trained batch 496 batch loss 1.56386566 epoch total loss 1.65259385
Trained batch 497 batch loss 1.61935139 epoch total loss 1.65252686
Trained batch 498 batch loss 1.57529342 epoch total loss 1.65237176
Trained batch 499 batch loss 1.61630964 epoch total loss 1.65229964
Trained batch 500 batch loss 1.54213965 epoch total loss 1.65207922
Trained batch 501 batch loss 1.54336369 epoch total loss 1.65186214
Trained batch 502 batch loss 1.5719074 epoch tota

Trained batch 609 batch loss 1.36580944 epoch total loss 1.63479316
Trained batch 610 batch loss 1.55482757 epoch total loss 1.63466203
Trained batch 611 batch loss 1.62492085 epoch total loss 1.63464618
Trained batch 612 batch loss 1.63266027 epoch total loss 1.63464296
Trained batch 613 batch loss 1.58890486 epoch total loss 1.63456833
Trained batch 614 batch loss 1.6335566 epoch total loss 1.63456666
Trained batch 615 batch loss 1.62499809 epoch total loss 1.63455117
Trained batch 616 batch loss 1.5364852 epoch total loss 1.63439202
Trained batch 617 batch loss 1.6471324 epoch total loss 1.63441265
Trained batch 618 batch loss 1.57605231 epoch total loss 1.63431823
Trained batch 619 batch loss 1.60656536 epoch total loss 1.63427341
Trained batch 620 batch loss 1.59523463 epoch total loss 1.63421035
Trained batch 621 batch loss 1.54673398 epoch total loss 1.63406956
Trained batch 622 batch loss 1.3562386 epoch total loss 1.63362288
Trained batch 623 batch loss 1.49752569 epoch total 

Trained batch 731 batch loss 1.64277756 epoch total loss 1.61788237
Trained batch 732 batch loss 1.63472915 epoch total loss 1.61790538
Trained batch 733 batch loss 1.62537789 epoch total loss 1.61791563
Trained batch 734 batch loss 1.60484147 epoch total loss 1.61789775
Trained batch 735 batch loss 1.520751 epoch total loss 1.61776567
Trained batch 736 batch loss 1.41140163 epoch total loss 1.61748517
Trained batch 737 batch loss 1.32465744 epoch total loss 1.61708796
Trained batch 738 batch loss 1.49927843 epoch total loss 1.61692834
Trained batch 739 batch loss 1.56911778 epoch total loss 1.61686361
Trained batch 740 batch loss 1.60534811 epoch total loss 1.61684799
Trained batch 741 batch loss 1.59202254 epoch total loss 1.61681449
Trained batch 742 batch loss 1.46356475 epoch total loss 1.61660802
Trained batch 743 batch loss 1.30236816 epoch total loss 1.61618519
Trained batch 744 batch loss 1.450454 epoch total loss 1.61596239
Trained batch 745 batch loss 1.54449987 epoch total 

### Simplebaseline

In [None]:
import tensorflow as tf

resnet = tf.keras.applications.resnet.ResNet50(include_top=False, weights='imagenet')

def _make_deconv_layer(num_deconv_layers):
    seq_model = tf.keras.models.Sequential()

    # [[YOUR CODE]]

    return seq_model

upconv = _make_deconv_layer(3)

final_layer = # [[YOUR CODE]]


def Simplebaseline(input_shape=(256, 256, 3)):
    inputs = tf.keras.Input(shape=input_shape)

    # [[YOUR CODE]]

    model = tf.keras.Model(inputs, out, name='simple_baseline')
    return model

In [None]:
model2 = Simplebaseline()

In [None]:
def train(epochs, start_epoch, learning_rate, tensorboard_dir, checkpoint,
          num_heatmap, batch_size, train_tfrecords, val_tfrecords, version):
    strategy = tf.distribute.MirroredStrategy()
    global_batch_size = strategy.num_replicas_in_sync * batch_size
    train_dataset = create_dataset(
        train_tfrecords, global_batch_size, num_heatmap, is_train=True)
    val_dataset = create_dataset(
        val_tfrecords, global_batch_size, num_heatmap, is_train=False)

    if not os.path.exists(os.path.join('./models')):
        os.makedirs(os.path.join('./models/'))

    with strategy.scope():
        train_dist_dataset = strategy.experimental_distribute_dataset(
            train_dataset)
        val_dist_dataset = strategy.experimental_distribute_dataset(
            val_dataset)

        model = Simplebaseline(IMAGE_SHAPE)
        if checkpoint and os.path.exists(checkpoint):
            model.load_weights(checkpoint)

        trainer = Trainer(
            model,
            epochs,
            global_batch_size,
            strategy,
            initial_learning_rate=learning_rate,
            start_epoch=start_epoch,
            version=version,
            tensorboard_dir=tensorboard_dir)

        print('Start training...')
        return trainer.run(train_dist_dataset, val_dist_dataset)

In [None]:
tfrecords_dir = os.getenv('HOME')+'/aiffel/mpii/tfrecords_mpii/'
train_tfrecords = os.path.join(tfrecords_dir, 'train*')
val_tfrecords = os.path.join(tfrecords_dir, 'val*')
epochs = 5
batch_size = 16
num_heatmap = 16
tensorboard_dir = './logs/'
learning_rate = 0.0007
start_epoch = 1

automatic_gpu_usage()

pretrained_path = None # './models_old/model-v0.0.2-epoch-15-loss-1.1013.h5'

history2 = train(epochs, start_epoch, learning_rate, tensorboard_dir, pretrained_path,
      num_heatmap, batch_size, train_tfrecords, val_tfrecords, '0.0.1')