# 행동 스티커 만들기

### 데이터셋 전처리

In [1]:
import csv
import io
import json
import os

os.environ["CUDA_VISIBLE_DEVICES"] = '-1' #CPU 사용
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

workdir = os.path.join(os.getenv('HOME'),'aiffel/mpii')
os.chdir(workdir)

from loguru import logger
from PIL import Image
import ray
import tensorflow as tf

#### json 파싱
앞서 다운 받은 `train.json`과 `validation.json`은 이미지에 담겨 있는 사람들의 pose keypoint 정보들을 가지고 있음. 이는 Pose Estimation을 위한 label로 삼을 수 있음.  
이 json파일들이 어떻게 구성되어 있는지 확인하기 위해 샘플로 annotation정보를 1개만 출력함. 

In [2]:
import json, os

json_file_path = os.getenv('HOME')+'/aiffel/mpii/mpii_human_pose_v1_u12_2/train.json'

with open(json_file_path) as train_json:
    train_annos = json.load(train_json)
    json_formatted_str = json.dumps(train_annos[0], indent=2) # json beautify
    print(json_formatted_str)

{
  "joints_vis": [
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1
  ],
  "joints": [
    [
      620.0,
      394.0
    ],
    [
      616.0,
      269.0
    ],
    [
      573.0,
      185.0
    ],
    [
      647.0,
      188.0
    ],
    [
      661.0,
      221.0
    ],
    [
      656.0,
      231.0
    ],
    [
      610.0,
      187.0
    ],
    [
      647.0,
      176.0
    ],
    [
      637.0201,
      189.8183
    ],
    [
      695.9799,
      108.1817
    ],
    [
      606.0,
      217.0
    ],
    [
      553.0,
      161.0
    ],
    [
      601.0,
      167.0
    ],
    [
      692.0,
      185.0
    ],
    [
      693.0,
      240.0
    ],
    [
      688.0,
      313.0
    ]
  ],
  "image": "015601864.jpg",
  "scale": 3.021046,
  "center": [
    594.0,
    257.0
  ]
}


**`joints`** 

`joints`가 label로 사용할 keypoint의 label임.  
이미지 형상과 사람의 포즈에 따라 모든 label이 이미지에 나타나지 않기 떄문에 `joints_vis`를 이용해서 실제로 사용할 수 있는 keypoint인지를 나타냄.  
`joints`의 순서는 다음과 같음.  
0 - 오른쪽 발목  
1 - 오른쪽 무릎  
2 - 오른쪽 엉덩이  
3 - 왼쪽 엉덩이  
4 - 왼쪽 무릎  
5 - 왼쪽 발목  
6 - 골반  
7 - 가슴(흉부)  
8 - 목  
9 - 머리 위  
10 - 오른쪽 손목  
11 - 오른쪽 팔꿈치  
12 - 오른쪽 어깨  
13 - 왼쪽 어깨  
14 - 왼쪽 팔꿈치  
15 - 왼쪽 손목  

**`scale`**  
높이 = scale * 200px  
scale정보가 coco dataset에는 scale 값 또한 2차원으로 주어져서 bbox scale이 나오지만 mpii는 높이만 나옴   


**`center`**  
사람의 중심점을 의미

In [3]:
# json annotation을 파싱하는 함수 
# image의 전체 path를 묶어 dict 타입의 label로 만듬. -> 이 label을 통해 학습 수행
def parse_one_annotation(anno, image_dir):
    filename = anno['image']
    joints = anno['joints']
    joints_visibility = anno['joints_vis']
    annotation = {
        'filename': filename,
        'filepath': os.path.join(image_dir, filename),
        'joints_visibility': joints_visibility,
        'joints': joints,
        'center': anno['center'],
        'scale' : anno['scale']
    }
    return annotation

### tfrecord 파일 만들기
일반적으로 학습 과정에서 gpu의 연산 속도보다 HDD I/O의 속도가 느리기 때문에 병목 현상이 발생하고 효율성이 떨어지는 것을 관찰할 수 있음.  
따라서 "학습 데이터를 어떻게 빠르게 읽는가"에 대한 고민이 생김.  

학습 속도를 향상시키기 위해서 data read(또는 prefetch) 또는 데이터 변환 단계에서 gpu학습과 병렬적으로 수행되도록 prefetch를 적용해야함.  
수행방법은 tf.data의 map함수를 이요하고 cache에 저장해두는 방법을 사용함.  

tf는 데이터셋을 tfrecord 형태로 표현함으로써 위 변환을 자동화 해줌.  
`tfrecord`는 binary record sequence를 저장하기 위한 형식으로, 내부적으로는 protocol buffer를 이용함.  

protobuf 는 크로스플랫폼에서 사용할 수 있는 직렬화 데이터 라이브러리라고 생각하면 됨.  데이터셋 크기가 크기 때문에 빠른 학습을 위해서 이 정보를 tfrecord 파일로 변환함.  

- annotation 을 total_shards 개수로 나눔(chunkify) (train : 64개, val : 8개)
- build_single_tfrecord 함수를 통해 tfrecord 로 저장
- 각 chunk 끼리 dependency 가 없기 때문에 병렬처리가 가능, ray를 사용


In [4]:
import ray

def build_tf_records(annotations, total_shards, split):
    chunks = chunkify(annotations, total_shards)
    futures = [
        # train_0001_of_0064.tfrecords
        build_single_tfrecord.remote(
            chunk, './tfrecords_mpii/{}_{}_of_{}.tfrecords'.format(
                split,
                str(i + 1).zfill(4),
                str(total_shards).zfill(4),
            )) for i, chunk in enumerate(chunks)
    ]
    ray.get(futures)

**annotation을 적절한 개수로 나누는 함수 `chunkify`** 
- l 은 annotation, n은 shard 개수
- shard 개수 단위로 annotation list 를 나누어서 새로운 list를 만듭니다.
- numpy array 라고 가정하면 (size, shard, anno_content) 정도의 shape을 가짐. 

**tfrecord 1개를 저장하는 함수 `build_single_tfrecord`**
- TFRecordWriter 를 이용해서 anno_list 를 shard 개수 단위로 작성함.
- generate_tfexample 함수를 사용함.
- [중요] write 할 때 string 으로 serialize 해야함.

**tf.example만드는 `generate_tfexample`**
- 우리가 정의한 json 의 python type의 값들을 tfexample 에 사용할 수 있는 값으로 변환함.
- image 파일은 byte 로 변환합니다. bitmap 으로 저장하게되면 파일용량이 상당히 커지기 때문에 만약 jpeg 타입이 아닌 경우 jpeg 으로 변환 후 content 로 불러서 저장함. (H,W,C)
- 각 label 값을 tf.train.Feature 로 저장합니다. 이 때 데이터 타입에 주의해야 함.
- 이미지는 byte 인코딩 된 값을 그대로 넣음.


### Ray
Ray는 파이썬을 위한 간단한 분산 어플리케이션 api임.  
참고자료 : [https://docs.ray.io/en/latest/](https://docs.ray.io/en/latest/)  

위 내용들을 모두 하나의 파일로 정리하면 다음과 같음.  

**tfrecords_mpii.py**

In [5]:
import csv
import io
import json
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from loguru import logger
from PIL import Image
import ray
import tensorflow as tf

num_train_shards = 64
num_val_shards = 8
ray.init()
tf.get_logger().setLevel('ERROR')


def chunkify(l, n):
    size = len(l) // n
    start = 0
    results = []
    for i in range(n - 1):
        results.append(l[start:start + size])
        start += size
    results.append(l[start:])
    return results


def _bytes_feature(value):
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy(
        )  # BytesList won't unpack a string from an EagerTensor.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def generate_tfexample(anno):
    filename = anno['filename']
    filepath = anno['filepath']
    with open(filepath, 'rb') as image_file:
        content = image_file.read()

    image = Image.open(filepath)
    if image.format != 'JPEG' or image.mode != 'RGB':
        image_rgb = image.convert('RGB')
        with io.BytesIO() as output:
            image_rgb.save(output, format="JPEG", quality=95)
            content = output.getvalue()

    width, height = image.size
    depth = 3

    c_x = int(anno['center'][0])
    c_y = int(anno['center'][1])
    scale = anno['scale']

    # x = [
    #     joint[0] / width if joint[0] >= 0 else joint[0]
    #     for joint in anno['joints']
    # ]
    # y = [
    #     joint[1] / height if joint[1] >= 0 else joint[0]
    #     for joint in anno['joints']
    # ]
    x = [
        int(joint[0]) if joint[0] >= 0 else int(joint[0]) 
        for joint in anno['joints']
    ]
    y = [
        int(joint[1]) if joint[1] >= 0 else int(joint[0]) 
        for joint in anno['joints']
    ]
    # 0 - invisible, 1 - occluded, 2 - visible
    v = [0 if joint_v == 0 else 2 for joint_v in anno['joints_visibility']]

    feature = {
        'image/height':
        tf.train.Feature(int64_list=tf.train.Int64List(value=[height])),
        'image/width':
        tf.train.Feature(int64_list=tf.train.Int64List(value=[width])),
        'image/depth':
        tf.train.Feature(int64_list=tf.train.Int64List(value=[depth])),
        'image/object/parts/x':
        tf.train.Feature(int64_list=tf.train.Int64List(value=x)),
        'image/object/parts/y':
        tf.train.Feature(int64_list=tf.train.Int64List(value=y)),
        'image/object/center/x': 
        tf.train.Feature(int64_list=tf.train.Int64List(value=[c_x])),
        'image/object/center/y': 
        tf.train.Feature(int64_list=tf.train.Int64List(value=[c_y])),
        'image/object/scale':
        tf.train.Feature(float_list=tf.train.FloatList(value=[scale])),
        # 'image/object/parts/x':
        # tf.train.Feature(float_list=tf.train.FloatList(value=x)),
        # 'image/object/parts/y':
        # tf.train.Feature(float_list=tf.train.FloatList(value=y)),
        'image/object/parts/v':
        tf.train.Feature(int64_list=tf.train.Int64List(value=v)),
        'image/encoded':
        _bytes_feature(content),
        'image/filename':
        _bytes_feature(filename.encode())
    }

    return tf.train.Example(features=tf.train.Features(feature=feature))


@ray.remote
def build_single_tfrecord(chunk, path):
    print('start to build tf records for ' + path)

    with tf.io.TFRecordWriter(path) as writer:
        for anno_list in chunk:
            tf_example = generate_tfexample(anno_list)
            writer.write(tf_example.SerializeToString())

    print('finished building tf records for ' + path)


def build_tf_records(annotations, total_shards, split):
    chunks = chunkify(annotations, total_shards)
    futures = [
        # train_0001_of_0064.tfrecords
        build_single_tfrecord.remote(
            chunk, './tfrecords_mpii/{}_{}_of_{}.tfrecords'.format(
                split,
                str(i + 1).zfill(4),
                str(total_shards).zfill(4),
            )) for i, chunk in enumerate(chunks)
    ]
    ray.get(futures)


def parse_one_annotation(anno, image_dir):
    filename = anno['image']
    joints = anno['joints']
    joints_visibility = anno['joints_vis']
    annotation = {
        'filename': filename,
        'filepath': os.path.join(image_dir, filename),
        'joints_visibility': joints_visibility,
        'joints': joints,
        'center': anno['center'],
        'scale' : anno['scale']
    }
    return annotation


def main():
    print('Start to parse annotations.')
    if not os.path.exists('./tfrecords_mpii'):
        os.makedirs('./tfrecords_mpii')

    with open(workdir + '/mpii_human_pose_v1_u12_2/train.json') as train_json:
        train_annos = json.load(train_json)
        train_annotations = [
            parse_one_annotation(anno, './images/')
            for anno in train_annos
        ]
        print('First train annotation: ', train_annotations[0])
        del (train_annos)

    with open(workdir + '/mpii_human_pose_v1_u12_2/validation.json') as val_json:
        val_annos = json.load(val_json)
        val_annotations = [
            parse_one_annotation(anno, './images/') for anno in val_annos
        ]
        print('First val annotation: ', val_annotations[0])
        del (val_annos)

    print('Start to build TF Records.')
    build_tf_records(train_annotations, num_train_shards, 'train')
    build_tf_records(val_annotations, num_val_shards, 'val')

    print('Successfully wrote {} annotations to TF Records.'.format(
        len(train_annotations) + len(val_annotations)))


if __name__ == '__main__':
    main()

2021-05-22 14:32:37,474	INFO services.py:1269 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


Start to parse annotations.
First train annotation:  {'filename': '015601864.jpg', 'filepath': './images/015601864.jpg', 'joints_visibility': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'joints': [[620.0, 394.0], [616.0, 269.0], [573.0, 185.0], [647.0, 188.0], [661.0, 221.0], [656.0, 231.0], [610.0, 187.0], [647.0, 176.0], [637.0201, 189.8183], [695.9799, 108.1817], [606.0, 217.0], [553.0, 161.0], [601.0, 167.0], [692.0, 185.0], [693.0, 240.0], [688.0, 313.0]], 'center': [594.0, 257.0], 'scale': 3.021046}
First val annotation:  {'filename': '005808361.jpg', 'filepath': './images/005808361.jpg', 'joints_visibility': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'joints': [[804.0, 711.0], [816.0, 510.0], [908.0, 438.0], [1040.0, 454.0], [906.0, 528.0], [883.0, 707.0], [974.0, 446.0], [985.0, 253.0], [982.7591, 235.9694], [962.2409, 80.0306], [869.0, 214.0], [798.0, 340.0], [902.0, 253.0], [1067.0, 253.0], [1167.0, 353.0], [1142.0, 478.0]], 'center': [966.0, 340.0], 'scale': 4.7

[2m[36m(pid=9530)[0m finished building tf records for ./tfrecords_mpii/train_0032_of_0064.tfrecords
[2m[36m(pid=9530)[0m start to build tf records for ./tfrecords_mpii/train_0043_of_0064.tfrecords
[2m[36m(pid=9527)[0m finished building tf records for ./tfrecords_mpii/train_0031_of_0064.tfrecords
[2m[36m(pid=9527)[0m start to build tf records for ./tfrecords_mpii/train_0044_of_0064.tfrecords
[2m[36m(pid=9531)[0m finished building tf records for ./tfrecords_mpii/train_0033_of_0064.tfrecords
[2m[36m(pid=9531)[0m start to build tf records for ./tfrecords_mpii/train_0045_of_0064.tfrecords
[2m[36m(pid=9535)[0m finished building tf records for ./tfrecords_mpii/train_0034_of_0064.tfrecords
[2m[36m(pid=9535)[0m start to build tf records for ./tfrecords_mpii/train_0046_of_0064.tfrecords
[2m[36m(pid=9532)[0m finished building tf records for ./tfrecords_mpii/train_0035_of_0064.tfrecords
[2m[36m(pid=9532)[0m start to build tf records for ./tfrecords_mpii/train_0047_of_

In [6]:
# 약 200mb 정도의 tfrecords들이 72개 만들어진 것을 확인할 수 있음. 
%ls | wc

     17      17     391


### data label로 만들기
tfrecords 파일을 읽고 전처리를 할 수 있는 dataloader를 만듬.  

**`Preprocessor` class**  
####  `__call__()` 메소드  
`Preprocessor` 클래스 코드의 `__call__()` 메소드 내부에서 진행되는 주요 과정은 다음과 같음.  
- tfrecord 파일이기 때문에 병렬로 읽는 것은 tf 가 지원해주고 있음. `self.parse_tfexample()` 에 구현되어 있고 이 함수를 통해 tf.tensor 로 이루어진 dictionary 형태의 features를 얻을 수 있음.  
- 즉 image 는 `features['image/encoded']` 형태로 사용할 수 있고 tfrecord 를 저장할 때 jpeg encoding 된 값을 넣었으므로 `tf.io.decode_jpeg()`로 decoding 하여 tensor 형태의 이미지를 얻음.  
- `crop_roi()` 메소드를 이용해 해당 이미지를 학습하기 편하도록 몇가지 트릭을 적용함. 
- `make_heatmaps()` 메소드를 이용해 label을 heatmap 으로 나타냄.


####  `parse_tfexample` 메소드  

- tfrecord 파일 형식을 우리가 저장한 data type feature 에 맞게 parsing 함.   
- tf 가 자동으로 parsing 해주는 점은 아주 편하지만 feature description 을 정확하게 알고 있어야하는 단점이 있음.   
- 즉, tfrecord 에서 사용할 key 값들과 data type 을 모르면 tfrecord 파일을 사용하기 굉장히 어려움. (serialize 되어있으므로..)

####  `crop_roi` 메소드  

- 얻은 image 와 label 을 이용해서 적절한 학습형태로 변환

#### `make_heatmaps` 메소드  

- 우리가 알고 있는 것은 joints의 위치, center의 좌표, body height값임. 균일하게 학습하기 위해 body width도 적절히 정하는 것도 중요함.  

- 높이 정보와 keypoint 위치를 이용해서 정사각형 박스를 사용하는 것을 기본으로 디자인 함. 이와 관련해서는 여러 방법이 있을 수 있지만, 우리가 임의로 조정한 crop box가 이미지 바깥으로 나가지 않는지 예외처리 하는 것을 더 중요하게 봄.  

- (x,y)좌표로 되어있는 keypoint를 heatmap으로 변경시킴. 
- 16개의 점을 generate_2d_gaussian() 함수를 이용해서 64x64 의 map 에 표현함. 


####  `generate_2d_guassian` 메소드   

- sigma 값이 1 이고 window size 7 인 필터를 이용해서 만듬.  

위 내용들을 하나의 py 파일로 정리하면 다음과 같음.  

**preprocess.py**

In [7]:
import tensorflow as tf


class Preprocessor(object):
    def __init__(self,
                 image_shape=(256, 256, 3),
                 heatmap_shape=(64, 64, 16),
                 is_train=False):
        self.is_train = is_train
        self.image_shape = image_shape
        self.heatmap_shape = heatmap_shape

    def __call__(self, example):
        features = self.parse_tfexample(example)
        image = tf.io.decode_jpeg(features['image/encoded'])

        if self.is_train:
            random_margin = tf.random.uniform([1], 0.1, 0.3)[0]
            image, keypoint_x, keypoint_y = self.crop_roi(image, features, margin=random_margin)
            image = tf.image.resize(image, self.image_shape[0:2])
        else:
            image, keypoint_x, keypoint_y = self.crop_roi(image, features)
            image = tf.image.resize(image, self.image_shape[0:2])

        image = tf.cast(image, tf.float32) / 127.5 - 1
        heatmaps = self.make_heatmaps(features, keypoint_x, keypoint_y)

        # print (image.shape, heatmaps.shape, type(heatmaps))

        return image, heatmaps


    def crop_roi(self, image, features, margin=0.2):
        img_shape = tf.shape(image)
        img_height = img_shape[0]
        img_width = img_shape[1]
        img_depth = img_shape[2]

        keypoint_x = tf.cast(tf.sparse.to_dense(features['image/object/parts/x']), dtype=tf.int32)
        keypoint_y = tf.cast(tf.sparse.to_dense(features['image/object/parts/y']), dtype=tf.int32)
        center_x = features['image/object/center/x']
        center_y = features['image/object/center/y']
        body_height = features['image/object/scale'] * 200.0

        masked_keypoint_x = tf.boolean_mask(keypoint_x, keypoint_x > 0)
        masked_keypoint_y = tf.boolean_mask(keypoint_y, keypoint_y > 0)

        keypoint_xmin = tf.reduce_min(masked_keypoint_x)
        keypoint_xmax = tf.reduce_max(masked_keypoint_x)
        keypoint_ymin = tf.reduce_min(masked_keypoint_y)
        keypoint_ymax = tf.reduce_max(masked_keypoint_y)

        xmin = keypoint_xmin - tf.cast(body_height * margin, dtype=tf.int32)
        xmax = keypoint_xmax + tf.cast(body_height * margin, dtype=tf.int32)
        ymin = keypoint_ymin - tf.cast(body_height * margin, dtype=tf.int32)
        ymax = keypoint_ymax + tf.cast(body_height * margin, dtype=tf.int32)

        effective_xmin = xmin if xmin > 0 else 0
        effective_ymin = ymin if ymin > 0 else 0
        effective_xmax = xmax if xmax < img_width else img_width
        effective_ymax = ymax if ymax < img_height else img_height
        effective_height = effective_ymax - effective_ymin
        effective_width = effective_xmax - effective_xmin

        image = image[effective_ymin:effective_ymax, effective_xmin:effective_xmax, :]
        new_shape = tf.shape(image)
        new_height = new_shape[0]
        new_width = new_shape[1]

        effective_keypoint_x = (keypoint_x - effective_xmin) / new_width
        effective_keypoint_y = (keypoint_y - effective_ymin) / new_height

        return image, effective_keypoint_x, effective_keypoint_y


    def generate_2d_guassian(self, height, width, y0, x0, visibility=2, sigma=1, scale=12):
        """
        "The same technique as Tompson et al. is used for supervision. A MeanSquared Error (MSE) loss is
        applied comparing the predicted heatmap to a ground-truth heatmap consisting of a 2D gaussian
        (with standard deviation of 1 px) centered on the keypoint location."

        https://github.com/princeton-vl/pose-hg-train/blob/master/src/util/img.lua#L204
        """
        heatmap = tf.zeros((height, width))

        # this gaussian patch is 7x7, let's get four corners of it first
        xmin = x0 - 3 * sigma
        ymin = y0 - 3 * sigma
        xmax = x0 + 3 * sigma
        ymax = y0 + 3 * sigma
        # if the patch is out of image boundary we simply return nothing according to the source code
        # [1]"In these cases the joint is either truncated or severely occluded, so for
        # supervision a ground truth heatmap of all zeros is provided."
        if xmin >= width or ymin >= height or xmax < 0 or ymax <0 or visibility == 0:
            return heatmap

        size = 6 * sigma + 1
        x, y = tf.meshgrid(tf.range(0, 6*sigma+1, 1), tf.range(0, 6*sigma+1, 1), indexing='xy')

        # the center of the gaussian patch should be 1
        center_x = size // 2
        center_y = size // 2

        # generate this 7x7 gaussian patch
        gaussian_patch = tf.cast(tf.math.exp(-(tf.square(x - center_x) + tf.math.square(y - center_y)) / (tf.math.square(sigma) * 2)) * scale, dtype=tf.float32)

        # part of the patch could be out of the boundary, so we need to determine the valid range
        # if xmin = -2, it means the 2 left-most columns are invalid, which is max(0, -(-2)) = 2
        patch_xmin = tf.math.maximum(0, -xmin)
        patch_ymin = tf.math.maximum(0, -ymin)
        # if xmin = 59, xmax = 66, but our output is 64x64, then we should discard 2 right-most columns
        # which is min(64, 66) - 59 = 5, and column 6 and 7 are discarded
        patch_xmax = tf.math.minimum(xmax, width) - xmin
        patch_ymax = tf.math.minimum(ymax, height) - ymin

        # also, we need to determine where to put this patch in the whole heatmap
        heatmap_xmin = tf.math.maximum(0, xmin)
        heatmap_ymin = tf.math.maximum(0, ymin)
        heatmap_xmax = tf.math.minimum(xmax, width)
        heatmap_ymax = tf.math.minimum(ymax, height)

        # finally, insert this patch into the heatmap
        indices = tf.TensorArray(tf.int32, 1, dynamic_size=True)
        updates = tf.TensorArray(tf.float32, 1, dynamic_size=True)

        count = 0

        for j in tf.range(patch_ymin, patch_ymax):
            for i in tf.range(patch_xmin, patch_xmax):
                indices = indices.write(count, [heatmap_ymin+j, heatmap_xmin+i])
                updates = updates.write(count, gaussian_patch[j][i])
                count += 1

        heatmap = tf.tensor_scatter_nd_update(heatmap, indices.stack(), updates.stack())

        return heatmap


    def make_heatmaps(self, features, keypoint_x, keypoint_y):
        v = tf.cast(tf.sparse.to_dense(features['image/object/parts/v']), dtype=tf.float32)
        x = tf.cast(tf.math.round(keypoint_x * self.heatmap_shape[0]), dtype=tf.int32)
        y = tf.cast(tf.math.round(keypoint_y * self.heatmap_shape[1]), dtype=tf.int32)

        num_heatmap = self.heatmap_shape[2]
        heatmap_array = tf.TensorArray(tf.float32, 16)

        for i in range(num_heatmap):
            gaussian = self.generate_2d_guassian(self.heatmap_shape[1], self.heatmap_shape[0], y[i], x[i], v[i])
            heatmap_array = heatmap_array.write(i, gaussian)

        heatmaps = heatmap_array.stack()
        heatmaps = tf.transpose(heatmaps, perm=[1, 2, 0]) # change to (64, 64, 16)

        return heatmaps

    def parse_tfexample(self, example_proto):
        image_feature_description = {
            'image/height': tf.io.FixedLenFeature([], tf.int64),
            'image/width': tf.io.FixedLenFeature([], tf.int64),
            'image/depth': tf.io.FixedLenFeature([], tf.int64),
            'image/object/parts/x': tf.io.VarLenFeature(tf.int64),
            'image/object/parts/y': tf.io.VarLenFeature(tf.int64),
            'image/object/parts/v': tf.io.VarLenFeature(tf.int64),
            'image/object/center/x': tf.io.FixedLenFeature([], tf.int64),
            'image/object/center/y': tf.io.FixedLenFeature([], tf.int64),
            'image/object/scale': tf.io.FixedLenFeature([], tf.float32),
            'image/encoded': tf.io.FixedLenFeature([], tf.string),
            'image/filename': tf.io.FixedLenFeature([], tf.string),
        }
        return tf.io.parse_single_example(example_proto,
                                          image_feature_description)

### 모델 학습

#### Hourglass 모델 만들기

In [8]:
import tensorflow as tf

from tensorflow.keras.layers import Add, Concatenate, Lambda
from tensorflow.keras.layers import Input, Conv2D, ReLU, MaxPool2D
from tensorflow.keras.layers import UpSampling2D, ZeroPadding2D
from tensorflow.keras.layers import BatchNormalization

##### Residual block module

In [9]:
def BottleneckBlock(inputs, filters, strides=1, downsample=False, name=None):
    identity = inputs
    if downsample:
        identity = Conv2D(
            filters=filters,  # lift channels first
            kernel_size=1,
            strides=strides,
            padding='same',
            kernel_initializer='he_normal')(inputs)

    x = BatchNormalization(momentum=0.9)(inputs)
    x = ReLU()(x)
    x = Conv2D(
        filters=filters // 2,
        kernel_size=1,
        strides=1,
        padding='same',
        kernel_initializer='he_normal')(x)

    x = BatchNormalization(momentum=0.9)(x)
    x = ReLU()(x)
    x = Conv2D(
        filters=filters // 2,
        kernel_size=3,
        strides=strides,
        padding='same',
        kernel_initializer='he_normal')(x)

    x = BatchNormalization(momentum=0.9)(x)
    x = ReLU()(x)
    x = Conv2D(
        filters=filters,
        kernel_size=1,
        strides=1,
        padding='same',
        kernel_initializer='he_normal')(x)

    x = Add()([identity, x])
    return x

##### Hourglass module

In [10]:
def HourglassModule(inputs, order, filters, num_residual):
    """
    https://github.com/princeton-vl/pose-hg-train/blob/master/src/models/hg.lua#L3
    """
    # Upper branch
    up1 = BottleneckBlock(inputs, filters, downsample=False)

    for i in range(num_residual):
        up1 = BottleneckBlock(up1, filters, downsample=False)

    # Lower branch
    low1 = MaxPool2D(pool_size=2, strides=2)(inputs)
    for i in range(num_residual):
        low1 = BottleneckBlock(low1, filters, downsample=False)

    low2 = low1
    if order > 1:
        low2 = HourglassModule(low1, order - 1, filters, num_residual)
    else:
        for i in range(num_residual):
            low2 = BottleneckBlock(low2, filters, downsample=False)

    low3 = low2
    for i in range(num_residual):
        low3 = BottleneckBlock(low3, filters, downsample=False)

    up2 = UpSampling2D(size=2)(low3)

    return up2 + up1

##### intermediate output을 위한 linear layer

In [11]:
def LinearLayer(inputs, filters):
    x = Conv2D(
        filters=filters,
        kernel_size=1,
        strides=1,
        padding='same',
        kernel_initializer='he_normal')(inputs)
    x = BatchNormalization(momentum=0.9)(x)
    x = ReLU()(x)
    return x

#### Stacked Hourglass

In [12]:
def StackedHourglassNetwork(
        input_shape=(256, 256, 3), num_stack=4, num_residual=1,
        num_heatmap=16):
    """
    https://github.com/princeton-vl/pose-hg-train/blob/master/src/models/hg.lua#L33
    """
    inputs = Input(shape=input_shape)

    # initial processing of the image
    x = Conv2D(
        filters=64,
        kernel_size=7,
        strides=2,
        padding='same',
        kernel_initializer='he_normal')(inputs)
    x = BatchNormalization(momentum=0.9)(x)
    x = ReLU()(x)
    x = BottleneckBlock(x, 128, downsample=True)
    x = MaxPool2D(pool_size=2, strides=2)(x)
    x = BottleneckBlock(x, 128, downsample=False)
    x = BottleneckBlock(x, 256, downsample=True)

    ys = []
    for i in range(num_stack):
        x = HourglassModule(x, order=4, filters=256, num_residual=num_residual)
        for i in range(num_residual):
            x = BottleneckBlock(x, 256, downsample=False)

        # predict 256 channels like a fully connected layer.
        x = LinearLayer(x, 256)

        # predict final channels, which is also the number of predicted heatmap
        y = Conv2D(
            filters=num_heatmap,
            kernel_size=1,
            strides=1,
            padding='same',
            kernel_initializer='he_normal')(x)
        ys.append(y)

        # if it's not the last stack, we need to add predictions back
        if i < num_stack - 1:
            y_intermediate_1 = Conv2D(filters=256, kernel_size=1, strides=1)(x)
            y_intermediate_2 = Conv2D(filters=256, kernel_size=1, strides=1)(y)
            x = Add()([y_intermediate_1, y_intermediate_2])

    return tf.keras.Model(inputs, ys, name='stacked_hourglass')

In [13]:
model = StackedHourglassNetwork()

### 학습 엔진 만들기
학습 코드 `train.py`를 구현.  
지금까지 제작한 `*.py` 모듈들은 여기서 참조(import)되어 사용될 것임. 

In [14]:
import math
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from datetime import datetime

import click
import tensorflow as tf

from hourglass104 import StackedHourglassNetwork
from preprocess import Preprocessor

IMAGE_SHAPE = (256, 256, 3)
HEATMAP_SIZE = (64, 64)

#### `automatic_gpu_usage` 메소드    
- gpu memory growth 옵션을 조정

In [15]:
import math
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from datetime import datetime

import click
import tensorflow as tf

from hourglass104 import StackedHourglassNetwork
from preprocess import Preprocessor

IMAGE_SHAPE = (256, 256, 3)
HEATMAP_SIZE = (64, 64)

def automatic_gpu_usage() :
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            # Currently, memory growth needs to be the same across GPUs
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
        except RuntimeError as e:
            # Memory growth must be set before GPUs have been initialized
            print(e)

#### Trainer class
- loss : MSE (heatmap 을 pixel 단위 MSE 로 계산) → 실제 계산은 약간 달라, compute_loss() 에서 새로 구현함.
- strategy : 분산학습용 tf.strategy 임. 사용 가능한 GPU가 1개뿐이라면 사용하지 않음.
- optimizer : Adam

##### `lr_decay` 메소드  
- learning rate : decay step에 따라 1/10씩 작아지도록 설정.

##### `compute_loss` 메소드
- loss function 계산

##### `train_step` , `val_step` 메소드
이론대로라면 self.loss_object 를 사용해서 MSE 로 구현하는 것이 맞지만 사실 동일 weight MSE 는 수렴이 잘 되지 않음.   
예측해야하는 positive (joint 들) 의 비율이 negative (배경이라고 할 수 있겠죠?) 에 비해 상당히 적은 비율로 등장하기 때문.  

label이 배경이 아닌 경우(heatmap 값이 0보다 큰 경우)에 추가적인 weight를 줃도록 함.  
weight가 82인 이유는 heatmap 전체 크기인 64x64에서 gaussian point 등장 비율이 7x7 패치이기 때문에 64/7 = 9.1 -> 9x9로 계산

In [16]:
class Trainer(object):
    def __init__(self,
                 model,
                 epochs,
                 global_batch_size,
                 strategy,
                 initial_learning_rate,
                 version='0.0.1',
                 start_epoch=1,
                 tensorboard_dir='./logs'):
        self.start_epoch = start_epoch
        self.model = model
        self.epochs = epochs
        self.strategy = strategy
        self.global_batch_size = global_batch_size
        self.loss_object = tf.keras.losses.MeanSquaredError(
            reduction=tf.keras.losses.Reduction.NONE)
        # "we use rmsprop with a learning rate of 2.5e-4.""
        self.optimizer = tf.keras.optimizers.Adam(
            learning_rate=initial_learning_rate)
        self.model = model

        self.current_learning_rate = initial_learning_rate
        self.last_val_loss = math.inf
        self.lowest_val_loss = math.inf
        self.patience_count = 0
        self.max_patience = 10
        self.tensorboard_dir = tensorboard_dir
        self.best_model = None
        self.version = version

    def lr_decay(self):
        if self.patience_count >= self.max_patience:
            self.current_learning_rate /= 10.0
            self.patience_count = 0
        elif self.last_val_loss == self.lowest_val_loss:
            self.patience_count = 0
        self.patience_count += 1

        self.optimizer.learning_rate = self.current_learning_rate

    def lr_decay_step(self, epoch):
        if epoch == 25 or epoch == 50 or epoch == 75:
            self.current_learning_rate /= 10.0
        self.optimizer.learning_rate = self.current_learning_rate

    def compute_loss(self, labels, outputs):
        loss = 0
        for output in outputs:
            weights = tf.cast(labels > 0, dtype=tf.float32) * 81 + 1
            loss += tf.math.reduce_mean(
                tf.math.square(labels - output) * weights) * (
                    1. / self.global_batch_size)
        return loss

    def train_step(self, inputs):
        images, labels = inputs
        with tf.GradientTape() as tape:
            outputs = self.model(images, training=True)
            loss = self.compute_loss(labels, outputs)

        grads = tape.gradient(
            target=loss, sources=self.model.trainable_variables)
        self.optimizer.apply_gradients(
            zip(grads, self.model.trainable_variables))

        return loss

    def val_step(self, inputs):
        images, labels = inputs
        outputs = self.model(images, training=False)
        loss = self.compute_loss(labels, outputs)
        return loss

    def run(self, train_dist_dataset, val_dist_dataset):
        @tf.function
        def distributed_train_epoch(dataset):
            tf.print('Start distributed traininng...')
            total_loss = 0.0
            num_train_batches = 0.0
            for one_batch in dataset:
                per_replica_loss = self.strategy.experimental_run_v2(
                    self.train_step, args=(one_batch, ))
                batch_loss = self.strategy.reduce(
                    tf.distribute.ReduceOp.SUM, per_replica_loss, axis=None)
                total_loss += batch_loss
                num_train_batches += 1
                tf.print('Trained batch', num_train_batches, 'batch loss',
                         batch_loss, 'epoch total loss', total_loss / num_train_batches)
            return total_loss, num_train_batches

        @tf.function
        def distributed_val_epoch(dataset):
            total_loss = 0.0
            num_val_batches = 0.0
            for one_batch in dataset:
                per_replica_loss = self.strategy.experimental_run_v2(
                    self.val_step, args=(one_batch, ))
                num_val_batches += 1
                batch_loss = self.strategy.reduce(
                    tf.distribute.ReduceOp.SUM, per_replica_loss, axis=None)
                tf.print('Validated batch', num_val_batches, 'batch loss',
                         batch_loss)
                if not tf.math.is_nan(batch_loss):
                    # TODO: Find out why the last validation batch loss become NaN
                    total_loss += batch_loss
                else:
                    num_val_batches -= 1

            return total_loss, num_val_batches

        summary_writer = tf.summary.create_file_writer(self.tensorboard_dir)
        summary_writer.set_as_default()

        for epoch in range(self.start_epoch, self.epochs + 1):
            tf.summary.experimental.set_step(epoch)

            self.lr_decay()
            tf.summary.scalar('epoch learning rate',
                              self.current_learning_rate)

            print('Start epoch {} with learning rate {}'.format(
                epoch, self.current_learning_rate))

            train_total_loss, num_train_batches = distributed_train_epoch(
                train_dist_dataset)
            train_loss = train_total_loss / num_train_batches
            print('Epoch {} train loss {}'.format(epoch, train_loss))
            tf.summary.scalar('epoch train loss', train_loss)

            val_total_loss, num_val_batches = distributed_val_epoch(
                val_dist_dataset)
            val_loss = val_total_loss / num_val_batches
            print('Epoch {} val loss {}'.format(epoch, val_loss))
            tf.summary.scalar('epoch val loss', val_loss)

            # save model when reach a new lowest validation loss
            if val_loss < self.lowest_val_loss:
                self.save_model(epoch, val_loss)
                self.lowest_val_loss = val_loss
            self.last_val_loss = val_loss

        return self.best_model

    def save_model(self, epoch, loss):
        model_name = './models/model-v{}-epoch-{}-loss-{:.4f}.h5'.format(
            self.version, epoch, loss)
        self.model.save_weights(model_name)
        self.best_model = model_name
        print("Model {} saved.".format(model_name))



#### tf.dataset 만들기  
##### `create_dataset` 메소드  
tfrecord파일을 `tf.dataset`으로 만듬.

##### `train` 메소드  
train함수 구현

In [17]:
def create_dataset(tfrecords, batch_size, num_heatmap, is_train):
    preprocess = Preprocessor(
        IMAGE_SHAPE, (HEATMAP_SIZE[0], HEATMAP_SIZE[1], num_heatmap), is_train)

    dataset = tf.data.Dataset.list_files(tfrecords)
    dataset = tf.data.TFRecordDataset(dataset)
    dataset = dataset.map(
        preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    if is_train:
        dataset = dataset.shuffle(batch_size)

    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

    return dataset


def train(epochs, start_epoch, learning_rate, tensorboard_dir, checkpoint,
          num_heatmap, batch_size, train_tfrecords, val_tfrecords, version):
    strategy = tf.distribute.MirroredStrategy()
    global_batch_size = strategy.num_replicas_in_sync * batch_size
    train_dataset = create_dataset(
        train_tfrecords, global_batch_size, num_heatmap, is_train=True)
    val_dataset = create_dataset(
        val_tfrecords, global_batch_size, num_heatmap, is_train=False)

    if not os.path.exists(os.path.join('./models')):
        os.makedirs(os.path.join('./models/'))

    with strategy.scope():
        train_dist_dataset = strategy.experimental_distribute_dataset(
            train_dataset)
        val_dist_dataset = strategy.experimental_distribute_dataset(
            val_dataset)

        model = StackedHourglassNetwork(IMAGE_SHAPE, 4, 1, num_heatmap)
        if checkpoint and os.path.exists(checkpoint):
            model.load_weights(checkpoint)

        trainer = Trainer(
            model,
            epochs,
            global_batch_size,
            strategy,
            initial_learning_rate=learning_rate,
            start_epoch=start_epoch,
            version=version,
            tensorboard_dir=tensorboard_dir)

        print('Start training...')
        return trainer.run(train_dist_dataset, val_dist_dataset)


In [18]:
tfrecords_dir = os.getenv('HOME')+'/aiffel/mpii/tfrecords_mpii/'
train_tfrecords = os.path.join(tfrecords_dir, 'train*')
val_tfrecords = os.path.join(tfrecords_dir, 'val*')
epochs = 2
batch_size = 8
num_heatmap = 16
tensorboard_dir = './logs/'
learning_rate = 0.0007
start_epoch = 1

automatic_gpu_usage()

pretrained_path = None # './models_old/model-v0.0.2-epoch-15-loss-1.1013.h5'

history = train(epochs, start_epoch, learning_rate, tensorboard_dir, pretrained_path,
      num_heatmap, batch_size, train_tfrecords, val_tfrecords, '0.0.1')

Start training...
Start epoch 1 with learning rate 0.0007
Start distributed traininng...
Trained batch 1 batch loss 5.03168106 epoch total loss 5.03168106
Trained batch 2 batch loss 5.12402964 epoch total loss 5.07785511
Trained batch 3 batch loss 4.79028845 epoch total loss 4.982
Trained batch 4 batch loss 4.42440367 epoch total loss 4.84260082
Trained batch 5 batch loss 4.64506721 epoch total loss 4.80309391
Trained batch 6 batch loss 4.93397522 epoch total loss 4.82490778
Trained batch 7 batch loss 3.7924664 epoch total loss 4.67741632
Trained batch 8 batch loss 3.77604985 epoch total loss 4.56474543
Trained batch 9 batch loss 3.87475538 epoch total loss 4.48808
Trained batch 10 batch loss 4.51862478 epoch total loss 4.49113417
Trained batch 11 batch loss 4.32199812 epoch total loss 4.47575808
Trained batch 12 batch loss 4.27406168 epoch total loss 4.45895052
Trained batch 13 batch loss 4.27509403 epoch total loss 4.44480753
Trained batch 14 batch loss 3.72554255 epoch total loss 4.

Trained batch 123 batch loss 3.41429591 epoch total loss 3.63375473
Trained batch 124 batch loss 3.27424121 epoch total loss 3.63085532
Trained batch 125 batch loss 3.36970854 epoch total loss 3.6287663
Trained batch 126 batch loss 2.85709548 epoch total loss 3.6226418
Trained batch 127 batch loss 2.83217764 epoch total loss 3.61641788
Trained batch 128 batch loss 3.54324126 epoch total loss 3.61584616
Trained batch 129 batch loss 3.46509647 epoch total loss 3.61467743
Trained batch 130 batch loss 3.59167337 epoch total loss 3.61450052
Trained batch 131 batch loss 3.36966538 epoch total loss 3.61263156
Trained batch 132 batch loss 3.28597641 epoch total loss 3.61015701
Trained batch 133 batch loss 2.78738785 epoch total loss 3.60397053
Trained batch 134 batch loss 3.28375196 epoch total loss 3.60158086
Trained batch 135 batch loss 3.53306937 epoch total loss 3.6010735
Trained batch 136 batch loss 3.49316669 epoch total loss 3.60028
Trained batch 137 batch loss 3.25681162 epoch total lo

Trained batch 244 batch loss 3.34938884 epoch total loss 3.47380352
Trained batch 245 batch loss 3.45953226 epoch total loss 3.47374511
Trained batch 246 batch loss 3.50236368 epoch total loss 3.47386146
Trained batch 247 batch loss 3.40180945 epoch total loss 3.47356987
Trained batch 248 batch loss 3.39834046 epoch total loss 3.47326636
Trained batch 249 batch loss 3.54538441 epoch total loss 3.47355604
Trained batch 250 batch loss 3.60761452 epoch total loss 3.47409225
Trained batch 251 batch loss 3.49608898 epoch total loss 3.47418
Trained batch 252 batch loss 3.30500174 epoch total loss 3.4735086
Trained batch 253 batch loss 3.26239276 epoch total loss 3.47267413
Trained batch 254 batch loss 3.28704476 epoch total loss 3.47194338
Trained batch 255 batch loss 3.29342628 epoch total loss 3.47124314
Trained batch 256 batch loss 3.50219059 epoch total loss 3.47136402
Trained batch 257 batch loss 3.00093627 epoch total loss 3.46953344
Trained batch 258 batch loss 2.87089682 epoch total 

Trained batch 366 batch loss 3.1496439 epoch total loss 3.41594362
Trained batch 367 batch loss 3.20007944 epoch total loss 3.41535544
Trained batch 368 batch loss 3.03381038 epoch total loss 3.41431856
Trained batch 369 batch loss 3.40681076 epoch total loss 3.4142983
Trained batch 370 batch loss 3.17637205 epoch total loss 3.41365528
Trained batch 371 batch loss 3.0846138 epoch total loss 3.41276836
Trained batch 372 batch loss 3.14076424 epoch total loss 3.41203713
Trained batch 373 batch loss 3.12268591 epoch total loss 3.41126132
Trained batch 374 batch loss 3.30079269 epoch total loss 3.41096592
Trained batch 375 batch loss 3.09790087 epoch total loss 3.41013122
Trained batch 376 batch loss 3.18670845 epoch total loss 3.40953708
Trained batch 377 batch loss 3.1478405 epoch total loss 3.40884304
Trained batch 378 batch loss 3.2162745 epoch total loss 3.40833354
Trained batch 379 batch loss 3.05482769 epoch total loss 3.40740085
Trained batch 380 batch loss 3.12438822 epoch total l

Trained batch 488 batch loss 3.38509679 epoch total loss 3.35698628
Trained batch 489 batch loss 3.04830623 epoch total loss 3.35635519
Trained batch 490 batch loss 3.12002158 epoch total loss 3.35587287
Trained batch 491 batch loss 3.23748732 epoch total loss 3.35563159
Trained batch 492 batch loss 3.04857922 epoch total loss 3.35500741
Trained batch 493 batch loss 2.97511482 epoch total loss 3.35423684
Trained batch 494 batch loss 3.36731482 epoch total loss 3.35426331
Trained batch 495 batch loss 3.30293083 epoch total loss 3.35415983
Trained batch 496 batch loss 3.22721577 epoch total loss 3.35390377
Trained batch 497 batch loss 3.3040452 epoch total loss 3.3538034
Trained batch 498 batch loss 3.09690833 epoch total loss 3.3532877
Trained batch 499 batch loss 3.18139267 epoch total loss 3.35294318
Trained batch 500 batch loss 3.27514672 epoch total loss 3.35278749
Trained batch 501 batch loss 3.28421426 epoch total loss 3.35265064
Trained batch 502 batch loss 3.29201579 epoch total

Trained batch 609 batch loss 3.25492167 epoch total loss 3.31906891
Trained batch 610 batch loss 3.23411179 epoch total loss 3.31892967
Trained batch 611 batch loss 3.14022255 epoch total loss 3.31863737
Trained batch 612 batch loss 3.35175157 epoch total loss 3.31869149
Trained batch 613 batch loss 3.19419909 epoch total loss 3.31848836
Trained batch 614 batch loss 3.31624079 epoch total loss 3.31848478
Trained batch 615 batch loss 3.29217863 epoch total loss 3.31844211
Trained batch 616 batch loss 3.16509986 epoch total loss 3.3181932
Trained batch 617 batch loss 3.31788111 epoch total loss 3.31819248
Trained batch 618 batch loss 3.14072466 epoch total loss 3.31790543
Trained batch 619 batch loss 3.15820479 epoch total loss 3.31764746
Trained batch 620 batch loss 3.21280527 epoch total loss 3.31747842
Trained batch 621 batch loss 3.18144917 epoch total loss 3.31725931
Trained batch 622 batch loss 3.27219081 epoch total loss 3.31718683
Trained batch 623 batch loss 3.21870875 epoch tot

Trained batch 730 batch loss 3.35564232 epoch total loss 3.28533745
Trained batch 731 batch loss 3.26584148 epoch total loss 3.28531098
Trained batch 732 batch loss 3.24882674 epoch total loss 3.28526092
Trained batch 733 batch loss 2.96360111 epoch total loss 3.28482223
Trained batch 734 batch loss 3.16572595 epoch total loss 3.28466
Trained batch 735 batch loss 2.73605132 epoch total loss 3.28391361
Trained batch 736 batch loss 3.22394204 epoch total loss 3.28383207
Trained batch 737 batch loss 3.2965169 epoch total loss 3.28384948
Trained batch 738 batch loss 2.90873671 epoch total loss 3.28334117
Trained batch 739 batch loss 3.17750263 epoch total loss 3.28319788
Trained batch 740 batch loss 3.29742551 epoch total loss 3.28321695
Trained batch 741 batch loss 3.21521378 epoch total loss 3.2831254
Trained batch 742 batch loss 3.18361807 epoch total loss 3.28299117
Trained batch 743 batch loss 3.34792066 epoch total loss 3.28307867
Trained batch 744 batch loss 3.23147917 epoch total l

Trained batch 851 batch loss 3.08953357 epoch total loss 3.25891352
Trained batch 852 batch loss 3.12805676 epoch total loss 3.25876021
Trained batch 853 batch loss 2.79696894 epoch total loss 3.25821853
Trained batch 854 batch loss 2.68381548 epoch total loss 3.25754595
Trained batch 855 batch loss 2.75530338 epoch total loss 3.25695872
Trained batch 856 batch loss 2.97025967 epoch total loss 3.25662374
Trained batch 857 batch loss 2.9227047 epoch total loss 3.25623393
Trained batch 858 batch loss 3.32266903 epoch total loss 3.25631142
Trained batch 859 batch loss 3.33669615 epoch total loss 3.25640512
Trained batch 860 batch loss 3.08624172 epoch total loss 3.25620723
Trained batch 861 batch loss 3.18972683 epoch total loss 3.25613
Trained batch 862 batch loss 2.92237 epoch total loss 3.25574279
Trained batch 863 batch loss 3.21349669 epoch total loss 3.25569367
Trained batch 864 batch loss 3.26385593 epoch total loss 3.25570321
Trained batch 865 batch loss 3.15333295 epoch total los

Trained batch 973 batch loss 3.11037207 epoch total loss 3.24384952
Trained batch 974 batch loss 3.06657863 epoch total loss 3.2436676
Trained batch 975 batch loss 2.99519634 epoch total loss 3.24341273
Trained batch 976 batch loss 2.99867606 epoch total loss 3.24316216
Trained batch 977 batch loss 3.11477089 epoch total loss 3.24303055
Trained batch 978 batch loss 3.08009601 epoch total loss 3.24286389
Trained batch 979 batch loss 3.33916855 epoch total loss 3.24296236
Trained batch 980 batch loss 3.37684679 epoch total loss 3.24309897
Trained batch 981 batch loss 3.24800396 epoch total loss 3.24310398
Trained batch 982 batch loss 3.06230545 epoch total loss 3.24292
Trained batch 983 batch loss 3.08561468 epoch total loss 3.24276
Trained batch 984 batch loss 2.97109962 epoch total loss 3.24248409
Trained batch 985 batch loss 3.06129646 epoch total loss 3.2423
Trained batch 986 batch loss 3.02316141 epoch total loss 3.24207783
Trained batch 987 batch loss 3.03695107 epoch total loss 3.

Trained batch 1093 batch loss 3.10514164 epoch total loss 3.22554946
Trained batch 1094 batch loss 3.10561657 epoch total loss 3.22544
Trained batch 1095 batch loss 3.34464407 epoch total loss 3.22554898
Trained batch 1096 batch loss 2.95460796 epoch total loss 3.22530174
Trained batch 1097 batch loss 2.80874467 epoch total loss 3.22492218
Trained batch 1098 batch loss 2.98877573 epoch total loss 3.22470689
Trained batch 1099 batch loss 3.14905024 epoch total loss 3.22463822
Trained batch 1100 batch loss 3.11492848 epoch total loss 3.22453856
Trained batch 1101 batch loss 2.99510622 epoch total loss 3.22433019
Trained batch 1102 batch loss 3.1049974 epoch total loss 3.22422194
Trained batch 1103 batch loss 3.0489068 epoch total loss 3.22406292
Trained batch 1104 batch loss 3.08587122 epoch total loss 3.22393775
Trained batch 1105 batch loss 3.06745434 epoch total loss 3.22379613
Trained batch 1106 batch loss 3.09015846 epoch total loss 3.22367525
Trained batch 1107 batch loss 3.1315119

Trained batch 1213 batch loss 3.2513454 epoch total loss 3.21442366
Trained batch 1214 batch loss 3.11301899 epoch total loss 3.21434021
Trained batch 1215 batch loss 2.97467518 epoch total loss 3.2141428
Trained batch 1216 batch loss 3.23696971 epoch total loss 3.21416163
Trained batch 1217 batch loss 3.40445352 epoch total loss 3.21431804
Trained batch 1218 batch loss 3.49581099 epoch total loss 3.2145493
Trained batch 1219 batch loss 3.48548985 epoch total loss 3.21477175
Trained batch 1220 batch loss 3.15564585 epoch total loss 3.21472335
Trained batch 1221 batch loss 2.84933448 epoch total loss 3.21442413
Trained batch 1222 batch loss 3.05975747 epoch total loss 3.21429753
Trained batch 1223 batch loss 3.00798035 epoch total loss 3.21412897
Trained batch 1224 batch loss 3.05053258 epoch total loss 3.21399522
Trained batch 1225 batch loss 2.96415281 epoch total loss 3.21379137
Trained batch 1226 batch loss 2.9917345 epoch total loss 3.21361017
Trained batch 1227 batch loss 3.191337

Trained batch 1333 batch loss 3.0392735 epoch total loss 3.19662428
Trained batch 1334 batch loss 3.55537963 epoch total loss 3.19689298
Trained batch 1335 batch loss 2.9957 epoch total loss 3.1967423
Trained batch 1336 batch loss 3.06589079 epoch total loss 3.19664431
Trained batch 1337 batch loss 2.83453584 epoch total loss 3.19637346
Trained batch 1338 batch loss 2.9703567 epoch total loss 3.19620442
Trained batch 1339 batch loss 2.81728935 epoch total loss 3.19592142
Trained batch 1340 batch loss 2.83925962 epoch total loss 3.19565535
Trained batch 1341 batch loss 2.85868788 epoch total loss 3.19540429
Trained batch 1342 batch loss 2.92336059 epoch total loss 3.19520164
Trained batch 1343 batch loss 2.89523196 epoch total loss 3.194978
Trained batch 1344 batch loss 2.77447152 epoch total loss 3.19466519
Trained batch 1345 batch loss 2.87277365 epoch total loss 3.19442558
Trained batch 1346 batch loss 2.92572021 epoch total loss 3.19422603
Trained batch 1347 batch loss 3.11394024 ep

Trained batch 1453 batch loss 3.07679343 epoch total loss 3.18034172
Trained batch 1454 batch loss 3.14021301 epoch total loss 3.18031406
Trained batch 1455 batch loss 3.14499736 epoch total loss 3.18029
Trained batch 1456 batch loss 3.10282707 epoch total loss 3.18023682
Trained batch 1457 batch loss 3.13812733 epoch total loss 3.18020797
Trained batch 1458 batch loss 3.09527063 epoch total loss 3.18014956
Trained batch 1459 batch loss 2.74915242 epoch total loss 3.17985415
Trained batch 1460 batch loss 2.66417575 epoch total loss 3.17950082
Trained batch 1461 batch loss 2.86559415 epoch total loss 3.179286
Trained batch 1462 batch loss 2.62102342 epoch total loss 3.17890429
Trained batch 1463 batch loss 2.82731414 epoch total loss 3.17866397
Trained batch 1464 batch loss 3.0445075 epoch total loss 3.17857218
Trained batch 1465 batch loss 3.10272098 epoch total loss 3.1785202
Trained batch 1466 batch loss 3.27644873 epoch total loss 3.17858696
Trained batch 1467 batch loss 3.20446897 

Trained batch 1573 batch loss 2.91171336 epoch total loss 3.1662302
Trained batch 1574 batch loss 2.86126852 epoch total loss 3.16603637
Trained batch 1575 batch loss 2.75827575 epoch total loss 3.16577744
Trained batch 1576 batch loss 2.92105103 epoch total loss 3.16562223
Trained batch 1577 batch loss 2.74455595 epoch total loss 3.16535521
Trained batch 1578 batch loss 2.86757135 epoch total loss 3.16516662
Trained batch 1579 batch loss 3.13247108 epoch total loss 3.16514564
Trained batch 1580 batch loss 3.1090827 epoch total loss 3.16511
Trained batch 1581 batch loss 3.22509933 epoch total loss 3.16514802
Trained batch 1582 batch loss 3.01830649 epoch total loss 3.16505504
Trained batch 1583 batch loss 2.65965271 epoch total loss 3.16473579
Trained batch 1584 batch loss 2.70708156 epoch total loss 3.16444683
Trained batch 1585 batch loss 3.011904 epoch total loss 3.16435051
Trained batch 1586 batch loss 2.76902056 epoch total loss 3.16410136
Trained batch 1587 batch loss 2.79510593 

Trained batch 1693 batch loss 3.01525402 epoch total loss 3.1528666
Trained batch 1694 batch loss 2.95930767 epoch total loss 3.1527524
Trained batch 1695 batch loss 2.77160859 epoch total loss 3.15252757
Trained batch 1696 batch loss 3.03875852 epoch total loss 3.15246034
Trained batch 1697 batch loss 2.99318528 epoch total loss 3.1523664
Trained batch 1698 batch loss 2.8725481 epoch total loss 3.15220165
Trained batch 1699 batch loss 2.90357208 epoch total loss 3.1520555
Trained batch 1700 batch loss 3.16365671 epoch total loss 3.15206218
Trained batch 1701 batch loss 2.91820812 epoch total loss 3.15192461
Trained batch 1702 batch loss 2.85330415 epoch total loss 3.15174937
Trained batch 1703 batch loss 2.81618404 epoch total loss 3.15155244
Trained batch 1704 batch loss 2.96278214 epoch total loss 3.15144157
Trained batch 1705 batch loss 2.71770525 epoch total loss 3.15118742
Trained batch 1706 batch loss 2.94412041 epoch total loss 3.15106606
Trained batch 1707 batch loss 2.7173674

Trained batch 1813 batch loss 2.80847549 epoch total loss 3.1387887
Trained batch 1814 batch loss 2.39418173 epoch total loss 3.13837814
Trained batch 1815 batch loss 2.90479469 epoch total loss 3.1382494
Trained batch 1816 batch loss 3.18806958 epoch total loss 3.13827682
Trained batch 1817 batch loss 2.96774554 epoch total loss 3.13818312
Trained batch 1818 batch loss 2.95500326 epoch total loss 3.13808227
Trained batch 1819 batch loss 2.96824265 epoch total loss 3.13798904
Trained batch 1820 batch loss 2.7150588 epoch total loss 3.13775659
Trained batch 1821 batch loss 2.86721539 epoch total loss 3.13760781
Trained batch 1822 batch loss 2.87453461 epoch total loss 3.13746357
Trained batch 1823 batch loss 2.81644392 epoch total loss 3.13728738
Trained batch 1824 batch loss 2.81284356 epoch total loss 3.13710952
Trained batch 1825 batch loss 2.70724607 epoch total loss 3.13687396
Trained batch 1826 batch loss 2.74885273 epoch total loss 3.13666153
Trained batch 1827 batch loss 3.01373

Trained batch 1933 batch loss 2.78044105 epoch total loss 3.12510657
Trained batch 1934 batch loss 2.81237721 epoch total loss 3.12494493
Trained batch 1935 batch loss 3.15829659 epoch total loss 3.12496209
Trained batch 1936 batch loss 3.03442192 epoch total loss 3.12491536
Trained batch 1937 batch loss 3.05250025 epoch total loss 3.12487793
Trained batch 1938 batch loss 3.11394095 epoch total loss 3.12487221
Trained batch 1939 batch loss 2.82771492 epoch total loss 3.1247189
Trained batch 1940 batch loss 2.93095636 epoch total loss 3.12461925
Trained batch 1941 batch loss 2.99046636 epoch total loss 3.12454987
Trained batch 1942 batch loss 2.95106459 epoch total loss 3.1244607
Trained batch 1943 batch loss 2.88459897 epoch total loss 3.1243372
Trained batch 1944 batch loss 3.14981604 epoch total loss 3.12435055
Trained batch 1945 batch loss 3.16239572 epoch total loss 3.12437
Trained batch 1946 batch loss 2.81821346 epoch total loss 3.12421298
Trained batch 1947 batch loss 2.95201302

Trained batch 2053 batch loss 2.9720397 epoch total loss 3.10963798
Trained batch 2054 batch loss 3.01838636 epoch total loss 3.10959363
Trained batch 2055 batch loss 2.75447702 epoch total loss 3.10942078
Trained batch 2056 batch loss 2.91868186 epoch total loss 3.10932803
Trained batch 2057 batch loss 2.79101324 epoch total loss 3.1091733
Trained batch 2058 batch loss 2.47088146 epoch total loss 3.10886288
Trained batch 2059 batch loss 2.58818913 epoch total loss 3.10861015
Trained batch 2060 batch loss 2.7327044 epoch total loss 3.10842776
Trained batch 2061 batch loss 2.99043465 epoch total loss 3.10837054
Trained batch 2062 batch loss 2.93967056 epoch total loss 3.10828853
Trained batch 2063 batch loss 3.09230089 epoch total loss 3.1082809
Trained batch 2064 batch loss 2.90836835 epoch total loss 3.10818386
Trained batch 2065 batch loss 2.89989877 epoch total loss 3.10808301
Trained batch 2066 batch loss 2.65662861 epoch total loss 3.10786462
Trained batch 2067 batch loss 2.800865

Trained batch 2173 batch loss 2.63704777 epoch total loss 3.09353113
Trained batch 2174 batch loss 2.19299293 epoch total loss 3.09311676
Trained batch 2175 batch loss 2.18266106 epoch total loss 3.09269834
Trained batch 2176 batch loss 2.41997623 epoch total loss 3.09238911
Trained batch 2177 batch loss 2.30639076 epoch total loss 3.0920279
Trained batch 2178 batch loss 2.5100646 epoch total loss 3.09176087
Trained batch 2179 batch loss 2.65950346 epoch total loss 3.09156251
Trained batch 2180 batch loss 2.8295958 epoch total loss 3.09144235
Trained batch 2181 batch loss 2.78749394 epoch total loss 3.09130311
Trained batch 2182 batch loss 2.54521394 epoch total loss 3.09105277
Trained batch 2183 batch loss 2.83303738 epoch total loss 3.09093451
Trained batch 2184 batch loss 2.98537397 epoch total loss 3.09088635
Trained batch 2185 batch loss 2.75534725 epoch total loss 3.09073281
Trained batch 2186 batch loss 2.63883591 epoch total loss 3.09052587
Trained batch 2187 batch loss 2.75258

Trained batch 2293 batch loss 2.82552242 epoch total loss 3.07682252
Trained batch 2294 batch loss 2.68763185 epoch total loss 3.076653
Trained batch 2295 batch loss 3.24044895 epoch total loss 3.07672429
Trained batch 2296 batch loss 3.06936598 epoch total loss 3.07672095
Trained batch 2297 batch loss 2.90611148 epoch total loss 3.0766468
Trained batch 2298 batch loss 2.8114152 epoch total loss 3.07653141
Trained batch 2299 batch loss 3.1301589 epoch total loss 3.07655478
Trained batch 2300 batch loss 3.43704915 epoch total loss 3.07671142
Trained batch 2301 batch loss 3.45300388 epoch total loss 3.07687521
Trained batch 2302 batch loss 3.2970922 epoch total loss 3.07697058
Trained batch 2303 batch loss 3.04711223 epoch total loss 3.0769577
Trained batch 2304 batch loss 3.11603212 epoch total loss 3.07697463
Trained batch 2305 batch loss 2.98680115 epoch total loss 3.07693553
Trained batch 2306 batch loss 2.86957526 epoch total loss 3.07684565
Trained batch 2307 batch loss 2.80559587 

Trained batch 2413 batch loss 2.79837036 epoch total loss 3.06466651
Trained batch 2414 batch loss 2.74125624 epoch total loss 3.06453252
Trained batch 2415 batch loss 2.80377674 epoch total loss 3.06442451
Trained batch 2416 batch loss 2.73727 epoch total loss 3.06428909
Trained batch 2417 batch loss 2.7898531 epoch total loss 3.06417561
Trained batch 2418 batch loss 2.83251143 epoch total loss 3.06407976
Trained batch 2419 batch loss 2.85231686 epoch total loss 3.06399226
Trained batch 2420 batch loss 2.84981823 epoch total loss 3.06390381
Trained batch 2421 batch loss 2.87270784 epoch total loss 3.06382465
Trained batch 2422 batch loss 2.91906714 epoch total loss 3.06376481
Trained batch 2423 batch loss 2.82850409 epoch total loss 3.06366777
Trained batch 2424 batch loss 2.77844095 epoch total loss 3.06355
Trained batch 2425 batch loss 2.86215067 epoch total loss 3.06346703
Trained batch 2426 batch loss 3.01690388 epoch total loss 3.06344795
Trained batch 2427 batch loss 2.68616819 

Trained batch 2533 batch loss 3.18783 epoch total loss 3.05397892
Trained batch 2534 batch loss 3.0526967 epoch total loss 3.05397844
Trained batch 2535 batch loss 2.88047171 epoch total loss 3.05391
Trained batch 2536 batch loss 2.77614665 epoch total loss 3.05380058
Trained batch 2537 batch loss 2.59236097 epoch total loss 3.05361867
Trained batch 2538 batch loss 2.38361216 epoch total loss 3.05335474
Trained batch 2539 batch loss 2.79575872 epoch total loss 3.05325317
Trained batch 2540 batch loss 2.81816864 epoch total loss 3.05316067
Trained batch 2541 batch loss 2.93957853 epoch total loss 3.05311608
Trained batch 2542 batch loss 3.0212822 epoch total loss 3.05310345
Trained batch 2543 batch loss 2.83530569 epoch total loss 3.05301785
Trained batch 2544 batch loss 2.73015046 epoch total loss 3.05289102
Trained batch 2545 batch loss 2.98443365 epoch total loss 3.05286407
Trained batch 2546 batch loss 2.99027586 epoch total loss 3.05283952
Trained batch 2547 batch loss 2.9483614 ep

Trained batch 2653 batch loss 2.82947922 epoch total loss 3.04327607
Trained batch 2654 batch loss 3.04703236 epoch total loss 3.0432775
Trained batch 2655 batch loss 2.88166428 epoch total loss 3.04321671
Trained batch 2656 batch loss 2.7746613 epoch total loss 3.04311562
Trained batch 2657 batch loss 2.65577769 epoch total loss 3.04297
Trained batch 2658 batch loss 2.73132944 epoch total loss 3.04285264
Trained batch 2659 batch loss 2.90451169 epoch total loss 3.04280043
Trained batch 2660 batch loss 3.1045568 epoch total loss 3.04282379
Trained batch 2661 batch loss 3.07062364 epoch total loss 3.04283428
Trained batch 2662 batch loss 2.95573902 epoch total loss 3.04280138
Trained batch 2663 batch loss 2.50959206 epoch total loss 3.04260135
Trained batch 2664 batch loss 2.90269089 epoch total loss 3.04254889
Trained batch 2665 batch loss 2.96047735 epoch total loss 3.04251814
Trained batch 2666 batch loss 2.69022179 epoch total loss 3.04238605
Trained batch 2667 batch loss 2.66941619

Trained batch 2773 batch loss 2.62767076 epoch total loss 3.0317328
Trained batch 2774 batch loss 2.4261539 epoch total loss 3.03151441
Trained batch 2775 batch loss 2.79421186 epoch total loss 3.03142881
Trained batch 2776 batch loss 2.58395314 epoch total loss 3.03126764
Trained batch 2777 batch loss 2.70122385 epoch total loss 3.03114867
Trained batch 2778 batch loss 2.5563736 epoch total loss 3.03097796
Trained batch 2779 batch loss 2.94666624 epoch total loss 3.03094745
Trained batch 2780 batch loss 2.95742941 epoch total loss 3.03092074
Trained batch 2781 batch loss 2.87594151 epoch total loss 3.03086519
Epoch 1 train loss 3.03086519241333
Validated batch 1 batch loss 3.09293604
Validated batch 2 batch loss 2.89222503
Validated batch 3 batch loss 2.56747866
Validated batch 4 batch loss 2.98709846
Validated batch 5 batch loss 2.67098141
Validated batch 6 batch loss 2.68826628
Validated batch 7 batch loss 2.56337357
Validated batch 8 batch loss 2.72763824
Validated batch 9 batch lo

Validated batch 184 batch loss 2.7848196
Validated batch 185 batch loss 2.96185327
Validated batch 186 batch loss 2.89553308
Validated batch 187 batch loss 3.07734966
Validated batch 188 batch loss 2.8008039
Validated batch 189 batch loss 2.32334757
Validated batch 190 batch loss 2.84593344
Validated batch 191 batch loss 2.8745389
Validated batch 192 batch loss 2.6333971
Validated batch 193 batch loss 2.7011838
Validated batch 194 batch loss 2.95599461
Validated batch 195 batch loss 2.57541418
Validated batch 196 batch loss 2.88368368
Validated batch 197 batch loss 2.77352262
Validated batch 198 batch loss 2.6773603
Validated batch 199 batch loss 2.67159557
Validated batch 200 batch loss 2.91262937
Validated batch 201 batch loss 2.15789294
Validated batch 202 batch loss 3.14663601
Validated batch 203 batch loss 3.17329693
Validated batch 204 batch loss 2.76728702
Validated batch 205 batch loss 2.69797444
Validated batch 206 batch loss 2.65719295
Validated batch 207 batch loss 2.5506744

Trained batch 5 batch loss 2.8187561 epoch total loss 2.73366308
Trained batch 6 batch loss 2.61777115 epoch total loss 2.71434784
Trained batch 7 batch loss 2.66100621 epoch total loss 2.70672774
Trained batch 8 batch loss 2.55071688 epoch total loss 2.6872263
Trained batch 9 batch loss 2.58898067 epoch total loss 2.6763103
Trained batch 10 batch loss 2.55601978 epoch total loss 2.66428113
Trained batch 11 batch loss 2.41157579 epoch total loss 2.64130783
Trained batch 12 batch loss 3.01319 epoch total loss 2.67229819
Trained batch 13 batch loss 2.92795444 epoch total loss 2.69196415
Trained batch 14 batch loss 2.50475645 epoch total loss 2.67859221
Trained batch 15 batch loss 2.50335288 epoch total loss 2.66690946
Trained batch 16 batch loss 2.56388044 epoch total loss 2.66047025
Trained batch 17 batch loss 2.72091269 epoch total loss 2.66402578
Trained batch 18 batch loss 2.7130127 epoch total loss 2.66674709
Trained batch 19 batch loss 2.42993355 epoch total loss 2.65428329
Trained

Trained batch 128 batch loss 2.92381501 epoch total loss 2.71709418
Trained batch 129 batch loss 2.74777603 epoch total loss 2.71733189
Trained batch 130 batch loss 2.38298392 epoch total loss 2.71476
Trained batch 131 batch loss 2.71680307 epoch total loss 2.7147758
Trained batch 132 batch loss 2.68849587 epoch total loss 2.71457672
Trained batch 133 batch loss 2.51882339 epoch total loss 2.71310496
Trained batch 134 batch loss 2.80567145 epoch total loss 2.71379566
Trained batch 135 batch loss 3.18033171 epoch total loss 2.71725154
Trained batch 136 batch loss 3.29997611 epoch total loss 2.7215364
Trained batch 137 batch loss 2.72285128 epoch total loss 2.7215457
Trained batch 138 batch loss 2.92032814 epoch total loss 2.72298622
Trained batch 139 batch loss 2.68352842 epoch total loss 2.72270226
Trained batch 140 batch loss 2.67752552 epoch total loss 2.72237968
Trained batch 141 batch loss 2.38609433 epoch total loss 2.71999478
Trained batch 142 batch loss 2.57975602 epoch total lo

Trained batch 250 batch loss 2.71655178 epoch total loss 2.74694228
Trained batch 251 batch loss 2.68556285 epoch total loss 2.74669766
Trained batch 252 batch loss 2.66611 epoch total loss 2.74637818
Trained batch 253 batch loss 2.8045435 epoch total loss 2.74660802
Trained batch 254 batch loss 2.7693634 epoch total loss 2.74669766
Trained batch 255 batch loss 2.68275785 epoch total loss 2.74644685
Trained batch 256 batch loss 2.62572145 epoch total loss 2.74597526
Trained batch 257 batch loss 2.60971689 epoch total loss 2.74544525
Trained batch 258 batch loss 2.56666851 epoch total loss 2.74475217
Trained batch 259 batch loss 2.99005342 epoch total loss 2.74569917
Trained batch 260 batch loss 3.02300215 epoch total loss 2.74676585
Trained batch 261 batch loss 2.8982513 epoch total loss 2.74734616
Trained batch 262 batch loss 2.89437819 epoch total loss 2.7479074
Trained batch 263 batch loss 3.20205951 epoch total loss 2.74963427
Trained batch 264 batch loss 2.71599054 epoch total los

Trained batch 372 batch loss 2.44511914 epoch total loss 2.75221086
Trained batch 373 batch loss 2.61088657 epoch total loss 2.75183201
Trained batch 374 batch loss 2.4286046 epoch total loss 2.75096774
Trained batch 375 batch loss 2.56819224 epoch total loss 2.75048041
Trained batch 376 batch loss 2.70763373 epoch total loss 2.75036645
Trained batch 377 batch loss 2.86348152 epoch total loss 2.75066662
Trained batch 378 batch loss 3.20161724 epoch total loss 2.75186
Trained batch 379 batch loss 2.98093867 epoch total loss 2.75246429
Trained batch 380 batch loss 2.69721079 epoch total loss 2.7523191
Trained batch 381 batch loss 2.64999175 epoch total loss 2.75205064
Trained batch 382 batch loss 2.73342061 epoch total loss 2.75200176
Trained batch 383 batch loss 2.56292629 epoch total loss 2.75150776
Trained batch 384 batch loss 2.55911016 epoch total loss 2.75100684
Trained batch 385 batch loss 2.32842255 epoch total loss 2.74990892
Trained batch 386 batch loss 2.3564198 epoch total lo

Trained batch 494 batch loss 2.74517608 epoch total loss 2.751724
Trained batch 495 batch loss 2.92470789 epoch total loss 2.75207353
Trained batch 496 batch loss 2.96374416 epoch total loss 2.7525003
Trained batch 497 batch loss 2.58532333 epoch total loss 2.75216389
Trained batch 498 batch loss 2.68095136 epoch total loss 2.75202084
Trained batch 499 batch loss 2.65850925 epoch total loss 2.75183344
Trained batch 500 batch loss 2.4773736 epoch total loss 2.7512846
Trained batch 501 batch loss 2.60164785 epoch total loss 2.7509861
Trained batch 502 batch loss 2.72831249 epoch total loss 2.7509408
Trained batch 503 batch loss 2.75157928 epoch total loss 2.75094199
Trained batch 504 batch loss 2.88805461 epoch total loss 2.75121427
Trained batch 505 batch loss 2.6582191 epoch total loss 2.75103
Trained batch 506 batch loss 2.67158103 epoch total loss 2.75087309
Trained batch 507 batch loss 2.19783783 epoch total loss 2.74978232
Trained batch 508 batch loss 2.26078248 epoch total loss 2.

Trained batch 616 batch loss 2.55134964 epoch total loss 2.75283217
Trained batch 617 batch loss 2.76484919 epoch total loss 2.75285172
Trained batch 618 batch loss 2.83860683 epoch total loss 2.75299048
Trained batch 619 batch loss 2.88187075 epoch total loss 2.75319862
Trained batch 620 batch loss 2.37139559 epoch total loss 2.75258279
Trained batch 621 batch loss 2.40072417 epoch total loss 2.75201631
Trained batch 622 batch loss 2.77019477 epoch total loss 2.75204539
Trained batch 623 batch loss 2.34488583 epoch total loss 2.75139165
Trained batch 624 batch loss 2.46728659 epoch total loss 2.75093651
Trained batch 625 batch loss 2.79129601 epoch total loss 2.75100088
Trained batch 626 batch loss 2.50721192 epoch total loss 2.75061154
Trained batch 627 batch loss 2.77053 epoch total loss 2.75064325
Trained batch 628 batch loss 2.86571169 epoch total loss 2.7508266
Trained batch 629 batch loss 2.93608069 epoch total loss 2.75112104
Trained batch 630 batch loss 2.82531929 epoch total 

Trained batch 737 batch loss 2.58018446 epoch total loss 2.73257208
Trained batch 738 batch loss 2.46163034 epoch total loss 2.73220491
Trained batch 739 batch loss 2.67886305 epoch total loss 2.73213291
Trained batch 740 batch loss 2.54272223 epoch total loss 2.73187685
Trained batch 741 batch loss 2.5007081 epoch total loss 2.731565
Trained batch 742 batch loss 2.43141627 epoch total loss 2.7311604
Trained batch 743 batch loss 2.59843731 epoch total loss 2.73098159
Trained batch 744 batch loss 2.80993462 epoch total loss 2.73108768
Trained batch 745 batch loss 2.84853578 epoch total loss 2.73124552
Trained batch 746 batch loss 2.50025392 epoch total loss 2.73093581
Trained batch 747 batch loss 2.63304543 epoch total loss 2.73080468
Trained batch 748 batch loss 2.34863544 epoch total loss 2.73029375
Trained batch 749 batch loss 2.27681684 epoch total loss 2.72968841
Trained batch 750 batch loss 2.38312578 epoch total loss 2.72922635
Trained batch 751 batch loss 2.16328788 epoch total 

Trained batch 859 batch loss 2.4491303 epoch total loss 2.7248435
Trained batch 860 batch loss 2.39201212 epoch total loss 2.72445655
Trained batch 861 batch loss 2.35159302 epoch total loss 2.72402358
Trained batch 862 batch loss 2.72477198 epoch total loss 2.72402453
Trained batch 863 batch loss 2.86197472 epoch total loss 2.72418451
Trained batch 864 batch loss 2.83048701 epoch total loss 2.72430754
Trained batch 865 batch loss 2.58965373 epoch total loss 2.72415185
Trained batch 866 batch loss 2.67724204 epoch total loss 2.72409773
Trained batch 867 batch loss 2.75214386 epoch total loss 2.72413015
Trained batch 868 batch loss 2.60433388 epoch total loss 2.72399211
Trained batch 869 batch loss 2.31612134 epoch total loss 2.72352266
Trained batch 870 batch loss 2.39727712 epoch total loss 2.72314763
Trained batch 871 batch loss 2.5009923 epoch total loss 2.72289252
Trained batch 872 batch loss 2.84874415 epoch total loss 2.72303677
Trained batch 873 batch loss 3.02752924 epoch total

Trained batch 981 batch loss 2.87168694 epoch total loss 2.71474504
Trained batch 982 batch loss 2.62655067 epoch total loss 2.71465516
Trained batch 983 batch loss 2.62717032 epoch total loss 2.71456599
Trained batch 984 batch loss 2.67975354 epoch total loss 2.71453071
Trained batch 985 batch loss 3.03311157 epoch total loss 2.71485424
Trained batch 986 batch loss 2.94658184 epoch total loss 2.71508908
Trained batch 987 batch loss 2.87004709 epoch total loss 2.7152462
Trained batch 988 batch loss 2.65977573 epoch total loss 2.71519
Trained batch 989 batch loss 2.76148653 epoch total loss 2.71523666
Trained batch 990 batch loss 2.59442043 epoch total loss 2.71511483
Trained batch 991 batch loss 3.13179922 epoch total loss 2.71553516
Trained batch 992 batch loss 2.84054208 epoch total loss 2.71566129
Trained batch 993 batch loss 2.86516142 epoch total loss 2.71581197
Trained batch 994 batch loss 3.00211954 epoch total loss 2.7161
Trained batch 995 batch loss 3.12196302 epoch total loss

Trained batch 1101 batch loss 2.31086349 epoch total loss 2.7086668
Trained batch 1102 batch loss 2.46103072 epoch total loss 2.70844197
Trained batch 1103 batch loss 2.48259306 epoch total loss 2.70823741
Trained batch 1104 batch loss 2.54765224 epoch total loss 2.70809174
Trained batch 1105 batch loss 2.64489317 epoch total loss 2.70803452
Trained batch 1106 batch loss 2.58845949 epoch total loss 2.70792627
Trained batch 1107 batch loss 2.56282258 epoch total loss 2.70779514
Trained batch 1108 batch loss 2.54313803 epoch total loss 2.70764661
Trained batch 1109 batch loss 2.47947311 epoch total loss 2.70744085
Trained batch 1110 batch loss 2.41517115 epoch total loss 2.70717764
Trained batch 1111 batch loss 2.19981027 epoch total loss 2.70672107
Trained batch 1112 batch loss 2.79231119 epoch total loss 2.70679784
Trained batch 1113 batch loss 2.44858503 epoch total loss 2.70656586
Trained batch 1114 batch loss 2.46215987 epoch total loss 2.70634627
Trained batch 1115 batch loss 2.151

Trained batch 1221 batch loss 2.41467404 epoch total loss 2.70467615
Trained batch 1222 batch loss 2.35618019 epoch total loss 2.704391
Trained batch 1223 batch loss 2.37630105 epoch total loss 2.70412278
Trained batch 1224 batch loss 2.35779905 epoch total loss 2.70383978
Trained batch 1225 batch loss 2.52029228 epoch total loss 2.70369
Trained batch 1226 batch loss 2.48147845 epoch total loss 2.70350885
Trained batch 1227 batch loss 2.44026375 epoch total loss 2.70329404
Trained batch 1228 batch loss 2.97484779 epoch total loss 2.70351529
Trained batch 1229 batch loss 2.6851697 epoch total loss 2.70350027
Trained batch 1230 batch loss 2.58473659 epoch total loss 2.70340371
Trained batch 1231 batch loss 2.46619201 epoch total loss 2.70321107
Trained batch 1232 batch loss 2.69292116 epoch total loss 2.70320272
Trained batch 1233 batch loss 3.15196848 epoch total loss 2.70356655
Trained batch 1234 batch loss 2.91761947 epoch total loss 2.70374012
Trained batch 1235 batch loss 2.68956757

Trained batch 1341 batch loss 2.80287576 epoch total loss 2.70759916
Trained batch 1342 batch loss 2.44657063 epoch total loss 2.70740461
Trained batch 1343 batch loss 2.52196169 epoch total loss 2.70726657
Trained batch 1344 batch loss 2.56751347 epoch total loss 2.70716262
Trained batch 1345 batch loss 2.44739699 epoch total loss 2.7069695
Trained batch 1346 batch loss 2.47676539 epoch total loss 2.70679855
Trained batch 1347 batch loss 2.4140451 epoch total loss 2.70658112
Trained batch 1348 batch loss 2.64579439 epoch total loss 2.70653605
Trained batch 1349 batch loss 2.52183771 epoch total loss 2.7063992
Trained batch 1350 batch loss 2.55343485 epoch total loss 2.70628572
Trained batch 1351 batch loss 2.58222413 epoch total loss 2.70619392
Trained batch 1352 batch loss 2.5957756 epoch total loss 2.70611238
Trained batch 1353 batch loss 2.63401699 epoch total loss 2.70605898
Trained batch 1354 batch loss 2.56523418 epoch total loss 2.70595503
Trained batch 1355 batch loss 2.610542

Trained batch 1461 batch loss 2.76275182 epoch total loss 2.69965887
Trained batch 1462 batch loss 2.88453937 epoch total loss 2.69978523
Trained batch 1463 batch loss 2.77582932 epoch total loss 2.69983745
Trained batch 1464 batch loss 2.65672922 epoch total loss 2.69980788
Trained batch 1465 batch loss 2.54249072 epoch total loss 2.69970059
Trained batch 1466 batch loss 2.66201544 epoch total loss 2.69967484
Trained batch 1467 batch loss 2.89915442 epoch total loss 2.69981098
Trained batch 1468 batch loss 2.68195772 epoch total loss 2.69979858
Trained batch 1469 batch loss 2.9266634 epoch total loss 2.69995308
Trained batch 1470 batch loss 3.09119582 epoch total loss 2.70021939
Trained batch 1471 batch loss 2.59937906 epoch total loss 2.70015073
Trained batch 1472 batch loss 2.67306614 epoch total loss 2.70013237
Trained batch 1473 batch loss 2.65726066 epoch total loss 2.70010328
Trained batch 1474 batch loss 2.67563629 epoch total loss 2.70008659
Trained batch 1475 batch loss 2.426

Trained batch 1581 batch loss 2.78697515 epoch total loss 2.6960454
Trained batch 1582 batch loss 2.61689663 epoch total loss 2.69599533
Trained batch 1583 batch loss 2.88922358 epoch total loss 2.69611716
Trained batch 1584 batch loss 2.87932944 epoch total loss 2.69623303
Trained batch 1585 batch loss 2.84684443 epoch total loss 2.69632792
Trained batch 1586 batch loss 3.00294447 epoch total loss 2.69652128
Trained batch 1587 batch loss 2.76469827 epoch total loss 2.6965642
Trained batch 1588 batch loss 2.67199969 epoch total loss 2.69654846
Trained batch 1589 batch loss 2.5644381 epoch total loss 2.69646549
Trained batch 1590 batch loss 2.38702559 epoch total loss 2.69627094
Trained batch 1591 batch loss 2.49039817 epoch total loss 2.69614148
Trained batch 1592 batch loss 2.75393438 epoch total loss 2.69617772
Trained batch 1593 batch loss 2.87362432 epoch total loss 2.69628906
Trained batch 1594 batch loss 2.80690098 epoch total loss 2.69635868
Trained batch 1595 batch loss 2.75092

Trained batch 1701 batch loss 2.61371613 epoch total loss 2.6872592
Trained batch 1702 batch loss 2.73767829 epoch total loss 2.68728876
Trained batch 1703 batch loss 2.68266964 epoch total loss 2.68728614
Trained batch 1704 batch loss 2.56904554 epoch total loss 2.68721652
Trained batch 1705 batch loss 2.91414952 epoch total loss 2.68734956
Trained batch 1706 batch loss 2.68084455 epoch total loss 2.68734574
Trained batch 1707 batch loss 2.68160105 epoch total loss 2.68734241
Trained batch 1708 batch loss 2.43693638 epoch total loss 2.68719578
Trained batch 1709 batch loss 2.75076389 epoch total loss 2.68723321
Trained batch 1710 batch loss 2.38817906 epoch total loss 2.68705821
Trained batch 1711 batch loss 2.37064791 epoch total loss 2.6868732
Trained batch 1712 batch loss 2.63334942 epoch total loss 2.68684196
Trained batch 1713 batch loss 2.49483871 epoch total loss 2.68673
Trained batch 1714 batch loss 2.41570759 epoch total loss 2.6865716
Trained batch 1715 batch loss 2.37694287

Trained batch 1821 batch loss 2.6782763 epoch total loss 2.68152452
Trained batch 1822 batch loss 2.41586041 epoch total loss 2.68137884
Trained batch 1823 batch loss 2.87345147 epoch total loss 2.68148422
Trained batch 1824 batch loss 2.86252761 epoch total loss 2.6815834
Trained batch 1825 batch loss 2.74720931 epoch total loss 2.68161917
Trained batch 1826 batch loss 2.78266358 epoch total loss 2.68167448
Trained batch 1827 batch loss 2.49101281 epoch total loss 2.68157029
Trained batch 1828 batch loss 2.72654867 epoch total loss 2.68159485
Trained batch 1829 batch loss 2.62464714 epoch total loss 2.68156385
Trained batch 1830 batch loss 2.52834916 epoch total loss 2.68148
Trained batch 1831 batch loss 2.54434562 epoch total loss 2.68140507
Trained batch 1832 batch loss 2.86040664 epoch total loss 2.68150282
Trained batch 1833 batch loss 2.78971291 epoch total loss 2.68156171
Trained batch 1834 batch loss 2.68538165 epoch total loss 2.68156385
Trained batch 1835 batch loss 2.5202727

Trained batch 1941 batch loss 2.71247435 epoch total loss 2.68071675
Trained batch 1942 batch loss 2.02803493 epoch total loss 2.68038034
Trained batch 1943 batch loss 2.40293765 epoch total loss 2.68023753
Trained batch 1944 batch loss 2.52878737 epoch total loss 2.68015981
Trained batch 1945 batch loss 2.57650185 epoch total loss 2.6801064
Trained batch 1946 batch loss 2.71573734 epoch total loss 2.68012476
Trained batch 1947 batch loss 2.43037 epoch total loss 2.67999649
Trained batch 1948 batch loss 2.20067501 epoch total loss 2.67975044
Trained batch 1949 batch loss 2.39258957 epoch total loss 2.6796031
Trained batch 1950 batch loss 2.24556589 epoch total loss 2.67938042
Trained batch 1951 batch loss 2.3634038 epoch total loss 2.67921853
Trained batch 1952 batch loss 2.54046941 epoch total loss 2.67914748
Trained batch 1953 batch loss 2.25286579 epoch total loss 2.67892909
Trained batch 1954 batch loss 2.32246518 epoch total loss 2.6787467
Trained batch 1955 batch loss 2.54380631 

Trained batch 2061 batch loss 2.61402059 epoch total loss 2.68122554
Trained batch 2062 batch loss 2.85520029 epoch total loss 2.6813097
Trained batch 2063 batch loss 2.74322939 epoch total loss 2.68133974
Trained batch 2064 batch loss 2.68459892 epoch total loss 2.68134141
Trained batch 2065 batch loss 2.92806268 epoch total loss 2.68146086
Trained batch 2066 batch loss 2.79506826 epoch total loss 2.68151593
Trained batch 2067 batch loss 2.68813562 epoch total loss 2.68151903
Trained batch 2068 batch loss 2.81819129 epoch total loss 2.68158507
Trained batch 2069 batch loss 3.04444504 epoch total loss 2.68176055
Trained batch 2070 batch loss 2.67637539 epoch total loss 2.68175793
Trained batch 2071 batch loss 2.67733097 epoch total loss 2.68175578
Trained batch 2072 batch loss 3.01547766 epoch total loss 2.68191671
Trained batch 2073 batch loss 2.80568552 epoch total loss 2.68197656
Trained batch 2074 batch loss 2.62599421 epoch total loss 2.68194962
Trained batch 2075 batch loss 2.277

Trained batch 2181 batch loss 3.03431416 epoch total loss 2.67671
Trained batch 2182 batch loss 2.82619095 epoch total loss 2.67677832
Trained batch 2183 batch loss 2.8031621 epoch total loss 2.67683625
Trained batch 2184 batch loss 3.05124426 epoch total loss 2.67700768
Trained batch 2185 batch loss 2.60772872 epoch total loss 2.6769762
Trained batch 2186 batch loss 2.77874517 epoch total loss 2.6770227
Trained batch 2187 batch loss 2.81781578 epoch total loss 2.67708707
Trained batch 2188 batch loss 3.14373803 epoch total loss 2.67730021
Trained batch 2189 batch loss 2.84472299 epoch total loss 2.67737675
Trained batch 2190 batch loss 2.64671612 epoch total loss 2.67736268
Trained batch 2191 batch loss 2.57298422 epoch total loss 2.677315
Trained batch 2192 batch loss 2.68455076 epoch total loss 2.67731833
Trained batch 2193 batch loss 2.91175842 epoch total loss 2.67742515
Trained batch 2194 batch loss 2.81608462 epoch total loss 2.67748833
Trained batch 2195 batch loss 2.90365934 e

Trained batch 2301 batch loss 2.72770262 epoch total loss 2.67196584
Trained batch 2302 batch loss 2.76967192 epoch total loss 2.67200828
Trained batch 2303 batch loss 2.76771927 epoch total loss 2.67204976
Trained batch 2304 batch loss 2.62235236 epoch total loss 2.6720283
Trained batch 2305 batch loss 2.51813483 epoch total loss 2.67196155
Trained batch 2306 batch loss 2.50986505 epoch total loss 2.67189097
Trained batch 2307 batch loss 2.47607422 epoch total loss 2.6718061
Trained batch 2308 batch loss 2.61717129 epoch total loss 2.67178249
Trained batch 2309 batch loss 2.48361254 epoch total loss 2.67170095
Trained batch 2310 batch loss 2.43879271 epoch total loss 2.6716
Trained batch 2311 batch loss 2.40767527 epoch total loss 2.67148614
Trained batch 2312 batch loss 2.46422791 epoch total loss 2.67139649
Trained batch 2313 batch loss 2.43417978 epoch total loss 2.67129374
Trained batch 2314 batch loss 2.39213824 epoch total loss 2.6711731
Trained batch 2315 batch loss 2.77180505 

Trained batch 2421 batch loss 2.45263577 epoch total loss 2.66668653
Trained batch 2422 batch loss 2.56071758 epoch total loss 2.66664267
Trained batch 2423 batch loss 2.74264526 epoch total loss 2.6666739
Trained batch 2424 batch loss 2.76351309 epoch total loss 2.66671395
Trained batch 2425 batch loss 2.42350769 epoch total loss 2.66661358
Trained batch 2426 batch loss 2.80256438 epoch total loss 2.66666985
Trained batch 2427 batch loss 2.55831456 epoch total loss 2.66662502
Trained batch 2428 batch loss 2.72436428 epoch total loss 2.66664863
Trained batch 2429 batch loss 2.63327551 epoch total loss 2.66663504
Trained batch 2430 batch loss 2.50669718 epoch total loss 2.66656923
Trained batch 2431 batch loss 2.68558311 epoch total loss 2.6665771
Trained batch 2432 batch loss 2.64017558 epoch total loss 2.66656613
Trained batch 2433 batch loss 2.44215 epoch total loss 2.6664741
Trained batch 2434 batch loss 2.63948941 epoch total loss 2.6664629
Trained batch 2435 batch loss 2.57849288 

Trained batch 2541 batch loss 2.62049294 epoch total loss 2.66407514
Trained batch 2542 batch loss 2.673383 epoch total loss 2.66407871
Trained batch 2543 batch loss 2.76091576 epoch total loss 2.66411686
Trained batch 2544 batch loss 2.7528162 epoch total loss 2.66415167
Trained batch 2545 batch loss 2.85152078 epoch total loss 2.66422534
Trained batch 2546 batch loss 2.83721042 epoch total loss 2.66429329
Trained batch 2547 batch loss 2.7220397 epoch total loss 2.66431618
Trained batch 2548 batch loss 2.32347059 epoch total loss 2.66418219
Trained batch 2549 batch loss 2.50522852 epoch total loss 2.66412
Trained batch 2550 batch loss 2.37191415 epoch total loss 2.66400552
Trained batch 2551 batch loss 2.31892061 epoch total loss 2.66387
Trained batch 2552 batch loss 2.51602602 epoch total loss 2.66381216
Trained batch 2553 batch loss 2.39919519 epoch total loss 2.66370869
Trained batch 2554 batch loss 2.28627658 epoch total loss 2.66356087
Trained batch 2555 batch loss 2.23555613 epo

Trained batch 2661 batch loss 2.60494447 epoch total loss 2.66396403
Trained batch 2662 batch loss 2.47833014 epoch total loss 2.66389441
Trained batch 2663 batch loss 2.56348658 epoch total loss 2.66385674
Trained batch 2664 batch loss 2.72901917 epoch total loss 2.6638813
Trained batch 2665 batch loss 2.64466381 epoch total loss 2.66387391
Trained batch 2666 batch loss 2.76083088 epoch total loss 2.66391039
Trained batch 2667 batch loss 2.57398558 epoch total loss 2.66387677
Trained batch 2668 batch loss 2.66206694 epoch total loss 2.66387606
Trained batch 2669 batch loss 2.43698788 epoch total loss 2.66379094
Trained batch 2670 batch loss 2.76095533 epoch total loss 2.66382742
Trained batch 2671 batch loss 2.8526032 epoch total loss 2.66389799
Trained batch 2672 batch loss 2.84223437 epoch total loss 2.66396475
Trained batch 2673 batch loss 2.72462821 epoch total loss 2.6639874
Trained batch 2674 batch loss 2.61071062 epoch total loss 2.66396761
Trained batch 2675 batch loss 2.44904

Trained batch 2781 batch loss 2.39357018 epoch total loss 2.66124272
Epoch 2 train loss 2.661242723464966
Validated batch 1 batch loss 2.48303986
Validated batch 2 batch loss 2.35830069
Validated batch 3 batch loss 2.7211051
Validated batch 4 batch loss 2.60170102
Validated batch 5 batch loss 2.88181114
Validated batch 6 batch loss 2.48031902
Validated batch 7 batch loss 2.75808144
Validated batch 8 batch loss 2.29706
Validated batch 9 batch loss 2.69501686
Validated batch 10 batch loss 2.83509135
Validated batch 11 batch loss 3.02634811
Validated batch 12 batch loss 2.68250537
Validated batch 13 batch loss 2.16237617
Validated batch 14 batch loss 2.55986357
Validated batch 15 batch loss 2.62879276
Validated batch 16 batch loss 2.76979876
Validated batch 17 batch loss 2.59978151
Validated batch 18 batch loss 2.45935035
Validated batch 19 batch loss 2.80367947
Validated batch 20 batch loss 2.78261137
Validated batch 21 batch loss 2.63833261
Validated batch 22 batch loss 2.52402353
Valid

Validated batch 198 batch loss 2.53481579
Validated batch 199 batch loss 2.76313424
Validated batch 200 batch loss 2.57352352
Validated batch 201 batch loss 3.08416152
Validated batch 202 batch loss 2.79147458
Validated batch 203 batch loss 2.66361713
Validated batch 204 batch loss 2.72889614
Validated batch 205 batch loss 2.49806237
Validated batch 206 batch loss 2.69334078
Validated batch 207 batch loss 2.4944706
Validated batch 208 batch loss 2.84606862
Validated batch 209 batch loss 2.83479285
Validated batch 210 batch loss 2.83620405
Validated batch 211 batch loss 2.64006186
Validated batch 212 batch loss 2.80038977
Validated batch 213 batch loss 2.70072412
Validated batch 214 batch loss 2.8780973
Validated batch 215 batch loss 2.70804763
Validated batch 216 batch loss 2.96351361
Validated batch 217 batch loss 2.55645418
Validated batch 218 batch loss 2.95180297
Validated batch 219 batch loss 2.64404821
Validated batch 220 batch loss 2.49672914
Validated batch 221 batch loss 2.195

### Simplebaseline

In [20]:
import tensorflow as tf

resnet = tf.keras.applications.resnet.ResNet50(include_top=False, weights='imagenet')

def _make_deconv_layer(num_deconv_layers):
    seq_model = tf.keras.models.Sequential()

    for i in range(num_deconv_layers):
        seq_model.add(tf.keras.layers.Conv2DTranspose(256, kernel_size=(4,4), strides=(2,2), padding='same'))
        seq_model.add(tf.keras.layers.BatchNormalization())
        seq_model.add(tf.keras.layers.ReLU())
    return seq_model


upconv = _make_deconv_layer(3)
final_layer = tf.keras.layers.Conv2D(16, kernel_size=(1,1), padding='same')


def Simplebaseline(input_shape=(256, 256, 3)):
    resnet = tf.keras.applications.resnet.ResNet50(include_top=False, weights='imagenet')
    inputs = tf.keras.Input(shape=input_shape)

    x = resnet(inputs)
    x = upconv(x)
    out = final_layer(x)
    model = tf.keras.Model(inputs, out, name='simple_baseline')
    return model

In [21]:
model2 = Simplebaseline()

In [24]:
def train(epochs, start_epoch, learning_rate, tensorboard_dir, checkpoint,
          num_heatmap, batch_size, train_tfrecords, val_tfrecords, version):
    strategy = tf.distribute.MirroredStrategy()
    global_batch_size = strategy.num_replicas_in_sync * batch_size
    train_dataset = create_dataset(
        train_tfrecords, global_batch_size, num_heatmap, is_train=True)
    val_dataset = create_dataset(
        val_tfrecords, global_batch_size, num_heatmap, is_train=False)

    if not os.path.exists(os.path.join('./models')):
        os.makedirs(os.path.join('./models/'))

    with strategy.scope():
        train_dist_dataset = strategy.experimental_distribute_dataset(
            train_dataset)
        val_dist_dataset = strategy.experimental_distribute_dataset(
            val_dataset)

        model2 = Simplebaseline(IMAGE_SHAPE)
        if checkpoint and os.path.exists(checkpoint):
            model.load_weights(checkpoint)

        trainer = Trainer(
            model2,
            epochs,
            global_batch_size,
            strategy,
            initial_learning_rate=learning_rate,
            start_epoch=start_epoch,
            version=version,
            tensorboard_dir=tensorboard_dir)

        print('Start training...')
        return trainer.run(train_dist_dataset, val_dist_dataset)

In [None]:
tfrecords_dir = os.getenv('HOME')+'/aiffel/mpii/tfrecords_mpii/'
train_tfrecords = os.path.join(tfrecords_dir, 'train*')
val_tfrecords = os.path.join(tfrecords_dir, 'val*')
epochs = 2
batch_size = 8
num_heatmap = 16
tensorboard_dir = './logs/'
learning_rate = 0.0007
start_epoch = 1

automatic_gpu_usage()

pretrained_path = None # './models_old/model-v0.0.2-epoch-15-loss-1.1013.h5'

history2 = train(epochs, start_epoch, learning_rate, tensorboard_dir, pretrained_path,
      num_heatmap, batch_size, train_tfrecords, val_tfrecords, '0.0.1')