In [1]:
%matplotlib inline

import os
import cv2
import sys
import time
import random
import json
import glob
import warnings
from datetime import datetime
sys.path.append("../")
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
tf.logging.set_verbosity(tf.logging.ERROR)

from tensorflow.keras.models import Model 
from tensorflow.keras.layers import Concatenate
from tensorflow.keras.layers import Input
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import CSVLogger, TensorBoard
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras import backend as K
from engine.parallel import ParallelModel
from multiprocessing import cpu_count

from engine.config import ModelConfiguration
from engine.utils import *

# Prune empty run directories that sit directly under ../logs.
# os.rmdir removes only the matched directory; os.removedirs would also walk
# upwards and delete every parent that became empty (including ../logs itself).
for dirpath in glob.glob("../logs/*"):
    if os.path.isdir(dirpath) and not os.listdir(dirpath):
        os.rmdir(dirpath)

# \[ Data Generator \]
---

> 우리는 Semantic Segmentation & Instance Segmentation을 수행하는 모델을 개발.

In [2]:
from engine.retinamasklab import construct_masklabdataset

In [4]:
# Create the model configuration object and set the dataset root directory on it.
config = ModelConfiguration()
config.dataset.data_dir = "../road_project/datasets/"
# Build the train / validation dataset objects from the configuration.
# NOTE(review): the output recorded under this cell reports 0 train / valid
# images — confirm that data_dir resolves correctly from the notebook's cwd.
trainset, validset = construct_masklabdataset(config)

Dataset Summary
-------------------------------
* Num of Train Images : 0
* Num of Valid Images : 0
* Num of Images : 0
-------------------------------



# \[ Allocate Trainable(Sharing) layers \]
---

> 학습할 때와 추론할 때, 모델은 서로 다르게 동작함. 먼저 학습할 Weight Layer들을 구성한 후, 이를 공유하는 Training Model과 Inference Model을 각각 구성함.

In [6]:
from engine.retinamasklab import build_backbone_network
from engine.retinamasklab import build_detection_network
from engine.retinamasklab import build_semantic_network
from engine.retinamasklab import build_instance_network

In [7]:
# Clear the Keras backend session before any layers are allocated.
K.clear_session()

#####################
# Allocate Trainable Weights
#####################
# Each step below first sets the relevant section of `config`, then calls the
# matching builder with the whole configuration object.

## 1. Build Base Network
config.backbone.backbone_type = 'seresnet34'
config.backbone.backbone_outputs = ('C3', 'C4', 'C5', 'P6')

backbone_network = build_backbone_network(config)

## 2. Build Layers Related On Detection
config.detection.num_features = 128
config.detection.num_depth = 3
config.detection.use_squeeze_excite = True

detection_networks = build_detection_network(config)

## 3. Build Layers Related On Mask Prediction (instance head)
config.instance.use_squeeze_excite = True
config.instance.num_features = 128
config.instance.num_depth = 3

instance_networks = build_instance_network(config)

## 4. Build Layers Related On Semantic Segmentation
config.semantic.atrous_groups = 16
config.semantic.num_features = 128
config.semantic.num_depth = 3
config.semantic.use_separable_conv = False
config.semantic.use_squeeze_excite = True

semantic_networks = build_semantic_network(config)


Downloading data from https://github.com/qubvel/classification_models/releases/download/0.0.1/seresnet34_imagenet_1000_no_top.h5
Backbone Network Summary
------------------------
* Backbone Type : seresnet34
* Backbone outputs : ('C3', 'C4', 'C5', 'P6')
* Num features of Backbone Additional Layers: 128
------------------------

Prior Network Summary
-------------------------------
* Strides of prior : [8, 16, 32, 64]
* Sizes of prior : [32, 64, 128, 256]
* width/height scales of prior : [1, 1.2599210498948732, 1.5874010519681994]
* width/height ratios of prior : [0.3333333333333333, 0.5, 1, 2, 3]
-------------------------------

Feature Pyramid Network Summary
-------------------------------
* Feature Pyramid Inputs  : ('C3', 'C4', 'C5')
* Num Features of Feature Pyramid : 128
-------------------------------

Detection Head Networks Summary
-------------------------------
* Num Classes of Detection Classes : 5
* Num Depth of Sub Networks : 3
* Num Features of Sub Networks : 128
* Use S

# \[ Build Trainer & Predictor \]
---

> Weight들을 공유하는 Trainer와 Predictor를 각각 선언.

In [8]:
from engine.retinamasklab import construct_trainer_network
from engine.retinamasklab import construct_inference_network
from engine.losses import *

In [9]:
# The training and inference graphs receive exactly the same configuration and
# sub-network objects, so the keyword arguments are collected once.
shared_networks = dict(
    configuration=config,
    backbone_network=backbone_network,
    detection_networks=detection_networks,
    semantic_networks=semantic_networks,
    instance_networks=instance_networks,
)

# Build the training graph, then the inference graph.
trainer = construct_trainer_network(**shared_networks)
inference = construct_inference_network(**shared_networks)

Detection Losses Summary
-------------------------------
* Classification Loss Weight: 300
* Classification Loss alpha: 0.25
* Classification Loss gamma: 2.0
* Localization Loss Weight: 1.0
* Localization Use adjusted Smoothing L1: True
* Localization Loss momentum: 0.9
* Localization Loss beta: 0.11
-------------------------------

Instance Loss Summary
-------------------------------
* Instance Loss Weight: 0.01
* Instance Loss Label Smoothing: 0.0
-------------------------------

Semantic Loss Summary
-------------------------------
* Semantic Loss Weight: 0.5
* Semantic Loss Label Smoothing: 0.0
-------------------------------



# \[ Train Multi-GPU Trainer \]
---

> ParallelModel을 구성함

## 1. 학습 준비하기 
---


### (1) Set-Up 하기

In [10]:
#####################
# Training Set-Up
#####################

## 1-2. Batch size and GPU count are read from the train configuration
batch_size, gpu_count = config.train.batch_size, config.train.gpu_count

## 3. Save directory: ../logs/without_naver/<backbone type>/<month-day-hour>/
dt = datetime.now().strftime('%m-%d-%H')
save_dir = f"../logs/without_naver/{config.backbone.backbone_type}/{dt}/"
os.makedirs(save_dir, exist_ok=True)
config.train.save_dir = save_dir

## 4. Dump the configuration (save_dir already recorded on it) as JSON
config_path = os.path.join(save_dir, 'config.json')
with open(config_path, 'w') as f:
    json.dump(config.to_dict(), f)

### (2) CallBack 함수 구성하기

In [11]:
from engine.callbacks import SaveInferenceModel, CyclicLR
from tensorflow.keras.callbacks import CSVLogger, TensorBoard

In [12]:
# Callback constructed from the run directory and the inference model.
# NOTE(review): presumably exports the inference model during training —
# confirm against engine.callbacks.SaveInferenceModel.
ckpt = SaveInferenceModel(save_dir, inference)
# TensorBoard callback pointed at the run directory.
tb = TensorBoard(save_dir)
# CSV logger writing to <save_dir>/train.csv.
csvl = CSVLogger(
    os.path.join(save_dir, 'train.csv'))

### (3) DataGenerator 구성하기

In [13]:
# Training generator: scale_ratio is given as the pair (.4, .6) and the batch
# size comes from the training configuration.
# NOTE(review): this cell passes `trainset.config`, whereas the sanity-check
# cell further down passes `trainset` itself to MaskLabGenerator — confirm
# which argument the generator expects.
traingen = MaskLabGenerator(trainset.config,
                             scale_ratio=(.4,.6),
                             batch_size=batch_size)

# Validation generator: a single scale_ratio of .5 and a hard-coded batch size
# of 32 (not `batch_size`).
validgen = MaskLabGenerator(validset.config,
                             scale_ratio=.5, 
                             batch_size=32,)

## 2. Train Step-by-Step

In [14]:
from engine.parallel import ParallelModel
from engine.optimizers import RectifiedAdam

### (0) 모델 학습 가능한지 평가하기

In [15]:
print("Check Fit Dataset Successfully")
checkgen = MaskLabGenerator(trainset, scale_ratio=.6, batch_size=batch_size)

# Mark every layer of the trainer as trainable for this short check run.
for trainer_layer in trainer.layers:
    trainer_layer.trainable = True

# Wrap the trainer for multi-GPU use only when more than one GPU is configured.
parallel = ParallelModel(trainer, gpu_count) if gpu_count > 1 else trainer

# Outputs whose name contains 'loss' are added as (mean-reduced) losses;
# every output is additionally tracked as a mean-aggregated metric.
for out_name, out_tensor in zip(parallel.output_names, parallel.output):
    if 'loss' in out_name:
        parallel.add_loss(K.mean(out_tensor))
    parallel.add_metric(out_tensor, aggregation='mean', name=out_name)

# Learning rate is 1e-10 — presumably so this run only exercises the pipeline
# without meaningfully moving the weights; confirm intent.
parallel.compile(RectifiedAdam(1e-10))
parallel.fit_generator(checkgen, steps_per_epoch=10, verbose=1,
                       callbacks=[tb])

Check Fit Dataset Successfully


<tensorflow.python.keras.callbacks.History at 0xb6ba2f550>

### (1) Train Head Network

In [None]:
# Head-training stage: freeze('C5') is called before the training model is
# (re)built.
# NOTE(review): `freeze` is defined outside this notebook; presumably it
# freezes backbone layers up to C5 — confirm.
freeze('C5')
parallel = ParallelModel(trainer, gpu_count) if gpu_count > 1 else trainer

# Outputs whose name contains 'loss' become losses; all outputs become metrics.
# NOTE(review): when gpu_count <= 1, `parallel` is `trainer` itself, so the
# add_loss / add_metric calls of earlier cells are repeated on the same
# object — confirm this is intended.
for out_name, out_tensor in zip(parallel.output_names, parallel.output):
    if 'loss' in out_name:
        parallel.add_loss(K.mean(out_tensor))
    parallel.add_metric(out_tensor, aggregation='mean', name=out_name)

# Compile Model
clr = CyclicLR(base_lr=1e-4, max_lr=1e-3, step_size=700)
parallel.compile(RectifiedAdam(1e-3))
callbacks = [ckpt, tb, clr, csvl]

#### LR Range Test

In [None]:
# LR range test: snapshot the weights, train a few steps per epoch while the
# scheduler steps through `lr_list` (one value per epoch), then restore the
# snapshot and plot loss against learning rate.
print("Save Current Weight")
parallel.save_weights("temp.h5")  # temporary snapshot of the current weights

# 50 log-spaced learning rates from 1e-7 to 1e-1; epoch index -> learning rate.
lr_list = np.logspace(-7,-1,num=50)
lrschedule = LearningRateScheduler(lambda x: lr_list[x],verbose=1)

print("Start to do LR range Test")
parallel.compile(RectifiedAdam(1e-3))
try:
    hist = parallel.fit_generator(traingen,
                                  steps_per_epoch=3,
                                  epochs=len(lr_list),
                                  verbose=1,
                                  callbacks=[lrschedule])
finally:
    # Always roll back to the snapshot and delete it, even when the sweep
    # raises or is interrupted; otherwise the model keeps the weights left
    # behind by the sweep and temp.h5 stays on disk.
    print("Reload weight")
    parallel.load_weights('temp.h5')  # restore the snapshot
    os.remove('temp.h5')

plt.title("Learning Rate Range Test")
plt.plot(lr_list, hist.history['loss'])
plt.xscale('log')
plt.xlabel("learning rate")
plt.ylabel("loss")
plt.ylim((0.,2.))
plt.show()

In [None]:
# Compile Model
clr = CyclicLR(base_lr=1e-4, max_lr=1e-3, step_size=700)
parallel.compile(RectifiedAdam(1e-4))
callbacks = [ckpt, tb, clr, csvl]

parallel.fit_generator(traingen, epochs=10, steps_per_epoch=300,  
                       validation_data=validgen, callbacks=callbacks)

### (2) Train FineTune Network

In [None]:
# Freeze BackBone Network
freeze('C2')
if gpu_count > 1:
    parallel = ParallelModel(trainer, gpu_count)
else:
    parallel = trainer
    
# Set Loss & Metric
for tensor, name in zip(parallel.output, parallel.output_names):
    if 'loss' in name:
        parallel.add_loss(K.mean(tensor))
    parallel.add_metric(tensor, aggregation='mean',name=name)

#### LR Range Test

In [None]:
# LR range test: snapshot the weights, train a few steps per epoch while the
# scheduler steps through `lr_list` (one value per epoch), then restore the
# snapshot and plot loss against learning rate.
print("Save Current Weight")
parallel.save_weights("temp.h5")  # temporary snapshot of the current weights

# 200 log-spaced learning rates from 1e-8 to 1e-1; epoch index -> learning rate.
lr_list = np.logspace(-8,-1,num=200)
lrschedule = LearningRateScheduler(lambda x: lr_list[x],verbose=1)

print("Start to do LR range Test")
parallel.compile(RectifiedAdam(1e-3))
try:
    hist = parallel.fit_generator(traingen,
                                  steps_per_epoch=3,
                                  epochs=len(lr_list),
                                  verbose=1,
                                  callbacks=[lrschedule])
finally:
    # Always roll back to the snapshot and delete it, even when the sweep
    # raises or is interrupted; otherwise the model keeps the weights left
    # behind by the sweep and temp.h5 stays on disk.
    print("Reload weight")
    parallel.load_weights('temp.h5')  # restore the snapshot
    os.remove('temp.h5')

plt.title("Learning Rate Range Test")
plt.plot(lr_list, hist.history['loss'])
plt.xscale('log')
plt.xlabel("learning rate")
plt.ylabel("loss")
plt.ylim((0.,1.))
plt.show()

In [None]:
# Compile Model
clr = CyclicLR(base_lr=1e-4, max_lr=1e-3, step_size=700)
parallel.compile(RectifiedAdam(1e-3))
callbacks = [ckpt, tb, clr, csvl]

# Warm-Up 
parallel.fit_generator(traingen, epochs=20,
                       steps_per_epoch=300, initial_epoch=10,
                       validation_data=validgen, callbacks=callbacks)

### (3) Train All Network

In [None]:
# Freeze BackBone Network
for layer in trainer.layers:
    layer.trainable = True

if gpu_count > 1:
    parallel = ParallelModel(trainer, gpu_count)
else:
    parallel = trainer
    
# Set Loss & Metric
for tensor, name in zip(parallel.output, parallel.output_names):
    if 'loss' in name:
        parallel.add_loss(K.mean(tensor))
    parallel.add_metric(tensor, aggregation='mean',name=name)

# Compile Model
clr = CyclicLR(base_lr=1e-5, max_lr=1e-3, step_size=700)
parallel.compile(RectifiedAdam(1e-4))
callbacks = [ckpt, tb, clr, csvl]

In [None]:
# Full-network run: the epoch counter starts at 20 and training runs up to
# epoch 70, 300 steps per epoch, validated on validgen.
parallel.fit_generator(traingen, epochs=70,
                       steps_per_epoch=300, initial_epoch=20,
                       validation_data=validgen, callbacks=callbacks)