# 1. Import Packages

In [None]:
import os, sys
from utils import *
from generator_v1 import *
from glob import glob

# 2. Configuration Setting

### 2-1 BASE CONFIGURATION

In [None]:
# BASE CONFIGURATION

# Run-specific settings layered on top of the shared `Config` base class.
# Attributes that are read later in this notebook but not set here
# (e.g. GPU_NUM, ALLOW_CPU) presumably come from `Config` - confirm in utils.
class _config(Config):
    # Run identity: used for the log directory layout and the process title.
    PROJ_NAME = 'diebonder'
    MODE = ("train", "eval")[0]            # change the index to switch mode
    MODEL_NAME = (
        "InceptionResNetV2",
        "DenseNet201",
        "NASNetMobile",
        "NASNetLarge",
        "EfficientNet",
    )[3]                                   # change the index to switch backbone

    # Data location (glob pattern) and root directory for logs/checkpoints.
    DATA_GLOB = '/raid/nvidia/jupyterhub/notebook/int/jingwan_diebonder/Data/PoC/chest_xray/*/*/*.*'
    SAVE_BASE_DIR = './logs'

    # Class directory name -> integer label.
    LABEL_MAP = OrderedDict([("normal", 0), ("pneumonia", 1)])

    # GPU selection: "auto" asks the helper for a free device; GPU_MIN_MEM is
    # forwarded to it as `min_mem_mb`.
    GPU_MIN_MEM = 30000
    GPU_OPTION = "auto"

    # Training hyper-parameters.
    CLASS_WEIGHTS = False                  # True -> balanced class weights are computed
    BATCH_SIZE = 16
    NUM_EPOCH = 200
    LR = 1e-6                              # not read in this notebook; presumably used by build_model - confirm


model_config = _config()
model_config.display()

### 2-2 DATA CONFIGURATION

In [None]:
# DATA RELATED CONFIGURATION

# image load: every file matched by the configured glob pattern
img_list = glob(model_config.DATA_GLOB)
if not img_list:
    # Fail here with a clear message instead of much later inside the
    # generators / fit call with an empty DataFrame.
    raise FileNotFoundError("No images matched DATA_GLOB: {}".format(model_config.DATA_GLOB))

# Each path ends in <train_test>/<label_name>/<file>, so the two parent
# directory names carry the split and the class of the image.
label_names = [x.split("/")[-2] for x in img_list]
data_df = pd.DataFrame({"image": img_list,
                        "label_name": label_names,
                        "label": [model_config.LABEL_MAP[name] for name in label_names],
                        "train_test": [x.split("/")[-3] for x in img_list],
                        })

# print data info
print(data_df.groupby(['train_test', 'label_name'])['image'].count())

# train / test dataset split
train_df = data_df.loc[data_df['train_test'] == 'train'].drop(['train_test'], axis=1).reset_index(drop=True)
test_df = data_df.loc[data_df['train_test'] == 'test'].drop(['train_test'], axis=1).reset_index(drop=True)

# apply class weight
y_label = train_df["label_name"]
if model_config.CLASS_WEIGHTS:
    # Weights are computed on the numeric labels and keyed by them, so the
    # mapping no longer depends on the alphabetical order of the class names
    # happening to match LABEL_MAP. Keyword arguments are used because newer
    # scikit-learn releases reject the positional form.
    y_index = train_df["label"].values
    weight_classes = np.unique(y_index)
    weight_values = compute_class_weight(class_weight='balanced', classes=weight_classes, y=y_index)
    class_weights = dict(zip(weight_classes.tolist(), weight_values.tolist()))
else:
    class_weights = None

print('class weight: {} = {}'.format(np.unique(y_label), class_weights))

### 2-3 GPU CONFIGURATION

In [None]:
# GPU CONFIGURATION
# Decide which GPU ids to expose to TensorFlow. "99" is the sentinel this
# notebook uses for "no GPU" (reported below as "CPU is set.").

# Remember whether auto-selection was requested: GPU_OPTION itself is
# overwritten inside the loop, and the helper must be polled again on every
# pass - otherwise the wait loop could never see a GPU becoming free.
auto_select = isinstance(model_config.GPU_OPTION, str) and model_config.GPU_OPTION == "auto"

while True:

    # auto: look up the GPU ids that currently satisfy the requirements
    if auto_select:
        gpu_available = ckech_available_gpu(num_gpu=model_config.GPU_NUM, min_mem_mb=model_config.GPU_MIN_MEM)

        if len(gpu_available) > 0:
            model_config.GPU_OPTION = gpu_available
        else:
            model_config.GPU_OPTION = "99"

    # normalise the option into the comma-separated string CUDA expects
    if isinstance(model_config.GPU_OPTION, str):
        _set_gpu = model_config.GPU_OPTION
    elif isinstance(model_config.GPU_OPTION, (list, tuple, np.ndarray)):
        # str() per element replaces the removed `np.str` alias
        _set_gpu = ",".join(str(gpu_id) for gpu_id in model_config.GPU_OPTION)
    else:
        _set_gpu = "99"

    # proceed when a GPU was found, or when running on CPU is acceptable
    if model_config.ALLOW_CPU or _set_gpu != "99":
        break

    # wait for a GPU: tell the user first, then sleep before polling again
    print("Wait for GPU.", end='\r')
    time.sleep(5*60)

# assign the GPU
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES'] = _set_gpu

if _set_gpu == "99":
    print("CPU is set.")
else:
    print("GPU is set '{}'.".format(_set_gpu))

# Set session: allocate GPU memory on demand instead of all at once
tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True
set_session(tf.Session(config=tf_config))

# Name the process after project / mode / model
from setproctitle import setproctitle
setproctitle("{}_{}_{}".format(model_config.PROJ_NAME, model_config.MODE, model_config.MODEL_NAME))

### 2-4 MODEL CONFIGURATION

In [None]:
# MODEL CONFIGURATION

# Set Model: pick the backbone class and its input resolution.
# The import sits inside each branch, so only the selected backbone's module
# is imported.
if model_config.MODEL_NAME == "InceptionResNetV2":
    from keras.applications.inception_resnet_v2 import InceptionResNetV2 as base_model
    model_config.INPUT_SHAPE = (299, 299, 3)

elif model_config.MODEL_NAME == "DenseNet201":
    from keras.applications.densenet import DenseNet201 as base_model
    model_config.INPUT_SHAPE = (224, 224, 3)

elif model_config.MODEL_NAME == "NASNetMobile":
    from keras.applications.nasnet import NASNetMobile as base_model
    model_config.INPUT_SHAPE = (224, 224, 3)

elif model_config.MODEL_NAME == "NASNetLarge":
    from keras.applications.nasnet import NASNetLarge as base_model
    model_config.INPUT_SHAPE = (331, 331, 3)

elif model_config.MODEL_NAME == "EfficientNet":
    from efficientnet.keras import EfficientNetB5 as base_model
    model_config.INPUT_SHAPE = (456, 456, 3)

else:
    # Without this, an unknown name would leave `base_model` / INPUT_SHAPE
    # undefined and fail later with an unrelated NameError / AttributeError.
    raise ValueError("Unsupported MODEL_NAME: {}".format(model_config.MODEL_NAME))

# Number of classes = number of distinct labels present in the loaded data.
model_config.NUM_CLASS = len(data_df["label"].unique())

# Default log directory: <SAVE_BASE_DIR>/<PROJ_NAME>/logs_<yymmdd>/<MODEL_NAME>
date = datetime.now().strftime("%y%m%d")
if not hasattr(model_config, "LOG_DIR"):
    model_config.LOG_DIR = os.path.join(model_config.SAVE_BASE_DIR, model_config.PROJ_NAME, "logs_{}".format(date), model_config.MODEL_NAME)

# Create the directory whether LOG_DIR was derived above or preset on the
# config; later cells write model.json and checkpoints into it.
os.makedirs(model_config.LOG_DIR, exist_ok=True)

# 3. MODEL BUILD

### 3-1 MODEL INFO

In [None]:
# `model` is only created by the MODEL BUILD cell that follows; guard the
# lookup so Restart & Run All does not stop here with a NameError.
# Re-run this cell after the build cell to see the layer list.
model.layers if "model" in globals() else None

In [None]:
#MODEL BUILD

# Build the network and keep a JSON description of its architecture in LOG_DIR.
model = build_model(model_config=model_config, base_model=base_model)
model_json = model.to_json()
with open(os.path.join(model_config.LOG_DIR, 'model.json'), "w") as f:
    f.write(model_json)

# Load weights
ckpt_list = sorted(glob(os.path.join(model_config.LOG_DIR, "*.h5")))
find_best = [x for x in ckpt_list if "train_best.h5" in x]

# Per-epoch checkpoints are named 'epoch-<NN>-<val_acc>.h5'. Collect them as
# (epoch, path) pairs parsed from the file name only, so that:
#   * the newest one is chosen numerically (a plain string sort puts
#     'epoch-100' before 'epoch-99'), and
#   * a '-' somewhere in the directory part of the path cannot disturb parsing.
epoch_ckpts = []
for path in ckpt_list:
    parts = os.path.basename(path).split("-")
    if len(parts) == 3 and parts[0] == "epoch" and parts[1].isdigit():
        epoch_ckpts.append((int(parts[1]), path))

if find_best:
    # A finished run left its final weights: reuse them, counting epochs from 0.
    ckpt_path = find_best[0]
    init_epoch = 0
elif epoch_ckpts:
    # Resume an interrupted run from its latest per-epoch checkpoint.
    init_epoch, ckpt_path = max(epoch_ckpts)
else:
    ckpt_path = None
    init_epoch = 0

if ckpt_path is not None:
    model.load_weights(ckpt_path)
    print("Restart from {}".format(ckpt_path))
else:
    # Fresh start: drop TensorBoard event files left in LOG_DIR.
    for event_file in glob(os.path.join(model_config.LOG_DIR, "*tfevents*")):
        os.remove(event_file)
    print("Start from initial weights !")

### 3-2 TRAINING INFO

In [None]:
# Callbacks

# Write a checkpoint whenever validation accuracy improves; the file name
# carries the epoch number and the accuracy reached.
ckpt = ModelCheckpoint(
    os.path.join(model_config.LOG_DIR, 'epoch-{epoch:02d}-{val_acc:.5f}.h5'),
    monitor='val_acc',
    save_best_only=True,
    verbose=1,
)

# TensorBoard logs go to the same directory as the checkpoints.
tboard = TensorBoard(
    log_dir=model_config.LOG_DIR,
    histogram_freq=0,
    write_graph=True,
    write_images=True,
)

# Two learning-rate schedules: a gentle decay driven by validation loss and a
# stronger one driven by validation accuracy.
reducer1 = ReduceLROnPlateau(
    monitor='val_loss', factor=0.9, patience=3,
    mode='auto', min_delta=0.0001, cooldown=0, min_lr=0, verbose=1,
)
reducer2 = ReduceLROnPlateau(
    monitor='val_acc', factor=0.5, patience=5,
    mode='auto', min_delta=0.0001, cooldown=0, min_lr=0, verbose=1,
)

# Stop after 10 epochs without a validation-accuracy improvement.
stopper = EarlyStopping(
    monitor='val_acc', mode='max', min_delta=0, patience=10,
    baseline=None, restore_best_weights=True, verbose=1,
)

callbacks_list = [ckpt, tboard, stopper, reducer1, reducer2]

# Generators: training data and held-out data share the same generator class
# and differ only in the `is_train` flag.
train_generator = BalencedDataGenerator(data=train_df, config=model_config, is_train=True)
valid_generator = BalencedDataGenerator(data=test_df, config=model_config, is_train=False)

# Run-length parameters
train_count = len(train_df)
one_epoch = train_count // model_config.BATCH_SIZE   # whole batches per epoch
num_epochs = model_config.NUM_EPOCH

# 4. RUNNING

### 4-1 TRAINING

In [None]:
if model_config.MODE == "train":

    # Gather the fit arguments first so the call itself stays short.
    fit_kwargs = dict(
        steps_per_epoch=one_epoch,
        epochs=num_epochs,
        initial_epoch=init_epoch,          # > 0 when resuming from a checkpoint
        callbacks=callbacks_list,
        validation_data=valid_generator,
        validation_steps=len(test_df) // model_config.BATCH_SIZE,
        class_weight=class_weights,        # None unless CLASS_WEIGHTS is enabled
        verbose=1,
    )
    history = model.fit_generator(train_generator, **fit_kwargs)

    # Store the weights held by the model when training ends (optimizer state excluded).
    final_path = os.path.join(model_config.LOG_DIR, 'train_best.h5')
    model.save(final_path, include_optimizer=False)