In [1]:
import os
import argparse
import gc
import warnings
# Suppress every Python warning for the remainder of the session.
warnings.filterwarnings('ignore')

In [2]:
# for tensorflow v2.1
import tensorflow as tf
from tensorflow.keras import layers

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.callbacks import ModelCheckpoint, Callback, ReduceLROnPlateau
from tensorflow.keras.utils import get_custom_objects

import numpy as np
import pandas as pd
import datetime

In [3]:
# Force a full garbage-collection pass; the value shown below the cell is what gc.collect() returned.
gc.collect()

22

In [4]:
# Switch every visible GPU to on-demand memory growth, then report the device counts.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # The memory-growth setting has to be identical on all GPUs.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPUs")
    except RuntimeError as err:
        # Raised when memory growth is configured after the GPUs were initialized.
        print(err)

6 Physical GPUs, 6 Logical GPUs


In [5]:
# Output produced by the experiment (summaries, checkpoints etc.) has to be placed in this folder.
EXPERIMENT_OUTPUT_PATH = "/mnt/output/experiment"
# NOTE(review): MODEL_VERSION is not referenced by any cell visible here — confirm where it is consumed.
MODEL_VERSION = 1

In [6]:
def gelu(x):
  """Gaussian Error Linear Unit (tanh approximation).

  Computes ``0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x**3)))``
  element-wise.

  Args:
    x: Floating-point tensor to transform.

  Returns:
    The result of applying the formula above to ``x``.
  """
  # Keep the constant a plain Python float. tf.sqrt() on a Python number
  # produces a float32 tensor, which cannot be multiplied with float16 or
  # float64 inputs; a Python scalar instead adopts the dtype of ``x``.
  coeff = float(np.sqrt(2 / np.pi))
  return 0.5 * x * (1 + tf.tanh(coeff * (x + 0.044715 * tf.pow(x, 3))))

In [7]:
# Register the plain callable so that activation='gelu' resolves to it.
# Registering an Activation(...) layer instance instead would make every Dense
# layer share that single layer object as its activation.
get_custom_objects().update({'gelu': gelu})

In [8]:
# Fully connected network: 226 inputs, five GELU hidden layers, 4 linear outputs.
model = Sequential()
model.add(Dense(1024, activation='gelu', input_dim=226))
model.add(Dense(900, activation='gelu'))
model.add(Dropout(0.02))  # small dropout after the second hidden layer
model.add(Dense(1024, activation='gelu'))
model.add(Dense(512, activation='gelu'))
model.add(Dense(512, activation='gelu'))
model.add(Dense(4, activation='linear'))

In [9]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1024)              232448    
_________________________________________________________________
dense_1 (Dense)              (None, 900)               922500    
_________________________________________________________________
dropout (Dropout)            (None, 900)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1024)              922624    
_________________________________________________________________
dense_3 (Dense)              (None, 512)               524800    
_________________________________________________________________
dense_4 (Dense)              (None, 512)               262656    
_________________________________________________________________
dense_5 (Dense)              (None, 4)                 2

In [10]:
# Compile the model: mean absolute error is both the loss and the reported metric.
model.compile(
    optimizer='adam',
    loss='mae',
    metrics=['mae'],
)

In [11]:
# callbacks
def callbacks(path):
    """Assemble the Keras callbacks handed to ``model.fit``.

    Args:
        path: Base directory. Checkpoints are written to
            ``<path>/checkpoints/model.ckpt`` and TensorBoard logs to
            ``<path>/tensorboard``.

    Returns:
        list: ``[checkpoint, tensorboard, reduce_lr]``, in that order.
    """
    keras_cb = tf.keras.callbacks

    # Store weights only, and only when val_loss beats the best value so far.
    checkpoint = keras_cb.ModelCheckpoint(
        os.path.join(path, 'checkpoints', 'model.ckpt'),
        monitor='val_loss',
        mode='auto',
        save_best_only=True,
        save_weights_only=True,
        verbose=1,
    )

    tensorboard = keras_cb.TensorBoard(
        log_dir=os.path.join(path, 'tensorboard'),
        histogram_freq=0,
        write_graph=True,
        write_images=True,
    )

    # Watches the *training* loss (not val_loss): the learning rate is scaled
    # by `factor` after `patience` epochs without a `min_delta` improvement.
    # (`min_delta` is the current name of the older `epsilon` argument.)
    reduce_lr = keras_cb.ReduceLROnPlateau(
        monitor='loss',
        mode='auto',
        factor=0.1,
        patience=10,
        min_delta=1e-04,
        cooldown=0,
        min_lr=0,
        verbose=1,
    )

    return [checkpoint, tensorboard, reduce_lr]

In [12]:
import os

In [13]:
# Derive the model directory from EXPERIMENT_OUTPUT_PATH rather than repeating
# the literal path, so relocating the experiment output means changing only
# that one constant.
train_model_dir = os.path.join(EXPERIMENT_OUTPUT_PATH, 'model')
os.makedirs(train_model_dir, exist_ok=True)

In [14]:
# Build the callback list once; it is passed to model.fit via `callbacks=`.
callback_params = callbacks(train_model_dir)

In [15]:
# About 145 seconds per epoch on the NUC
# 5 epoch : mae = 15.1 (12 min)
# 10 epoch : mae = 6.8 (24 min)
# 15 epoch : mae = 4.5 (36 min)
# 20 epoch : mae = 3.5 (48 min)

epoch_num = 300
batch_size = 2000

In [16]:
# Show the kernel's current working directory.
!pwd

/mnt/output/experiment


In [17]:
# Location of the training CSV on the mounted input volume.
data_dir = '/mnt/input/root/public/data/thin-film/'
train_data = os.path.join(data_dir, 'train-splited.csv')

import pandas as pd

# Read the whole training table into memory.
train_splited = pd.read_csv(train_data)

In [None]:
# Separate the dependent variables (columns 1-4) from the independent
# variables (column 5 onward).
train_Y = train_splited.iloc[:, 1:5]
train_X = train_splited.iloc[:, 5:]

# Train, holding out 5% of the rows for validation.
model.fit(
    x=train_X,
    y=train_Y,
    batch_size=batch_size,
    epochs=epoch_num,
    validation_split=0.05,
    callbacks=callback_params,
)

Train on 761805 samples, validate on 40095 samples
Epoch 1/300
Epoch 00001: val_loss improved from inf to 52.78532, saving model to /mnt/output/experiment/model/checkpoints/model.ckpt
Epoch 2/300
Epoch 00002: val_loss improved from 52.78532 to 35.43161, saving model to /mnt/output/experiment/model/checkpoints/model.ckpt
Epoch 3/300
Epoch 00003: val_loss improved from 35.43161 to 21.77117, saving model to /mnt/output/experiment/model/checkpoints/model.ckpt
Epoch 4/300
Epoch 00004: val_loss improved from 21.77117 to 15.56564, saving model to /mnt/output/experiment/model/checkpoints/model.ckpt
Epoch 5/300
Epoch 00005: val_loss improved from 15.56564 to 12.88593, saving model to /mnt/output/experiment/model/checkpoints/model.ckpt
Epoch 6/300
Epoch 00006: val_loss improved from 12.88593 to 9.89099, saving model to /mnt/output/experiment/model/checkpoints/model.ckpt
Epoch 7/300
Epoch 00007: val_loss improved from 9.89099 to 8.78206, saving model to /mnt/output/experiment/model/checkpoints/mo

In [None]:
# model.fit(train_X, train_Y, epochs=epoch_num, batch_size=batch_size, validation_split=0.05, callbacks=callback_params)