In [None]:
import tensorflow as tf
import numpy as np 
import pandas as pd
import matplotlib as plt
from sklearn.model_selection import train_test_split
import sys
import os
import pickle
from skimage import color

import keras
from keras import regularizers, losses
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, Reshape
from keras.layers.advanced_activations import LeakyReLU
from keras.callbacks import EarlyStopping, ModelCheckpoint, ProgbarLogger

In [None]:
""" 取得專案目錄，方便管理並匯入自訂套件 """
# Resolve the project root (the parent of the current working directory) and
# put it on sys.path so the project-local packages imported below resolve.
prj_root = os.path.join(os.getcwd(), os.pardir)
# Guard the append: re-running this cell would otherwise add the same entry
# to sys.path again on every execution.
if prj_root not in sys.path:
    sys.path.append(prj_root)
import preprocess.dyeprocess as dyeprocess
from util.normalize import unnormalize_lab

In [None]:
""" 參數設定 """
# --- Training hyper-parameters -------------------------------------------
MIN_OCCUR = 0       # forwarded as `min_occur` to the dye one-hot helper
BATCH_SIZE = 128    # batch size for the fit on the full training set
PATIENCE = 500      # EarlyStopping patience (epochs)
EPOCHS = 10000      # upper bound on epochs for each fit call
SEED = 88           # seed for the train/validation split and initializers

# --- File locations, all resolved relative to the project root -----------
_data_dir = os.path.join(prj_root, 'data')
_pickle_dir = os.path.join(prj_root, 'resource', 'pickled')
_model_dir = os.path.join(prj_root, 'resource', 'keras_model')

# Input CSV files.
DATA_FILE_ALL = os.path.join(_data_dir, 'hong_make_all_revised_3.csv')
DATA_FILE_SINGLE = os.path.join(_data_dir, 'hong_make_single_revised_3.csv')

# Pickled artefacts written by this notebook.
DECODER_INPUT_COLUMNS_PATH = os.path.join(_pickle_dir, 'decoder_input_columns.pkl')
LAB_MEAN_PATH = os.path.join(_pickle_dir, 'lab_mean.pkl')
LAB_STD_PATH = os.path.join(_pickle_dir, 'lab_std.pkl')
X_VAL_PATH = os.path.join(_pickle_dir, 'decoder_x_val.pkl')
Y_VAL_PATH = os.path.join(_pickle_dir, 'decoder_y_val.pkl')

# Target file for the ModelCheckpoint callback.
CKP_PATH = os.path.join(_model_dir, 'keras_decoder_model_tmp')

In [None]:
""" load data """
# Combined dataset: keep rows whose `abort` flag is not 1 and whose `L`
# value is present.
df_all_raw = pd.read_csv(DATA_FILE_ALL)
keep_all_rows = (df_all_raw['abort'] != 1) & df_all_raw['L'].notnull()
df_all = df_all_raw[keep_all_rows]

# Single-dye dataset: only the `abort` filter is applied here.
df_single_raw = pd.read_csv(DATA_FILE_SINGLE)
df_single = df_single_raw[df_single_raw['abort'] != 1]

In [None]:
""" 將濃度轉成濃度向量，並把布號 one hot encoding """
# Expand each frame with the project helper (per its name, dye one-hot
# columns carrying concentrations); both calls receive the combined and the
# single-dye frames as reference inputs.
df_all_one_hot = dyeprocess.df_dye_one_hot_with_concetration(df_all, df_all, df_single, min_occur=MIN_OCCUR)
df_single_one_hot = dyeprocess.df_dye_one_hot_with_concetration(df_single, df_all, df_single, min_occur=MIN_OCCUR)

# Drop samples that carry no dye data: a 'LAB' value ending in "-0" marks the
# target to be dyed, so it has no dye data of its own.
# The pattern is a raw string so that `\s` reaches the regex engine verbatim
# instead of being parsed as an (invalid) string escape sequence.
df_all_one_hot = df_all_one_hot[np.logical_not(df_all_one_hot['LAB'].str.contains(r'-\s*0\s*$', case=True, regex=True))]

# Append one-hot (dummy) columns to each frame.
# NOTE(review): the combined frame takes its dummies from '單色光' while the
# single-dye frame takes them from '布號' -- confirm this difference is intended.
df_all_one_hot = pd.concat([df_all_one_hot, pd.get_dummies(df_all_one_hot['單色光'])], axis=1)
df_single_one_hot = pd.concat([df_single_one_hot, pd.get_dummies(df_single_one_hot['布號'])], axis=1)

In [None]:
""" 取得 model 要用的 input x 和 output y """
# Both frames contribute rows in the same order everywhere below:
# combined-recipe rows first, single-dye rows last.
source_frames = (df_all_one_hot, df_single_one_hot)
feature_pattern = '^concentration|^FVF'
lab_columns = ['L', 'a', 'b']

# Model input: every column whose name starts with "concentration" or "FVF".
x = pd.concat([frame.filter(regex=feature_pattern) for frame in source_frames])

# Model target: the raw L, a, b columns plus their per-column statistics,
# computed over every row of raw_y.
raw_y = pd.concat([frame[lab_columns] for frame in source_frames])
raw_y_mean = raw_y.mean()
raw_y_std = raw_y.std()

# Persist the input column names and the normalisation statistics.
for pickle_path, payload in (
        (DECODER_INPUT_COLUMNS_PATH, x.columns.values),
        (LAB_MEAN_PATH, raw_y_mean),
        (LAB_STD_PATH, raw_y_std)):
    with open(pickle_path, 'wb') as output:
        pickle.dump(payload, output)

# Standardise y column-wise (subtract the mean, divide by the std).
y = (raw_y - raw_y_mean) / raw_y_std

In [None]:
""" 切 training testing set """
# x / y were built as [combined rows..., single-dye rows...], so the single-dye
# rows are the trailing len(df_single_one_hot) rows.
single_df_start_idx = -1*len(df_single_one_hot)

# Slice with a non-negative boundary instead of the negative index above:
# when there are no single-dye rows the negative index is -0 == 0, and
# `iloc[:0]` would select nothing rather than every combined row.
combined_row_count = len(x) - len(df_single_one_hot)

# Hold out 10% of the combined rows for validation (seeded for repeatability).
x_train, x_val, y_train, y_val = train_test_split(x.iloc[:combined_row_count, :],
                                                  y.iloc[:combined_row_count, :],
                                                  test_size=0.1,
                                                  random_state=SEED)

# All single-dye rows go into the training set; none are used for validation.
x_train = pd.concat([x_train, x.iloc[combined_row_count:, :]])
y_train = pd.concat([y_train, y.iloc[combined_row_count:, :]])

# Persist the validation split.
with open(X_VAL_PATH, 'wb') as output:
    pickle.dump(x_val, output)
with open(Y_VAL_PATH, 'wb') as output:
    pickle.dump(y_val, output)

In [None]:
""" 取得艷度高的 training set。 """
# Undo the standardisation on a copy of the training targets so they are
# back in raw L, a, b values (column order matches raw_y: L, a, b).
y_train_actual = y_train.copy().values
for column_idx, channel in enumerate(['L', 'a', 'b']):
    y_train_actual[:, column_idx] = y_train_actual[:, column_idx] * raw_y_std[channel] + raw_y_mean[channel]

# Convert Lab -> LCh and keep the rows whose second channel (chroma) exceeds 35.
lch_train = color.lab2lch(y_train_actual)
high_chroma_mask = lch_train[:, 1] > 35
x_train_high_c = x_train.iloc[high_chroma_mask, :].copy()
y_train_high_c = y_train.iloc[high_chroma_mask, :].copy()

In [None]:
# Network hyper-parameters.
vec_dimesion = len(x.columns)   # input width = number of feature columns
L1_REG = 0
L2_REG = 1e-4
LEAKY_RELU_VALUE = 0.5          # value passed to every LeakyReLU layer

# Widths of the hidden Dense layers, in order. Each hidden Dense layer is
# followed by its own LeakyReLU activation.
hidden_units = [128] * 7 + [32, 16]

decoder_layers = []
for layer_idx, units in enumerate(hidden_units):
    # Only the very first layer declares the input shape.
    first_layer_kwargs = {'input_shape': (vec_dimesion,)} if layer_idx == 0 else {}
    # NOTE(review): every hidden layer uses the same initializer seed --
    # confirm that identical seeding across layers is intended.
    decoder_layers.append(
        Dense(units,
              kernel_regularizer=keras.regularizers.l1_l2(l1=L1_REG, l2=L2_REG),
              kernel_initializer=keras.initializers.he_normal(seed=SEED),
              **first_layer_kwargs))
    decoder_layers.append(LeakyReLU(LEAKY_RELU_VALUE))

# 3-unit output layer with no activation: it keeps the regularizer but, unlike
# the hidden layers, does not set an explicit kernel initializer.
decoder_layers.append(
    Dense(3,
          kernel_regularizer=keras.regularizers.l1_l2(l1=L1_REG, l2=L2_REG)))

model = keras.Sequential(decoder_layers)

opt = keras.optimizers.Adam(lr=5e-4)
# sgd = keras.optimizers.SGD(lr=5e-4, decay=0, momentum=0.9, nesterov=True)

# Mean absolute error on the standardised targets.
model.compile(optimizer=opt,
              loss=losses.mean_absolute_error)

# Losses recorded for other LEAKY_RELU_VALUE settings:
# LEAKY_RELU_VALUE 0.0 loss 1.9437939551700134
# LEAKY_RELU_VALUE 0.05 loss 1.8192624274099036
# LEAKY_RELU_VALUE 0.1 loss 1.9517180010487938
In [None]:
# Both callbacks watch the validation loss.
monitored_metric = 'val_loss'

# Stop a fit once the monitored value has not improved for PATIENCE epochs.
earlystop = EarlyStopping(monitor=monitored_metric,
                          patience=PATIENCE,
                          verbose=1)

# Write the model to CKP_PATH only when the monitored value improves.
checkpoint = ModelCheckpoint(filepath=CKP_PATH,
                             monitor=monitored_metric,
                             save_best_only=True,
                             verbose=1)

In [None]:
""" 先 train 艷度高的資料 """
# Phase 1: fit on the high-chroma subset only, validating on the held-out split.
# The batch size is 1/16 of the subset; clamp it to at least 1 so that a subset
# with fewer than 16 rows does not produce a batch size of 0.
high_c_batch_size = max(1, len(x_train_high_c) // 16)

model_history_1 = model.fit(x_train_high_c,
                            y_train_high_c,
                            batch_size=high_c_batch_size,
                            epochs=EPOCHS,
                            validation_data=(x_val, y_val),
                            callbacks=[checkpoint, earlystop],
                            verbose=1)

In [None]:
""" train 所有資料 in training set """
# Phase 2: continue fitting the same model on the full training set, reusing
# the same validation data and the same callback instances as the first fit.
training_callbacks = [checkpoint, earlystop]
validation_pair = (x_val, y_val)

model_history_2 = model.fit(x_train,
                            y_train,
                            batch_size=BATCH_SIZE,
                            epochs=EPOCHS,
                            validation_data=validation_pair,
                            callbacks=training_callbacks,
                            verbose=1)

In [None]:
""" 預測 validation set 的結果，並計算 delta E_lab """
# Reload the model most recently written to CKP_PATH by the checkpoint callback.
bst_model = keras.models.load_model(CKP_PATH)

# Predict on the validation inputs, then map both the predictions and the
# ground truth through the project helper `unnormalize_lab`
# (presumably back to raw Lab values -- confirm against util.normalize).
y_pred = unnormalize_lab(bst_model.predict(x_val))
y_val_true = unnormalize_lab(y_val.values)

# delta E_Lab: Euclidean distance per sample, averaged over the validation set.
squared_error = (y_pred - y_val_true) ** 2
delta_e_per_sample = np.sqrt(squared_error.sum(axis=1))
delta_e_per_sample.mean()