In [None]:
import cv2
import os
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers import Dense, Conv2D, Activation, Dropout, MaxPooling2D, Flatten, GlobalMaxPooling2D, BatchNormalization
from keras.optimizers import SGD, Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.preprocessing.image import ImageDataGenerator

# Label CSVs. FTRAIN is read with pandas further down; FTEST is not used in
# the cells visible here (presumably reserved for evaluation - confirm).
FTRAIN = './data/train/train_labels.csv'
FTEST = './data/test/test_labels.csv'

# Directory prefixes. TRAIN_IMAGE_PATH is prepended to each value of the
# `filename` column when images are loaded, so it must end with a slash.
TRAIN_IMAGE_PATH = './data/train/'
TEST_IMAGE_PATH = './data/test/'

In [None]:
import tensorflow as tf
from keras import backend as keras_backend

# Cap this process at 75% of the GPU memory (TensorFlow 1.x session API).
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.75)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

# The models in this notebook are built with the standalone `keras` package.
# On Keras releases whose backend keeps its own session, registering the
# session only on tf.keras would leave the memory cap unapplied, so it is
# registered on both backends.
keras_backend.set_session(sess)
tf.keras.backend.set_session(sess)

In [None]:
# Frame size in pixels. Used to reshape the image tensor, to size the model
# input, and to scale box coordinates (x by IMG_W, y by IMG_H).
IMG_H = 720
IMG_W = 1280


# Upper bound on the number of samples taken from the training CSV.
BREAK_POINT = 5000

# Values forwarded to model.fit (batch_size, epochs, validation_split).
BATCH_SIZE = 32
EPOCHS = 100
VAL_SPLIT = 0.15

In [None]:
# Read the training label CSV into a DataFrame; expanduser lets the
# configured path start with "~".
fname = FTRAIN
df = pd.read_csv(os.path.expanduser(fname))

In [None]:
df

In [None]:
# Discard every row that has at least one missing value, then report the
# remaining non-null count per column.
df = df.dropna(axis=0, how='any')
print(df.count())

In [None]:
df['filename']

In [None]:
# Remove the image-size and class columns.
# DataFrame.drop returns a new frame and leaves the original untouched, so
# the result has to be bound back to `df` for the removal to take effect.
df = df.drop(columns=['width', 'height', 'class'])
df

# 處理 影像 資料

In [None]:
img_num = 0
x_train = []

# Load at most BREAK_POINT images, in the row order of the label frame, as
# float32 grayscale arrays scaled to the 0..1 range.
for img_label_path in df['filename'].iloc[:BREAK_POINT]:
    img_path = os.path.join(TRAIN_IMAGE_PATH, img_label_path)
    image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread signals a missing or unreadable file by returning None
    # instead of raising; fail here with the offending path rather than with
    # an opaque conversion error on the next line.
    if image is None:
        raise FileNotFoundError("cannot read image: " + img_path)

    x_train.append(image.astype(np.float32) / 255)  # normalize to 0 ~ 1
    img_num += 1
    

In [None]:
print("img_num: " + str(img_num))

In [None]:
x_train = np.array(x_train)

In [None]:
x_train.shape

In [None]:
x_train = x_train.reshape(x_train.shape[0], IMG_H, IMG_W, 1)

In [None]:
x_train.shape

# 處理 Labels 資料

In [None]:
df

In [None]:
# Rescale the box corners from pixels to fractions of the frame size:
# x coordinates by IMG_W, y coordinates by IMG_H.
# NOTE: this overwrites the columns in place, so running the cell a second
# time divides the values again.
for column, extent in (('xmin', IMG_W), ('ymin', IMG_H),
                       ('xmax', IMG_W), ('ymax', IMG_H)):
    df[column] = df[column] / extent

In [None]:
# Flat float64 vector [xmin, ymin, xmax, ymax, xmin, ymin, ...] covering the
# first BREAK_POINT rows, in the same row order used when loading the images.
# A single vectorized slice replaces 4 * BREAK_POINT np.append calls, each of
# which copied the whole array, and does not raise when the frame holds fewer
# than BREAK_POINT rows.
box_columns = ['xmin', 'ymin', 'xmax', 'ymax']
y_train = df[box_columns].values[:BREAK_POINT].astype(np.float64).reshape(-1)
    

In [None]:
y_train

In [None]:
y_train.shape

In [None]:
# One row of (xmin, ymin, xmax, ymax) per sample. The row count is inferred
# from the data instead of being tied to BREAK_POINT.
y_train = y_train.reshape(-1, 4)

In [None]:
df

In [None]:
y_train[0][:]

In [None]:
y_train.shape

# 建立 CNN 模型 (不使用 softmax)

In [None]:
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dropout, Dense, Activation

from keras import backend as k
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
def build_model():
    """Build and compile the CNN that regresses four box values per image.

    The network takes a single-channel IMG_H x IMG_W image, applies four
    Conv2D + MaxPooling2D stages and three dense layers, and ends in four
    linear outputs (no softmax: this is a regression, trained by the notebook
    against the normalized xmin, ymin, xmax, ymax values).  It is compiled
    with mean squared error and the Adam optimizer.  The model summary is
    printed as a side effect.

    Returns:
        The compiled keras ``Model``.
    """
    # Shape comments below are height x width x channels for a 720 x 1280
    # input with the Keras defaults (3x3 'valid' convolutions, 2x2 pooling).
    input1 = Input(shape=(IMG_H, IMG_W, 1))                                             # 720 x 1280 x 1

    # The input shape comes from `input1`; no input_shape is given on C1
    # (the former channels-first value contradicted the Input layer).
    out = Conv2D(filters=8, kernel_size=(3, 3), activation='relu', name='C1')(input1)   # C1, 718 x 1278 x 8
    out = MaxPooling2D(pool_size=(2, 2), name='S2')(out)                                # S2, 359 x 639 x 8

    out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu', name='C3')(out)     # C3, 357 x 637 x 32
    out = MaxPooling2D(pool_size=(2, 2), name='S4')(out)                                # S4, 178 x 318 x 32

    out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu', name='C5')(out)     # C5, 176 x 316 x 64
    out = MaxPooling2D(pool_size=(2, 2), name='S6')(out)                                # S6, 88 x 158 x 64

    out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu', name='C7')(out)    # C7, 86 x 156 x 128
    out = MaxPooling2D(pool_size=(2, 2), name='S8')(out)                                # S8, 43 x 78 x 128

    out = Flatten(name='flatten')(out)                                                  # 43 * 78 * 128 = 429312

    out = Dense(units=128, activation='relu', name='F9_1')(out)
    out = Dense(units=32, activation='relu', name='F9_2')(out)
    out = Dense(units=4, activation='linear', name='F9_3')(out)                         # 4 regression outputs

    model = Model(inputs=input1, outputs=out)

    # 'accuracy' says little about a regression target; it is kept so existing
    # history keys stay available, and mean absolute error is reported next to
    # it as the interpretable metric.
    model.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['accuracy', 'mae'])
    model.summary()

    return model


In [None]:
model = build_model()

In [None]:
output_dir = "output_model"
model_name = "Hand_Location_CNN_Model0604"
model_path = output_dir + '/' + model_name

# The checkpoint is written into output_dir; create it up front so the first
# save does not fail on a missing directory.
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

# Keep only the best model (lowest validation loss) on disk.
checkpoint = ModelCheckpoint(model_path + ".h5", monitor='val_loss', verbose=0, mode='min',
                             save_best_only=True, save_weights_only=False, period=1)

# Halve the learning rate after 5 epochs without improvement.  The model is
# compiled with optimizer='adam', whose default learning rate is 0.001, so the
# floor must lie below that value; the previous floor of 0.01 was above the
# starting rate and therefore the rate could never be reduced.
reduce_lr = ReduceLROnPlateau(patience=5, factor=0.5, min_lr=1e-6, verbose=0)

# validation_split holds out the trailing VAL_SPLIT fraction of the samples.
train_history = model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS,
                          validation_split=VAL_SPLIT, verbose=2,
                          callbacks=[checkpoint, reduce_lr])