In [2]:
import os
import pandas as pd
import cv2

In [33]:
from keras.layers.convolutional import Conv3D, ZeroPadding3D
from keras.layers.pooling import MaxPooling3D
from keras.layers.core import Dense, Activation, Dropout, Flatten, SpatialDropout3D
from keras.layers.wrappers import Bidirectional, TimeDistributed
from keras.layers import BatchNormalization
#from keras.layers.recurrent import GRU
from keras.layers import GRU
from keras.layers import Input
from keras.models import Model
from keras import backend as K
from keras.layers.core import Lambda
from keras import backend as K
import tensorflow as tf

In [4]:
import numpy as np

In [5]:
# Load the training inputs and targets stored as .npy files under data/.
x_data, y_data = (np.load(path) for path in ('data/X_train.npy', 'data/y_train.npy'))

In [6]:
# Show the shapes of the two loaded arrays.
print(x_data.shape, y_data.shape)

(2520, 5, 100, 200, 3) (2520, 11)


## Model Shape 확인

In [None]:
# Build the network one layer at a time and print the tensor shape after every
# step, so the effect of each padding / convolution / pooling stage is visible.
input_shape = (5, 100, 200, 3)
output_size = 11
SEPARATOR = "====================================================="


def show_shape(tensor):
    """Print ``tensor.shape`` and return the tensor unchanged."""
    print(tensor.shape)
    return tensor


x = show_shape(Input(name='the_input', shape=input_shape, dtype='float32'))

# --- stage 1: pad -> conv (spatial stride 2) -> pool -> dropout ---
x = show_shape(ZeroPadding3D(padding=(1, 2, 2), name='zero1')(x))
x = show_shape(Conv3D(32, (3, 5, 5), strides=(1, 2, 2), activation='relu',
                      kernel_initializer='he_normal', name='conv1')(x))
x = show_shape(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max1')(x))
x = show_shape(Dropout(0.5)(x))
print(SEPARATOR)

# --- stage 2: pad -> conv -> pool -> dropout ---
x = show_shape(ZeroPadding3D(padding=(1, 2, 2), name='zero2')(x))
x = show_shape(Conv3D(64, (3, 5, 5), strides=(1, 1, 1), activation='relu',
                      kernel_initializer='he_normal', name='conv2')(x))
x = show_shape(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max2')(x))
x = show_shape(Dropout(0.5)(x))
print(SEPARATOR)

# --- stage 3: pad -> conv -> pool -> dropout ---
x = show_shape(ZeroPadding3D(padding=(1, 1, 1), name='zero3')(x))
x = show_shape(Conv3D(96, (3, 3, 3), strides=(1, 1, 1), activation='relu',
                      kernel_initializer='he_normal', name='conv3')(x))
x = show_shape(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max3')(x))
x = show_shape(Dropout(0.5)(x))  # (BN, 5, 6, 12, 96)
print(SEPARATOR)

# Flatten each frame's feature map so the recurrent layers see one vector per frame.
x = show_shape(TimeDistributed(Flatten())(x))  # (BN, 5, 6912)
print(SEPARATOR)

# --- recurrent part: the second GRU returns only its final state ---
x = show_shape(Bidirectional(GRU(256, return_sequences=True, kernel_initializer='Orthogonal', name='gru1'),
                             merge_mode='concat')(x))
x = show_shape(Bidirectional(GRU(256, return_sequences=False, kernel_initializer='Orthogonal', name='gru2'),
                             merge_mode='concat')(x))
print(SEPARATOR)

x = show_shape(Dense(output_size, kernel_initializer='he_normal', name='dense1')(x))
print(SEPARATOR)

y_pred = show_shape(Activation('softmax', name='softmax')(x))

In [None]:
class Decoder(object):
    """Decodes network predictions and runs optional post-processing on them.

    Args:
        greedy: forwarded unchanged to the module-level ``decode``.
        beam_width: forwarded unchanged to the module-level ``decode``.
        top_paths: forwarded unchanged to the module-level ``decode``.
        **kwargs: optional ``language_model`` (default ``None``) and
            ``postprocessors`` (default empty list), a sequence of callables
            applied in order to every decoded output.
    """

    def __init__(self, greedy=True, beam_width=100, top_paths=1, **kwargs):
        self.greedy, self.beam_width, self.top_paths = greedy, beam_width, top_paths
        self.language_model = kwargs.get('language_model', None)
        self.postprocessors = kwargs.get('postprocessors', [])

    def decode(self, y_pred, input_length):
        """Decode ``y_pred`` and return the list of post-processed outputs."""
        raw_outputs = decode(
            y_pred,
            input_length,
            greedy=self.greedy,
            beam_width=self.beam_width,
            top_paths=self.top_paths,
            language_model=self.language_model,
        )

        def run_chain(item):
            # Feed the item through every post-processor, in registration order.
            for step in self.postprocessors:
                item = step(item)
            return item

        return [run_chain(item) for item in raw_outputs]
    
def decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1, **kwargs):
    """Return the best decoded path for ``y_pred``.

    Args:
        y_pred: network output, passed through to ``_decode``.
        input_length: sequence lengths, passed through to ``_decode``.
        greedy: passed through to ``_decode``.
        beam_width: passed through to ``_decode``.
        top_paths: passed through to ``_decode``.
        **kwargs: may carry ``language_model``; any other key is ignored.

    Returns:
        The first element of the ``paths`` collection returned by ``_decode``.

    Raises:
        NotImplementedError: if a ``language_model`` is supplied.

    NOTE(review): ``_decode`` is not defined in the visible cells; it is
    assumed to return ``(paths, logprobs)`` with ``paths`` ordered best-first.
    """
    language_model = kwargs.get('language_model', None)
    if language_model is not None:
        # Fail fast: language-model search is unsupported, so reject the
        # request before spending time on the decode itself.
        # TODO: compute using language model
        raise NotImplementedError("Language model search is not implemented yet")

    paths, _logprobs = _decode(y_pred=y_pred, input_length=input_length,
                               greedy=greedy, beam_width=beam_width, top_paths=top_paths)
    # Simply output the highest-probability sequence (first entry of paths).
    return paths[0]


### Model-LipNet

In [7]:
from keras.layers.core import Lambda
from keras import backend as K

# Actual loss calculation
def ctc_lambda_func(args):
    """Compute the CTC batch cost; intended as the body of a Keras ``Lambda`` layer.

    Args:
        args: sequence of exactly four items, in the order
            ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The result of ``K.ctc_batch_cost`` for those four inputs.
    """
    predictions, targets, prediction_lengths, target_lengths = args
    # The Keras image_ocr.py example drops the first two RNN outputs
    # (y_pred[:, 2:, :]) because they tend to be garbage; here every
    # time step is kept, so the slice below selects the full tensor.
    predictions = predictions[:, :, :]
    return K.ctc_batch_cost(targets, predictions, prediction_lengths, target_lengths)

def CTC(name, args):
    """Wrap ``ctc_lambda_func`` in a ``Lambda`` layer called ``name`` and apply it to ``args``."""
    loss_layer = Lambda(ctc_lambda_func, output_shape=(1,), name=name)
    return loss_layer(args)

In [38]:
class LipNet(object):
    """LipNet-style video network trained with a CTC loss layer.

    The graph is three blocks of (zero-pad -> Conv3D -> batch-norm -> ReLU ->
    spatial dropout -> max-pool), a per-frame flatten, two bidirectional GRUs,
    and a dense softmax head. ``build`` stores every intermediate tensor as an
    attribute (``zero1`` ... ``y_pred``) and the trainable graph as ``model``.

    Args:
        img_c: number of image channels (last input axis).
        img_w: size of the first spatial axis of each frame.
        img_h: size of the second spatial axis of each frame.
        frames_n: number of frames per clip (time axis).
        output_size: number of units in the final dense/softmax layer.
        max_label_len: width of the ``the_labels`` input. Defaults to 11,
            the value that was previously hard-coded.
    """

    def __init__(self, img_c, img_w, img_h, frames_n, output_size, max_label_len=11):
        self.img_c = img_c
        self.img_w = img_w
        self.img_h = img_h
        self.frames_n = frames_n

        self.output_size = output_size
        self.max_label_len = max_label_len
        self.build()

    def build(self):
        """Create all layers and assemble ``self.model`` (inputs -> CTC loss)."""
        # Channels-last layout. Built from the constructor arguments so the
        # network actually follows them instead of a fixed (5, 100, 200, 3).
        input_shape = (self.frames_n, self.img_w, self.img_h, self.img_c)

        self.input_data = Input(name='the_input', shape=input_shape, dtype='float32')
        self.zero1 = ZeroPadding3D(padding=(1, 2, 2), name='zero1')(self.input_data)
        self.conv1 = Conv3D(32, (3, 5, 5), strides=(1, 2, 2), kernel_initializer='he_normal', name='conv1')(self.zero1)
        self.batc1 = BatchNormalization(name='batc1')(self.conv1)
        self.actv1 = Activation('relu', name='actv1')(self.batc1)
        self.drop1 = SpatialDropout3D(0.5)(self.actv1)
        self.maxp1 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max1')(self.drop1)

        self.zero2 = ZeroPadding3D(padding=(1, 2, 2), name='zero2')(self.maxp1)
        self.conv2 = Conv3D(64, (3, 5, 5), strides=(1, 1, 1), kernel_initializer='he_normal', name='conv2')(self.zero2)
        self.batc2 = BatchNormalization(name='batc2')(self.conv2)
        self.actv2 = Activation('relu', name='actv2')(self.batc2)
        self.drop2 = SpatialDropout3D(0.5)(self.actv2)
        self.maxp2 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max2')(self.drop2)

        self.zero3 = ZeroPadding3D(padding=(1, 1, 1), name='zero3')(self.maxp2)
        self.conv3 = Conv3D(96, (3, 3, 3), strides=(1, 1, 1), kernel_initializer='he_normal', name='conv3')(self.zero3)
        self.batc3 = BatchNormalization(name='batc3')(self.conv3)
        self.actv3 = Activation('relu', name='actv3')(self.batc3)
        self.drop3 = SpatialDropout3D(0.5)(self.actv3)
        self.maxp3 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), name='max3')(self.drop3)

        # Flatten each frame's feature map; the time axis is kept.
        self.resh1 = TimeDistributed(Flatten())(self.maxp3)

        self.gru_1 = Bidirectional(GRU(256, return_sequences=True, kernel_initializer='Orthogonal', name='gru1'), merge_mode='concat')(self.resh1)
        # The second GRU must also return the full sequence: the CTC layer
        # below slices y_pred along three axes and expects
        # (samples, time_steps, num_categories). Returning only the last
        # state would hand it a rank-2 tensor.
        self.gru_2 = Bidirectional(GRU(256, return_sequences=True, kernel_initializer='Orthogonal', name='gru2'), merge_mode='concat')(self.gru_1)

        # transforms RNN output to character activations:
        self.dense1 = Dense(self.output_size, kernel_initializer='he_normal', name='dense1')(self.gru_2)

        self.y_pred = Activation('softmax', name='softmax')(self.dense1)

        ####
        # y_true: (samples, max_string_length) : containing the truth labels
        # y_pred: (samples, time_steps, num_categories) : containing the prediction, or output of the softmax
        # input_length: (samples, 1): containing the sequence length for each batch item in y_pred
        # label_length: (samples, 1): containing the sequence length for each batch item in y_true
        ####
        self.labels = Input(name='the_labels', shape=[self.max_label_len], dtype='float32')
        self.input_length = Input(name='input_length', shape=[1], dtype='int64')
        self.label_length = Input(name='label_length', shape=[1], dtype='int64')

        self.loss_out = CTC('ctc', [self.y_pred, self.labels, self.input_length, self.label_length])

        self.model = Model(inputs=[self.input_data, self.labels, self.input_length, self.label_length], outputs=self.loss_out)

    def summary(self):
        """Print the layer table of the prediction sub-graph (input -> softmax)."""
        Model(inputs=self.input_data, outputs=self.y_pred).summary()

    def predict(self, input_batch):
        """Return the softmax output for ``input_batch``."""
        return self.test_function([input_batch, 0])[0]  # the first 0 indicates test

    @property
    def test_function(self):
        """Backend function mapping (input, learning phase) to the softmax output.

        A new function object is created on every access.
        """
        # captures output of softmax so we can decode the output during visualization
        return K.function([self.input_data, K.learning_phase()], [self.y_pred, K.learning_phase()])

In [35]:
import keras
# Upper bound on training epochs and the early-stopping patience.
epochs = 100
early_stopping_patience = 10

# Stop when val_loss has not improved for `early_stopping_patience` epochs
# and restore the weights from the best epoch.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=early_stopping_patience,
    restore_best_weights=True,
)

In [36]:
# Build the network for 5-frame, 100x200, 3-channel clips with 11 outputs
# and print its layer table.
lipnet = LipNet(img_c=3, img_w=100, img_h=200, frames_n=5, output_size=11)
lipnet.summary()

# `learning_rate` is the supported keyword for the step size; `lr` is only a
# deprecated alias.
adam = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

# The 'ctc' Lambda layer already outputs the loss value, so the compiled loss
# simply passes the model output through and ignores y_true.
lipnet.model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=adam)

Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
the_input (InputLayer)       [(None, 5, 100, 200, 3)]  0         
_________________________________________________________________
zero1 (ZeroPadding3D)        (None, 7, 104, 204, 3)    0         
_________________________________________________________________
conv1 (Conv3D)               (None, 5, 50, 100, 32)    7232      
_________________________________________________________________
batc1 (BatchNormalization)   (None, 5, 50, 100, 32)    128       
_________________________________________________________________
actv1 (Activation)           (None, 5, 50, 100, 32)    0         
_________________________________________________________________
spatial_dropout3d (SpatialDr (None, 5, 50, 100, 32)    0         
_________________________________________________________________
max1 (MaxPooling3D)          (None, 5, 25, 50, 32)     0   

In [45]:
#reconstructed_model = lipnet.model.load_weights("weights368.h5")

In [46]:
from sklearn.model_selection import train_test_split
# Two-stage split with identical settings: first hold out 20% as the test set,
# then hold out 20% of the remainder as the validation set.
split_options = dict(test_size=0.2, random_state=1)
X_train, X_test, y_train, y_test = train_test_split(x_data, y_data, **split_options)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, **split_options)

In [47]:
from keras.callbacks import ModelCheckpoint
# Checkpoint callback that stores weights only, under the 'ModelCheckpoint/' path.
# NOTE(review): no `save_best_only` is passed and the path has no epoch
# placeholder, so presumably each save overwrites the previous one — confirm
# this is intended.
checkpoint  = ModelCheckpoint('ModelCheckpoint/',monitor='val_loss', save_weights_only=True, mode='auto')

In [48]:
# Per-sample length entries for the CTC inputs: one single-element row per
# sample, i.e. input_length = [[n_frames] for _ in range(total_count)].
print(X_train.shape, y_train.shape)

# Take the sequence length from the frame axis (axis 1) of the data rather
# than hard-coding 5, so the lengths follow the clips actually loaded.
n_frames = X_train.shape[1]
input_length_X = [[n_frames] for _ in range(X_train.shape[0])]
label_length_X = [[1] for _ in range(X_train.shape[0])]
input_length_v = [[X_val.shape[1]] for _ in range(X_val.shape[0])]
label_length_v = [[1] for _ in range(X_val.shape[0])]

(1612, 5, 100, 200, 3) (1612, 11)


In [49]:
# Cast every array to the dtype its model input declares: float32 for the
# video tensor and the labels, int64 for the two length inputs. float64 would
# double the memory of the video tensor without adding precision the model
# uses, and the lengths are integer counts.
X_train = np.asarray(X_train, dtype='float32')
y_train = np.asarray(y_train, dtype='float32')
X_val = np.asarray(X_val, dtype='float32')
y_val = np.asarray(y_val, dtype='float32')
input_length_X = np.asarray(input_length_X, dtype='int64')
label_length_X = np.asarray(label_length_X, dtype='int64')
input_length_v = np.asarray(input_length_v, dtype='int64')
label_length_v = np.asarray(label_length_v, dtype='int64')

In [50]:
# Train the CTC model. The compiled loss ignores y_true (the 'ctc' layer
# outputs the loss itself), so `y` is only a placeholder of the right length.
# validation_data must be an (inputs, targets) pair: the four model inputs go
# in the first element and the placeholder target in the second. A flat
# four-item list is not a valid (x_val, y_val) pair.
history = lipnet.model.fit(
    x=[X_train, y_train, input_length_X, label_length_X],
    y=y_train,
    validation_data=([X_val, y_val, input_length_v, label_length_v], y_val),
    batch_size=8,
    epochs=epochs,  # reuse the notebook-level setting instead of repeating 100
    verbose=1,
    callbacks=[checkpoint, early_stopping],
    initial_epoch=0,
)

Epoch 1/100
Epoch 2/100

KeyboardInterrupt: 

In [None]:
def plot_loss(history):
    """Draw the training and validation loss curves.

    Args:
        history: object whose ``history`` attribute is a mapping containing
            the ``'loss'`` and ``'val_loss'`` series.

    The validation curve is labelled 'Test' in the legend. ``plt`` must
    already be in scope (presumably ``matplotlib.pyplot`` — it is not
    imported in the visible cells).
    """
    series = history.history
    for key in ('loss', 'val_loss'):
        plt.plot(series[key])
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc=0)

def plot_acc(history):
    """Draw the training and validation accuracy curves.

    Args:
        history: object whose ``history`` attribute is a mapping containing
            either ``'acc'``/``'val_acc'`` or ``'accuracy'``/``'val_accuracy'``.

    Raises:
        KeyError: if neither accuracy series is present.

    The validation curve is labelled 'Test' in the legend. ``plt`` must
    already be in scope (presumably ``matplotlib.pyplot`` — it is not
    imported in the visible cells).
    """
    series = history.history
    # The accuracy entry is recorded under the name the metric was given at
    # compile time, so accept both the short and the long spelling.
    key = 'acc' if 'acc' in series else 'accuracy'
    plt.plot(series[key])
    plt.plot(series['val_' + key])
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc=0)

In [None]:
# Disabled resume-from-checkpoint snippet, kept inside a string literal so it never executes.
# NOTE(review): the opening delimiter has four quotes, so the string itself begins with a stray `'`.
# NOTE(review): the load path uses `OUPUT_DIR` while the checkpoint path uses `OUTPUT_DIR` —
# likely a typo to fix if this is ever re-enabled.
''''
if start_epoch > 0:
    weight_file = os.path.join(OUPUT_DIR, os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
    lipnet.model.load_weights(weight_file)
    
    # checkpoint
    tensorboard = TensorBoard(log_dir=os.path.join(LOG_DIR, run_name))
    checkpoint  = ModelCheckpoint(os.path.join(OUTPUT_DIR, run_name, "weights{epoch:02d}.h5"), monitor='val_loss', save_weights_only=True, mode='auto', period=1)
'''

In [None]:
#y_pred = lipnet.predict(x_data)

In [None]:
# Decode the network output and keep the first decoded entry.
# NOTE(review): the visible cells define the `Decoder` class and
# `input_length_X` / `input_length_v`, but never create a `decoder` instance
# or an `input_length` variable, and the `predict` call that would produce a
# numeric `y_pred` is commented out — confirm where these come from before
# running this cell.
result = decoder.decode(y_pred, input_length)[0]

In [None]:
def main():
    """Train ``model`` from a batch generator, evaluate it, and plot the curves.

    Reads the globals ``data``, ``model``, ``batch_size`` and ``epochs``,
    prints the evaluation result, then shows the accuracy and loss plots.

    NOTE(review): ``data``, ``model`` and ``batch_size`` are not defined in
    the visible cells — confirm they exist before calling this.
    """
    batches = data.generator.flow(data.x_train, data.y_train, batch_size=batch_size)

    history = model.fit_generator(
        batches,
        epochs=epochs,
        steps_per_epoch=10,
        verbose=2,
        validation_data=(data.x_test, data.y_test),
    )

    test_metrics = model.evaluate(data.x_test, data.y_test, batch_size=100, verbose=0)
    print('\nTest Result ->', test_metrics)

    # Accuracy first, then loss; each figure is shown on its own.
    plot_acc(history)
    plt.show()
    plot_loss(history)
    plt.show()

In [None]:
 
def plot_loss(history):
    """Draw the training and validation loss curves.

    Args:
        history: object whose ``history`` attribute is a mapping containing
            the ``'loss'`` and ``'val_loss'`` series.

    The validation curve is labelled 'Test' in the legend. ``plt`` must
    already be in scope (presumably ``matplotlib.pyplot`` — it is not
    imported in the visible cells).

    NOTE(review): a function with this name and the same body is also defined
    in an earlier cell; whichever cell runs last wins. Consider keeping one.
    """
    series = history.history
    train_curve = series['loss']
    plt.plot(train_curve)
    validation_curve = series['val_loss']
    plt.plot(validation_curve)
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc=0)

def plot_acc(history):
    """Draw the training and validation accuracy curves.

    Args:
        history: object whose ``history`` attribute is a mapping containing
            either ``'acc'``/``'val_acc'`` or ``'accuracy'``/``'val_accuracy'``.

    Raises:
        KeyError: if neither accuracy series is present.

    The validation curve is labelled 'Test' in the legend. ``plt`` must
    already be in scope (presumably ``matplotlib.pyplot`` — it is not
    imported in the visible cells).

    NOTE(review): a function with this name is also defined in an earlier
    cell; whichever cell runs last wins. Consider keeping one.
    """
    series = history.history
    # The accuracy entry is recorded under the name the metric was given at
    # compile time, so accept both the short and the long spelling.
    key = 'acc' if 'acc' in series else 'accuracy'
    plt.plot(series[key])
    plt.plot(series['val_' + key])
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc=0)
# Draw the loss curves for the `history` object assigned by the training cell.
# NOTE(review): the recorded training run ended in KeyboardInterrupt, so
# `history` may not have been assigned in that session.
plot_loss(history)