In [1]:
from data import load_data
import matplotlib.pyplot as plt
import os
import numpy as np

import time

from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split

from keras.models import Model
from keras.layers.wrappers import TimeDistributed, Bidirectional
from keras.layers.recurrent import GRU
from keras.layers import Input
from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding3D
from keras.layers.core import Lambda, Dropout, Flatten, Dense, Activation
from keras.optimizers import Adam
from keras import backend as K

  (fname, cnt))
  (fname, cnt))
Using TensorFlow backend.


In [2]:
# Directory of the preprocessing code. The default is the path this notebook
# was originally run from; set LIPREADING_PREPROCESSING_DIR to run it on
# another machine without editing this cell.
CURRENT_PATH = os.environ.get(
    'LIPREADING_PREPROCESSING_DIR',
    '/home/ubuntu/assignments/machine-lip-reading/preprocessing')
# The dataset lives in the `data` directory next to CURRENT_PATH.
DATA_PATH = os.path.join(CURRENT_PATH, '..', 'data')

In [3]:
def ctc_lambda_func(args):
    """Compute the CTC loss for a batch; meant to be wrapped in a Keras ``Lambda``.

    Args:
        args: sequence ``(y_pred, labels, input_length, label_length)``.
            ``y_pred`` is batch-major (``time_major=False`` is passed below);
            in this notebook ``build_model`` feeds it the output of its
            softmax layer, i.e. probabilities. ``labels`` is a dense label
            tensor. ``input_length`` and ``label_length`` hold one length per
            sample with a trailing axis of size 1 (they are declared with
            ``shape=[1]`` in ``build_model``).

    Returns:
        The tensor returned by ``tf.nn.ctc_loss``.
    """
    import tensorflow as tf
    y_pred, labels, input_length, label_length = args
    # Drop only the trailing singleton axis. A bare tf.squeeze() would also
    # remove the batch axis when the batch holds a single sample, leaving a
    # scalar where a 1-D vector of lengths is required.
    label_length = K.cast(tf.squeeze(label_length, axis=-1), 'int32')
    input_length = K.cast(tf.squeeze(input_length, axis=-1), 'int32')
    labels = K.ctc_label_dense_to_sparse(labels, label_length)
    # tf.nn.ctc_loss applies softmax to its inputs itself. y_pred is already a
    # softmax output, so hand over log-probabilities: softmax(log(p)) == p,
    # which avoids normalising twice. epsilon guards against log(0).
    log_probs = K.log(y_pred + K.epsilon())
    # NOTE(review): ctc_merge_repeated=False is kept from the original; it
    # differs from the TensorFlow default (True) — confirm it is intentional.
    return tf.nn.ctc_loss(labels, log_probs, input_length, ctc_merge_repeated=False,
                          ignore_longer_outputs_than_inputs=True, time_major=False)

In [4]:
def CTC(name, args):
    """Apply ``ctc_lambda_func`` to ``args`` through a Keras ``Lambda`` layer called ``name``."""
    ctc_layer = Lambda(ctc_lambda_func, output_shape=(1,), name=name)
    return ctc_layer(args)

In [5]:
def build_model(input_size, output_size = 28, max_string_len = 10):
    """Build the conv + bidirectional-GRU network with a CTC loss output.

    Args:
        input_size: shape of one input sample, without the batch axis.
            The ZeroPadding3D / TimeDistributed(Conv2D) stack suggests
            (frames, height, width, channels) — TODO confirm against caller.
        output_size: number of units in the final dense layer, i.e. the
            number of classes predicted at each time step.
        max_string_len: length of the ``the_labels`` input.

    Returns:
        A Keras ``Model`` with inputs ``the_input``, ``the_labels``,
        ``label_length`` and ``input_length`` whose single output is the
        ``ctc`` loss layer. The model summary is printed as a side effect.
    """
    input_data = Input(name='the_input', shape=input_size, dtype='float32')
    x = ZeroPadding3D(padding=(0, 2, 2), name='padding1')(input_data)

    # Three conv -> max-pool -> dropout stages, each applied through
    # TimeDistributed; only the number of convolution filters differs.
    # Each pooling layer gets its own name (the original reused 'max1').
    for stage, filters in enumerate((32, 32, 4), start=1):
        x = TimeDistributed(Conv2D(filters=filters, kernel_size=5, strides=(2, 2),
                                   padding='same', activation='relu'))(x)
        x = TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=None,
                                         name='max%d' % stage))(x)
        x = Dropout(0.5)(x)

    # Flatten each time step's feature map into a vector for the recurrent layer.
    input_lstm = TimeDistributed(Flatten())(x)

    x_lstm = Bidirectional(GRU(256, return_sequences=True, kernel_initializer='Orthogonal', name='gru1'),
                           merge_mode='concat')(input_lstm)
    x_lstm = Dense(output_size, kernel_initializer='he_normal', name='dense1')(x_lstm)
    y_pred = Activation('softmax', name='softmax')(x_lstm)

    # Extra inputs consumed only by the CTC loss layer.
    labels = Input(name='the_labels', shape=[max_string_len], dtype='int32')
    input_length = Input(name='input_length', shape=[1], dtype='int32')
    label_length = Input(name='label_length', shape=[1], dtype='int32')
    loss = CTC('ctc', [y_pred, labels, input_length, label_length])

    model = Model(inputs=[input_data, labels, label_length, input_length],
                  outputs=loss)
    model.summary()

    return model

In [6]:
def pad_labels(labels, max_string_len):
    """Right-pad a 2-D label array with -1 so each row has ``max_string_len`` entries.

    Args:
        labels: 2-D numpy array with one row per sample.
        max_string_len: number of columns the result must have.

    Returns:
        Array of shape ``(labels.shape[0], max_string_len)``. The padding
        block is floating point, so integer labels come back as float64
        (numpy type promotion), exactly as before.

    Raises:
        ValueError: if ``labels`` already has more than ``max_string_len``
            columns.
    """
    missing = max_string_len - labels.shape[1]
    if missing < 0:
        # Fail with an actionable message instead of numpy's
        # "negative dimensions are not allowed".
        raise ValueError(
            "labels have %d columns, which exceeds max_string_len=%d"
            % (labels.shape[1], max_string_len))
    padding = np.full((labels.shape[0], missing), -1.0)
    return np.concatenate((labels, padding), axis=1)

In [7]:
def train(model, x_train, y_train, label_len_train, input_len_train, batch_size=256, epochs=100,
          val_train_ratio=0.2, max_string_len=10, learning_rate=0.0001):
    """Compile ``model`` with Adam and fit it on the given training arrays.

    Args:
        model: model whose inputs are named ``the_input``, ``the_labels``,
            ``label_length`` and ``input_length`` and whose output is named
            ``ctc`` (as produced by ``build_model``).
        x_train: input samples; the first axis indexes samples.
        y_train: 2-D label array; padded with -1 when its width differs from
            ``max_string_len``.
        label_len_train: per-sample label lengths.
        input_len_train: per-sample input lengths.
        batch_size: mini-batch size passed to ``model.fit``.
        epochs: number of epochs passed to ``model.fit``.
        val_train_ratio: fraction of the data passed as ``validation_split``.
        max_string_len: label width the model expects; must match the value
            given to ``build_model``. Defaults to the previously hard-coded 10.
        learning_rate: Adam learning rate. Defaults to the previously
            hard-coded 0.0001.

    Returns:
        The history object returned by ``model.fit``.
    """
    if y_train.shape[1] != max_string_len:
        y_train = pad_labels(y_train, max_string_len)

    adam = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    # The 'ctc' output already is the loss value, so the Keras loss function
    # just passes it through; the zero targets below are placeholders.
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=adam)

    inputs = {
        'the_input': x_train,
        'the_labels': y_train,
        'label_length': label_len_train,
        'input_length': input_len_train,
    }
    dummy_targets = {'ctc': np.zeros([x_train.shape[0]])}
    history = model.fit(x=inputs, y=dummy_targets,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_split=val_train_ratio,
                        shuffle=True,
                        verbose=1)

    return history

In [8]:
def read_data(max_samples=4, n_frames=75):
    """Load clips from ``DATA_PATH`` and one-hot encode their word labels.

    Args:
        max_samples: maximum number of clips pulled from the loader, counting
            clips that are then skipped for having the wrong frame count.
            ``None`` reads everything. The default of 4 matches the previous
            hard-coded cut-off.
        n_frames: required size of each clip's first axis; other clips are
            skipped. Defaults to the previously hard-coded 75.

    Returns:
        Tuple ``(x, y)``: ``x`` is the kept clips stacked along a new first
        axis; ``y`` is the matching stack of one-hot matrices, one row per
        word of the clip.

    Raises:
        ValueError: if no clip with ``n_frames`` frames was found.
    """
    from itertools import islice

    oh = OneHotEncoder()
    le = LabelEncoder()

    x = list()
    y = list()
    all_words = list()
    print("loading images...")
    # Cap the loader up front. The original tested `i == 3` after the
    # `continue`, so a skipped fourth clip bypassed the cap and the whole
    # dataset was read.
    clips = islice(load_data(DATA_PATH, verbose=False, framebyframe=False), max_samples)
    for img, words in clips:
        if img.shape[0] != n_frames:
            continue
        x.append(img)
        y.append(words)
        all_words += words.tolist()

    if not x:
        raise ValueError("no clip with %d frames was loaded from %s" % (n_frames, DATA_PATH))

    # Fit both encoders on every word seen so each clip can be transformed.
    word_ids = le.fit_transform(all_words)
    oh.fit(word_ids.reshape(-1, 1))

    print("convering to np array...")
    x = np.stack(x, axis=0)

    print("transforming y...")
    y = np.stack(
        [np.asarray(oh.transform(le.transform(words).reshape(-1, 1)).todense()) for words in y],
        axis=0)

    return x, y

In [9]:
# NOTE(review): this rebinds the name `load_data`, which the first cell imported
# from `data`. `read_data` looks that name up when it is called, so calling it
# after this cell has run will use the `data_lstm` version instead — confirm
# this is intended, or import under a distinct name.
from data_lstm import load_data
# Run the data_lstm loader on the 's1' subdirectory of DATA_PATH (the logged
# paths below are all under data/s1). The captured run was stopped by a
# KeyboardInterrupt, so `count` was never assigned in that session.
count = load_data(DATA_PATH, 's1')

0: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopwaj8p.mpg
6: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobbifzp.mpg
12: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobras7s.mpg
18: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosbwh8p.mpg
24: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolwwm1s.mpg
30: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobgbb2p.mpg
36: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolrik4p.mpg
42: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgamzp.mpg
48: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopwwq8n.mpg
54: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolrar3a.mpg
60

480: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopwad2n.mpg
486: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoswavzp.mpg
492: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobgbu3s.mpg
498: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosbig6p.mpg
504: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgbm2n.mpg
510: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosbaa4n.mpg
516: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobbwm7a.mpg
522: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopgwr7a.mpg
528: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobbas2p.mpg
534: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videos

972: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopwij2n.mpg
978: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobragzp.mpg
984: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolbwe6p.mpg
990: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobbws9s.mpg
996: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopgid7a.mpg
saving numpy
1002: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopbwp8p.mpg
1008: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosrah5s.mpg
1014: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolbij8p.mpg
1020: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgwa1a.mpg
1026: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing

1428: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosbba8n.mpg
1434: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopbib7s.mpg
1440: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobwbn5a.mpg
1446: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopbav5a.mpg
1452: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosbia1s.mpg
1458: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosgwd1s.mpg
1464: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobgbh4n.mpg
1470: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobbbm1s.mpg
1476: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobbas1s.mpg
1482: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data

1914: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgbm5a.mpg
1920: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosgbc7s.mpg
1926: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosbat7s.mpg
1932: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosbig7a.mpg
1938: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgaf6p.mpg
1944: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolbid4p.mpg
1950: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosran8n.mpg
1956: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoswiu6p.mpg
1962: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoprbp8n.mpg
1968: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data

2400: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobgwo5a.mpg
2406: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoswbpzp.mpg
2412: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolrwr8n.mpg
2418: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopgij8n.mpg
2424: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopgaq9a.mpg
2430: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolwwmzn.mpg
2436: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoswbi5s.mpg
2442: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosrit8n.mpg
2448: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopgwzzp.mpg
2454: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data

2886: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopgid5s.mpg
2892: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobgwo4p.mpg
2898: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolwws4n.mpg
2904: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoswav1a.mpg
2910: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopgwl2p.mpg
2916: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgwg3s.mpg
2922: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolwik8n.mpg
2928: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobbbm2p.mpg
2934: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopwbyzp.mpg
2940: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data

3372: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolbbq9s.mpg
3378: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopric4p.mpg
3384: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobgia5a.mpg
3390: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosgav7a.mpg
3396: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgif1s.mpg
3402: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoswbv3s.mpg
3408: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolwilzp.mpg
3414: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoswwv8p.mpg
3420: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosbah1a.mpg
3426: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data

3858: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosbig5s.mpg
3864: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolrwf3a.mpg
3870: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosgav6p.mpg
3876: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopbapzp.mpg
3882: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobwbh1a.mpg
3888: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosrin6p.mpg
3894: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosbwu4n.mpg
3900: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolbayzp.mpg
3906: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosriu1a.mpg
3912: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data

4302: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolwir2n.mpg
4308: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobbiz2p.mpg
4314: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopbao8n.mpg
4320: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosrbb5s.mpg
4326: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolwir4p.mpg
4332: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosgiv3a.mpg
4338: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosgiczp.mpg
4344: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopwwk4n.mpg
4350: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobwbn2n.mpg
4356: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data

4758: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobgat8n.mpg
4764: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobgia4p.mpg
4770: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosgav4n.mpg
4776: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobbbf7s.mpg
4782: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopwij5a.mpg
4788: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolrwz2n.mpg
4794: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopgix9a.mpg
4800: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoprbqzp.mpg
4806: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgal9s.mpg
4812: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data

5214: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobwba4n.mpg
5220: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoswwp3s.mpg
5226: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosgib9s.mpg
5232: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosrin5s.mpg
5238: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgif3a.mpg
5244: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgir8n.mpg
5250: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolwbz6p.mpg
5256: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosgwdzn.mpg
5262: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolbbr1a.mpg
5268: reading - /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data

KeyboardInterrupt: 