In [1]:
from data import load_data
import matplotlib.pyplot as plt
import os
import numpy as np

import time

from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split

from keras.models import Model
from keras.layers.wrappers import TimeDistributed, Bidirectional
from keras.layers.recurrent import GRU
from keras.layers import Input
from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding3D
from keras.layers.core import Lambda, Dropout, Flatten, Dense, Activation
from keras.optimizers import Adam
from keras import backend as K

  (fname, cnt))
  (fname, cnt))
Using TensorFlow backend.


In [2]:
# Directory of the preprocessing code on the training machine.
CURRENT_PATH = '/home/ubuntu/assignments/machine-lip-reading/preprocessing'
# The dataset directory is the `data` folder next to the preprocessing one.
DATA_PATH = '{}/../data'.format(CURRENT_PATH)

In [3]:
def ctc_lambda_func(args):
    """Compute the per-sample CTC loss; meant to be wrapped in a Keras ``Lambda``.

    Args:
        args: a 4-sequence ``(y_pred, labels, input_length, label_length)``.
            y_pred: network output, batch-major (the loss is called with
                ``time_major=False``). ``build_model`` passes softmax
                probabilities here.
            labels: dense label matrix; only the first ``label_length``
                entries of each row are used when converting to sparse form.
            input_length: per-sample number of valid time steps, with a
                trailing axis of size 1.
            label_length: per-sample number of valid labels, with a trailing
                axis of size 1.

    Returns:
        A ``(batch, 1)`` tensor holding the CTC loss of every sample.
    """
    # Function-local import kept from the original implementation.
    import tensorflow as tf
    y_pred, labels, input_length, label_length = args

    # NOTE: the Keras image_ocr.py example drops the first two RNN outputs
    # (y_pred[:, 2:, :]) because they tend to be garbage; that is deliberately
    # not done here.

    # Squeeze only the trailing axis: an unqualified squeeze would also remove
    # the batch axis whenever the batch holds a single sample.
    label_length = K.cast(K.squeeze(label_length, axis=-1), 'int32')
    input_length = K.cast(K.squeeze(input_length, axis=-1), 'int32')
    sparse_labels = K.ctc_label_dense_to_sparse(labels, label_length)

    # tf.nn.ctc_loss applies a softmax to its inputs itself. y_pred is already
    # a probability distribution, so feed log-probabilities (softmax(log p) == p)
    # instead of normalising twice. This mirrors what K.ctc_batch_cost does.
    log_probs = K.log(y_pred + K.epsilon())

    loss = tf.nn.ctc_loss(sparse_labels, log_probs, input_length,
                          ctc_merge_repeated=False,
                          ignore_longer_outputs_than_inputs=True,
                          time_major=False)
    # Return (batch, 1) so the tensor agrees with the Lambda's output_shape=(1,).
    return K.expand_dims(loss, 1)

In [4]:
def CTC(name, args):
    """Apply ``ctc_lambda_func`` to ``args`` through a named ``Lambda`` layer.

    Args:
        name: name given to the created ``Lambda`` layer.
        args: list of tensors forwarded to ``ctc_lambda_func``.

    Returns:
        The output tensor of the ``Lambda`` layer (declared output shape ``(1,)``).
    """
    ctc_layer = Lambda(ctc_lambda_func, output_shape=(1,), name=name)
    return ctc_layer(args)

In [5]:
def build_model(input_size, output_size = 28, max_string_len = 10):
    """Build the conv + bidirectional-GRU network with a CTC loss output.

    Args:
        input_size: shape of one sample passed to the ``'the_input'`` layer
            (batch axis excluded). Presumably (frames, height, width,
            channels), given the per-frame ``TimeDistributed(Conv2D)``
            stack -- TODO confirm against the data loader.
        output_size: number of units of the final ``Dense`` layer, i.e. the
            size of the softmax distribution at every time step.
        max_string_len: width of the ``'the_labels'`` input.

    Returns:
        A Keras ``Model`` taking ``[the_input, the_labels, label_length,
        input_length]`` and producing the output of the ``'ctc'`` layer.
        The model summary is printed as a side effect.
    """
    input_data = Input(name='the_input', shape=input_size, dtype='float32')
    x = ZeroPadding3D(padding=(0,2,2), name='padding1')(input_data)

    # Three identical per-frame stages: strided 5x5 conv -> 2x2 max-pool ->
    # dropout. Each pooling layer gets its own name (max1, max2, max3); the
    # original reused 'max1' for all three.
    for block_idx, n_filters in enumerate((32, 32, 4), start=1):
        x = TimeDistributed(Conv2D(filters=n_filters, kernel_size=5, strides=(2, 2),
                                   padding='same', activation='relu'))(x)
        x = TimeDistributed(MaxPooling2D(pool_size=(2, 2), strides=None,
                                         name='max%d' % block_idx))(x)
        x = Dropout(0.5)(x)

    # Flatten every frame's feature map so the recurrent layer sees one vector
    # per time step.
    input_lstm = TimeDistributed(Flatten())(x)

    x_lstm = Bidirectional(GRU(256, return_sequences=True, kernel_initializer='Orthogonal', name='gru1'), merge_mode='concat')(input_lstm)
    x_lstm = Dense(output_size, kernel_initializer='he_normal', name='dense1')(x_lstm)
    print("after dense1")
    y_pred = Activation('softmax', name='softmax')(x_lstm)

    # Auxiliary inputs consumed only by the CTC loss layer.
    labels = Input(name='the_labels', shape = [max_string_len], dtype='int32')
    input_length = Input(name = 'input_length', shape =[1], dtype = 'int32')
    label_length = Input(name = 'label_length', shape = [1], dtype = 'int32')
    loss = CTC('ctc',[y_pred, labels, input_length, label_length])

    # The loss is the model's only output; training feeds inputs by name.
    model = Model(inputs=[input_data, labels, label_length, input_length],
                  outputs = loss)
    model.summary()

    return model

In [6]:
def pad_labels(labels, max_string_len):
    """Right-pad every label row with -1 up to ``max_string_len`` columns.

    Args:
        labels: 2-D array of shape (num_samples, current_len).
        max_string_len: number of columns of the returned array.

    Returns:
        A new array of shape (num_samples, max_string_len). The dtype of
        ``labels`` is kept whenever it can represent -1 (signed integers stay
        integers, floats stay floats); unsigned/bool inputs are promoted to
        the smallest type that can.

    Raises:
        ValueError: if ``labels`` already has more than ``max_string_len``
            columns.
    """
    labels = np.asarray(labels)
    missing = max_string_len - labels.shape[1]
    if missing < 0:
        raise ValueError(
            "labels have %d columns, which exceeds max_string_len=%d"
            % (labels.shape[1], max_string_len))

    # Smallest dtype able to hold both the existing labels and the -1 filler;
    # avoids silently turning integer labels into float64.
    out_dtype = np.result_type(labels.dtype, np.int8)
    padding = np.full((labels.shape[0], missing), -1, dtype=out_dtype)
    return np.concatenate((labels.astype(out_dtype, copy=False), padding), axis=1)

In [7]:
def train(model, x_train, y_train, label_len_train, input_len_train, batch_size=256, epochs=100, val_train_ratio=0.2, max_string_len=10):
    """Compile ``model`` with Adam and fit it on the given training data.

    Args:
        model: model exposing inputs named ``'the_input'``, ``'the_labels'``,
            ``'label_length'``, ``'input_length'`` and an output named
            ``'ctc'`` (as produced by ``build_model``).
        x_train: input samples fed to ``'the_input'``.
        y_train: 2-D label matrix; padded with ``pad_labels`` when its second
            dimension differs from ``max_string_len``.
        label_len_train: per-sample label lengths fed to ``'label_length'``.
        input_len_train: per-sample input lengths fed to ``'input_length'``.
        batch_size: mini-batch size.
        epochs: number of training epochs.
        val_train_ratio: fraction of the data held out via
            ``validation_split``.
        max_string_len: label width the model expects; must match the value
            used when the model was built. Defaults to 10, the value that was
            previously hard-coded here.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    if y_train.shape[1] != max_string_len:
        y_train = pad_labels(y_train, max_string_len)

    optimizer = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    # The 'ctc' output is used directly as the loss value, so the Keras loss
    # function simply forwards the prediction and ignores the target.
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=optimizer)

    inputs = {
        'the_input': x_train,
        'the_labels': y_train,
        'label_length': label_len_train,
        'input_length': input_len_train,
    }
    # Dummy targets: required by fit(), ignored by the pass-through loss above.
    targets = {'ctc': np.zeros([x_train.shape[0]])}

    history = model.fit(x=inputs, y=targets,
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_split=val_train_ratio,
                        shuffle=True,
                        verbose=1)

    return history

In [8]:
def read_data(max_items=4, num_frames=75):
    """Load samples via ``load_data`` and one-hot encode their words.

    Args:
        max_items: maximum number of items pulled from ``load_data`` (positive
            int), or ``None`` to read everything. The default of 4 keeps the
            limit that was previously hard-coded as ``if i == 3: break``.
        num_frames: required size of the first axis of every image array;
            samples with a different size are skipped.

    Returns:
        ``(x, y)`` where ``x`` stacks the accepted image arrays along a new
        first axis and ``y`` stacks, per sample, the one-hot encoding of its
        words. Stacking ``y`` requires every sample to carry the same number
        of words.

    Raises:
        ValueError: if no sample with ``num_frames`` frames was found.
    """
    oh = OneHotEncoder()
    le = LabelEncoder()

    x = []
    y = []
    vocab = []
    print("loading images...")
    for i, (img, words) in enumerate(load_data(DATA_PATH, verbose=False, framebyframe=False)):
        if img.shape[0] == num_frames:
            x.append(img)
            y.append(words)
            vocab += words.tolist()

        # Check the limit for every item, including skipped ones. The original
        # `continue` jumped over the limit check, so a wrongly sized item at
        # the cut-off index caused the whole dataset to be loaded.
        if max_items is not None and i + 1 >= max_items:
            break

    if not x:
        raise ValueError(
            "no sample with %d frames found in %s" % (num_frames, DATA_PATH))

    # Fit the word -> integer -> one-hot encoders on every word seen.
    vocab = le.fit_transform(vocab)
    oh.fit(vocab.reshape(-1, 1))

    print("convering to np array...")
    x = np.stack(x, axis=0)

    print("transforming y...")
    encoded = []
    for words in y:
        word_ids = le.transform(words)
        encoded.append(oh.transform(word_ids.reshape(-1, 1)).toarray())
    y = np.stack(encoded, axis=0)

    return x, y

In [1]:
# NOTE(review): this rebinds `load_data`, which the first cell imported from
# `data`. `read_data()` calls it as load_data(DATA_PATH, verbose=False,
# framebyframe=False), whereas the call below passes a second positional
# argument -- confirm which loader each call site is meant to use.
from data_lstm import load_data
# NOTE(review): same values as the constants defined in the configuration cell
# above; repeated here, presumably so this cell runs on a fresh kernel.
CURRENT_PATH = '/home/ubuntu/assignments/machine-lip-reading/preprocessing'
DATA_PATH = CURRENT_PATH + '/../data'
# 's1' appears to select the `s1` sub-directory of DATA_PATH (the log printed
# below reads from .../data/s1/) -- TODO confirm against data_lstm.load_data.
count = load_data(DATA_PATH, 's1')

0: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopwaj8p.mpg
6: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobbifzp.mpg
12: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobras7s.mpg
18: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosbwh8p.mpg
24: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolwwm1s.mpg
30: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobgbb2p.mpg
36: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolrik4p.mpg
42: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgamzp.mpg
48: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopwwq8n.mpg
54: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing

462: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolrbe7s.mpg
468: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolwaz1s.mpg
474: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobwim5s.mpg
480: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopwad2n.mpg
486: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoswavzp.mpg
492: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobgbu3s.mpg
498: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosbig6p.mpg
504: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgbm2n.mpg
510: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosbaa4n.mpg
516: reading from /home/ubuntu/assignments/machine-lip-reading/p

936: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolwie4n.mpg
942: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosrah6p.mpg
948: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosbig4n.mpg
954: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosrah7a.mpg
960: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosrbb6p.mpg
966: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopriv7a.mpg
972: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopwij2n.mpg
978: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobragzp.mpg
984: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolbwe6p.mpg
990: reading from /home/ubuntu/assignments/machine-lip-reading/p

1380: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoprbx5a.mpg
1386: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosgbc6n.mpg
1392: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobwig2p.mpg
1398: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosgwd3a.mpg
1404: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopbib9a.mpg
1410: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobgia2n.mpg
1416: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopbwj2n.mpg
1422: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoswab9a.mpg
1428: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosbba8n.mpg
1434: reading from /home/ubuntu/assignments/machine-lip

1854: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgwm9a.mpg
1860: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobgbn9s.mpg
1866: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobgah3a.mpg
1872: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolriyzn.mpg
1878: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoswwc5s.mpg
1884: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopriv5s.mpg
1890: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoprap6p.mpg
1896: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobbif1a.mpg
1902: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobwbn4p.mpg
1908: reading from /home/ubuntu/assignments/machine-lip

2322: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobwba5s.mpg
2328: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosrwvzn.mpg
2334: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgiz5a.mpg
2340: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoswih6n.mpg
2346: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgal8n.mpg
2352: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosrbo5a.mpg
2358: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolbbk5s.mpg
2364: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosrwi5a.mpg
2370: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosgwx3s.mpg
2376: reading from /home/ubuntu/assignments/machine-lip

2796: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolray4n.mpg
2802: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosrbu7s.mpg
2808: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolbbk6p.mpg
2814: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopgwe6n.mpg
2820: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobgit7a.mpg
2826: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobwat2n.mpg
2832: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolrak7s.mpg
2838: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopwbx9s.mpg
2844: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgwazp.mpg
2850: reading from /home/ubuntu/assignments/machine-lip

3264: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopgbe2n.mpg
3270: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoprwq3s.mpg
3276: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgbs9a.mpg
3282: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopric5a.mpg
3288: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosrbu9a.mpg
3294: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobbbz8n.mpg
3300: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoprwd5s.mpg
3306: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosgbxzp.mpg
3312: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobbaz6p.mpg
3318: reading from /home/ubuntu/assignments/machine-lip

3738: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgwt3a.mpg
3744: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoprwd4n.mpg
3750: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobwag5s.mpg
3756: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolbiq2p.mpg
3762: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolrbe6n.mpg
3768: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobgia3s.mpg
3774: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobgat9s.mpg
3780: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobwim4n.mpg
3786: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopwbq4n.mpg
3792: reading from /home/ubuntu/assignments/machine-lip

4194: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolbay1a.mpg
4200: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosgbc9a.mpg
4206: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopwad3s.mpg
4212: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolrarzn.mpg
different size, skip
different size, skip
different size, skip
different size, skip
different size, skip
different size, skip
4212: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobwim7a.mpg
4218: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobbal6n.mpg
4224: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosran9s.mpg
different size, skip
different size, skip
different size, skip
different size, skip
different size, skip
different size, skip
4224: reading

4626: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosriuzp.mpg
4632: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopwix3a.mpg
4638: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobwwn8p.mpg
4644: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobrbazp.mpg
4650: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobbwg2p.mpg
4656: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolgbgzp.mpg
4662: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobwaazn.mpg
4668: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobbal9a.mpg
4674: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosrau2n.mpg
4680: reading from /home/ubuntu/assignments/machine-lip

5058: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosrbb7a.mpg
5064: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobwaa3a.mpg
5070: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobwwa8n.mpg
5076: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopbib6n.mpg
5082: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopgby4n.mpg
5088: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobgin1s.mpg
5094: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosbwb3s.mpg
5100: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobras6n.mpg
5106: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoswbc2p.mpg
5112: reading from /home/ubuntu/assignments/machine-lip

5532: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobbaszn.mpg
5538: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolwbl9a.mpg
5544: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videopgiq2n.mpg
5550: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videolbwe4n.mpg
5556: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosrwb9s.mpg
5562: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videobwit1a.mpg
5568: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoswwc7a.mpg
5574: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videoswab8p.mpg
5580: reading from /home/ubuntu/assignments/machine-lip-reading/preprocessing/../data/s1/videosgap1s.mpg
5586: reading from /home/ubuntu/assignments/machine-lip