In [None]:
# Colab-specific step: mount Google Drive at /content/drive so files stored
# there can be accessed through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
import os

Initialize path

In [None]:
# Base folder for everything this notebook reads and writes: the .npy arrays,
# model.h5 and training_log.csv are all joined onto it.
# NOTE: this is a relative path, so it resolves against the current working
# directory; it reaches the Drive mounted at /content/drive only while the
# working directory is /content.
path = "drive/MyDrive/Lipread" # Change to desired data path
path

'drive/MyDrive/Lipread'

In [None]:
# Locations of the saved feature (x_*) and label (y_*) arrays under `path`,
# one pair for training and one pair for validation.
x_train_path, y_train_path = (
    os.path.join(path, file_name) for file_name in ("x_train.npy", "y_train.npy")
)
x_val_path, y_val_path = (
    os.path.join(path, file_name) for file_name in ("x_val.npy", "y_val.npy")
)

In [None]:
# Read the saved training / validation arrays from disk into host memory.
# np.load is plain NumPy file I/O and executes no TensorFlow ops, so a
# tf.device scope around it has no effect and is not used here.
x_train = np.load(x_train_path)
y_train = np.load(y_train_path)
x_val = np.load(x_val_path)
y_val = np.load(y_val_path)

# Fail fast on mismatched feature/label pairs instead of part-way into model.fit.
assert len(x_train) == len(y_train), "x_train / y_train sample counts differ"
assert len(x_val) == len(y_val), "x_val / y_val sample counts differ"

In [None]:
# Shape of a single sample: every axis of x_train after the leading sample
# axis. Reading `.shape` is a NumPy attribute access, so no tf.device scope
# is needed, and no sample has to be indexed to obtain it.
input_shape = x_train.shape[1:]
input_shape

(75, 50, 100, 3)

Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D, GRU, Dense, Bidirectional, MaxPool3D, Activation, TimeDistributed, Flatten, SpatialDropout3D, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint, CSVLogger, EarlyStopping, ReduceLROnPlateau
from tensorflow import keras

In [None]:
def build_model(input_shape, num_classes=29):
    """Assemble the Conv3D + bidirectional-GRU network trained in this notebook.

    Args:
        input_shape: Shape of one sample without the batch axis. For the
            arrays loaded above this is (75, 50, 100, 3).
        num_classes: Width of the final softmax layer; 29 in this notebook.

    Returns:
        An uncompiled `Sequential` model. The first axis of `input_shape`
        is preserved end to end (stride 1 / pool size 1 on that axis and
        `return_sequences=True`), so the model emits one softmax vector of
        size `num_classes` per position along that axis.
    """
    # (filters, kernel_size, strides) for the three convolution stages.
    conv_stages = [
        (32, (3, 5, 5), (1, 2, 2)),
        (64, (3, 5, 5), (1, 1, 1)),
        (96, (3, 3, 3), (1, 1, 1)),
    ]

    net = Sequential()
    # Explicit Input layer instead of the `input_shape=` kwarg on the first
    # Conv3D, which newer Keras versions warn about.
    net.add(keras.Input(shape=input_shape))

    for filters, kernel_size, strides in conv_stages:
        net.add(Conv3D(filters, kernel_size, strides=strides, padding='same', kernel_initializer='he_normal'))
        net.add(BatchNormalization())
        net.add(Activation('relu'))
        net.add(SpatialDropout3D(0.2))
        # Pool size 1 on the first axis: only the two following axes shrink.
        net.add(MaxPool3D((1, 2, 2), strides=(1, 2, 2)))

    # Collapse each step's feature volume to a vector before the recurrent layers.
    net.add(TimeDistributed(Flatten()))

    for _ in range(2):
        net.add(Bidirectional(GRU(256, return_sequences=True, kernel_initializer='Orthogonal')))
        net.add(Dropout(0.5))

    net.add(Dense(num_classes, kernel_initializer='he_normal', activation='softmax'))
    return net


# `input_shape` is the value derived from x_train in the data-loading section,
# so the network always matches the arrays that are actually loaded.
model = build_model(input_shape)

In [None]:
def CTCLoss(y_true, y_pred):
    """CTC loss wrapper usable as a Keras `loss=` callable.

    Builds the two length tensors that `keras.backend.ctc_batch_cost`
    requires and forwards everything to it.

    Args:
        y_true: Batch of label sequences. Entries equal to 0 are not counted
            towards a sequence's length (assumes 0 is used only as padding —
            TODO confirm against the label encoding).
        y_pred: Batch of model outputs; the size of axis 1 is used as the
            prediction length for every sample.

    Returns:
        Whatever `keras.backend.ctc_batch_cost` returns for the batch.
    """
    n_samples = tf.cast(tf.shape(y_true)[0], dtype="int64")
    n_steps = tf.cast(tf.shape(y_pred)[1], dtype="int64")

    # Every sample is given the full prediction length, as a (batch, 1) column.
    pred_lengths = n_steps * tf.ones(shape=(n_samples, 1), dtype="int64")

    # Label length per sample = number of non-zero entries in its row.
    is_label = tf.not_equal(y_true, 0)
    true_lengths = tf.reshape(
        tf.reduce_sum(tf.cast(is_label, dtype="int64"), axis=-1),
        [n_samples, 1],
    )

    return keras.backend.ctc_batch_cost(y_true, y_pred, pred_lengths, true_lengths)

In [None]:
# Attach the optimizer and the custom CTC loss; no extra metrics are tracked.
model.compile(
    loss=CTCLoss,
    optimizer=Adam(learning_rate=1e-4),
)

In [None]:
# Save the full model (not just weights) to model.h5 under `path`, and only
# when the monitored validation loss improves on the best value so far.
model_path = os.path.join(path, "model.h5")
checkpoint_callback = ModelCheckpoint(
    model_path,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
)

In [None]:
# Per-epoch metrics are written to training_log.csv under `path`.
csv_path = os.path.join(path, "training_log.csv")
csv_logger = CSVLogger(csv_path)

# Both callbacks below watch the validation loss. The learning rate is
# scaled by 0.1 after 7 epochs without improvement (floor 1e-6), and
# training stops after 15 such epochs.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    patience=7,
    factor=0.1,
    min_lr=1e-6,
)
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=15,
    verbose=1,
)

In [None]:
# Train for at most 250 epochs in batches of 8, validating on the held-out
# arrays after each epoch. Callback order is kept as: checkpoint, LR
# reduction, CSV logging, early stopping.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=8,
    epochs=250,
    validation_data=(x_val, y_val),
    callbacks=[
        checkpoint_callback,
        reduce_lr,
        csv_logger,
        early_stopping_callback,
    ],
)