In [None]:
# Mount Google Drive at /content/drive so files stored there can be read by later cells.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import tensorflow as tf
from tensorflow import keras
import os
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
# Directory (relative path) from which the model and validation-array paths are built.
# NOTE(review): being relative, this resolves against the kernel's working directory.
path = "drive/MyDrive/Lipread"

In [None]:
# Location of the saved model file that is passed to load_model.
model_path = os.path.join(path, "model.h5")

In [None]:
def CTCLoss(y_true, y_pred):
    """Return the per-sample CTC loss for a batch of labels and predictions.

    Every sample in the batch is given the same input length (the size of
    axis 1 of ``y_pred``) and the same label length (the size of axis 1 of
    ``y_true``); both are passed to ``ctc_batch_cost`` as ``(batch, 1)``
    int64 tensors.

    Args:
        y_true: Batch of label sequences; axis 0 is the batch, axis 1 the
            label positions.
        y_pred: Batch of model outputs; axis 0 is the batch, axis 1 the
            time steps.

    Returns:
        The tensor produced by ``keras.backend.ctc_batch_cost``.
    """
    n_samples = tf.cast(tf.shape(y_true)[0], dtype="int64")
    # Column of ones used to broadcast the scalar lengths to one row per sample.
    per_sample = tf.ones(shape=(n_samples, 1), dtype="int64")

    input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64") * per_sample
    label_length = tf.cast(tf.shape(y_true)[1], dtype="int64") * per_sample

    return keras.backend.ctc_batch_cost(y_true, y_pred, input_length, label_length)

In [None]:
# Restore the saved network; the custom loss is registered by name so that
# load_model can resolve the 'CTCLoss' reference.
model = tf.keras.models.load_model(
    model_path,
    custom_objects={'CTCLoss': CTCLoss},
)

In [None]:
# Print the layer-by-layer architecture of the loaded model.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d (Conv3D)             (None, 75, 25, 50, 32)    7232      
                                                                 
 batch_normalization (Batch  (None, 75, 25, 50, 32)    128       
 Normalization)                                                  
                                                                 
 activation (Activation)     (None, 75, 25, 50, 32)    0         
                                                                 
 spatial_dropout3d (Spatial  (None, 75, 25, 50, 32)    0         
 Dropout3D)                                                      
                                                                 
 max_pooling3d (MaxPooling3  (None, 75, 12, 25, 32)    0         
 D)                                                              
                                                        

In [None]:
# Path of the .npy file that is loaded into `data` (the model inputs).
data_path = os.path.join(path, "x_val.npy")
data_path

'drive/MyDrive/Lipread/x_val.npy'

In [None]:
# Path of the .npy file that is loaded into `tokens` (the label sequences).
token_path = os.path.join(path, "y_val.npy")
token_path

'drive/MyDrive/Lipread/y_val.npy'

In [None]:
# Load the input clips as a NumPy array. np.load always reads into host
# memory, so no tf.device(...) scope is needed: that context only controls
# where TensorFlow ops are placed and has no effect on NumPy I/O.
data = np.load(data_path)

In [None]:
# Load the label sequences as a NumPy array. np.load always reads into host
# memory, so no tf.device(...) scope is needed: that context only controls
# where TensorFlow ops are placed and has no effect on NumPy I/O.
tokens = np.load(token_path)
tokens

array([[16, 12,  1, ...,  0,  0,  0],
       [12,  1, 25, ...,  0,  0,  0],
       [19,  5, 20, ...,  0,  0,  0],
       ...,
       [16, 12,  1, ...,  0,  0,  0],
       [12,  1, 25, ...,  0,  0,  0],
       [19,  5, 20, ...,  0,  0,  0]])

In [None]:
# Copy the first sample and add a leading batch axis of size 1 for model.predict.
test = np.array(data[0])[np.newaxis, ...]
test.shape

(1, 75, 50, 100, 3)

In [None]:
# Run the loaded model on the single-sample batch.
pred = model.predict(test)



In [None]:
# Show the raw model output for inspection.
print(pred)

[[[2.7728998e-08 7.6338716e-07 2.0142789e-04 ... 1.0398776e-07
   2.4481401e-06 1.5878054e-08]
  [1.7674633e-08 1.2174378e-04 3.4090231e-06 ... 9.2722637e-08
   4.1156252e-08 1.5850958e-07]
  [1.9170782e-08 9.9936301e-01 1.1203183e-06 ... 4.3423338e-08
   6.6272414e-09 4.2172851e-06]
  ...
  [3.1683296e-08 2.9626192e-04 1.6498934e-07 ... 1.2517170e-07
   6.1109007e-05 9.9421740e-01]
  [5.0475722e-07 4.8327074e-08 1.0699538e-06 ... 3.0184668e-07
   9.5517084e-05 9.4804991e-06]
  [5.6318083e-08 4.9439397e-08 3.8136696e-07 ... 1.5100633e-07
   2.6998325e-04 1.6098704e-07]]]


In [None]:
# Greedy pick: take the highest-scoring class index along axis 2, then
# flatten the result to a 1-D sequence of indices.
y_pred = np.argmax(pred, axis=2).flatten()
y_pred

array([16, 12,  1,  3,  5, 27,  2, 12, 21, 21,  5,  5, 27, 27, 27, 27,  1,
       28, 28, 20, 27, 27, 27, 27,  9, 28, 28, 28, 28, 28, 27, 27, 27, 27,
        6,  6, 15, 15, 28, 28, 28, 28, 28, 28, 21, 21, 21, 28, 28, 28, 28,
       28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 18, 18, 27, 27, 27,
       27, 27, 27, 14, 28, 15, 23])

In [None]:
# Character set: lowercase letters plus space. The same vocabulary backs both
# lookup directions; the out-of-vocabulary token is the empty string.
vocab = list("abcdefghijklmnopqrstuvwxyz ")
char_to_num = tf.keras.layers.StringLookup(vocabulary=vocab, oov_token="")
num_to_char = tf.keras.layers.StringLookup(
    vocabulary=vocab, oov_token="", invert=True
)

In [None]:
def decode_ctc_predictions(predictions, blank_token):
    """Collapse a frame-by-frame CTC symbol sequence into its final string.

    Applies the standard greedy CTC rule: consecutive duplicates are merged
    first, then blanks are removed. A blank therefore separates two runs, so
    a symbol repeated on both sides of a blank is emitted twice
    (``l, <blank>, l`` -> ``"ll"``), which is how CTC represents genuine
    double letters.

    Args:
        predictions: Iterable of per-frame symbols (strings).
        blank_token: The symbol that represents the CTC blank.

    Returns:
        The decoded string; empty when ``predictions`` is empty or contains
        only blanks.
    """
    decoded_output = []
    previous_char = None

    for char in predictions:
        # Emit a symbol only on the first frame of a run of that symbol.
        if char != blank_token and char != previous_char:
            decoded_output.append(char)
        # Track blanks as well: the previous version skipped this update on a
        # blank, so a repeat after a blank was wrongly merged into the run
        # before it ("hel-lo" decoded to "helo").
        previous_char = char
    return ''.join(decoded_output)


In [None]:
# Map class indices back to characters for both the prediction and the label.
text_pred = num_to_char(y_pred)
text_true = num_to_char(tokens[0])
riel_pred = [i.decode('utf-8') for i in np.array(text_pred)]
riel_true = [i.decode('utf-8') for i in np.array(text_true)]
# Only the model output is a frame-level CTC sequence that needs collapsing.
decoded_result = decode_ctc_predictions(riel_pred, blank_token='')
# The ground-truth label is already the final text, so it must not go through
# the CTC repeat-collapse (that would turn e.g. "green" into "gren").
# Trailing 0 padding is expected to map to the empty OOV string and so
# vanishes in the join -- TODO confirm if the padding value ever changes.
decoded_true = ''.join(riel_true)
print("Non decoded result:", tf.strings.reduce_join(text_pred))
print("Decoded result:", decoded_result)
print("True result:", decoded_true)

Non decoded result: tf.Tensor(b'place bluuee    at    i    ffoouuurr      now', shape=(), dtype=string)
Decoded result: place blue at i four now
True result: place blue at i four now
