In [48]:
from IPython.display import Video
from keras.optimizers import Adam
import pandas as pd
import numpy as np
from model import *
from decode import *
from spell import *
from wer import *
import os

In [49]:
def labels_to_text(labels):
    tokens = {"_": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "E": 5, "T": 6, "A": 7, "O": 8, "N": 9, "I": 10, "H": 11, "S": 12, "R": 13, "D": 14, "L": 15, "U": 16, "M": 17, "W": 18, "C": 19, "F": 20, "G": 21, "Y": 22, "P": 23, "B": 24, "V": 25, "K": 26, "'": 27, "X": 28, "J": 29, "Q": 30, "Z": 31}
    rev_tokens = {value : key for (key, value) in tokens.items()}
    text = ''
    for l in labels:
        if l == -1:
            continue
        text += rev_tokens[l]
    return text

def labels_to_text(labels):
    # 26 is space, 27 is CTC blank char
    text = ''
    for c in labels:
        if c >= 0 and c < 26:
            text += chr(c + ord('a'))
        elif c == 26:
            text += ' '
    return text

In [50]:
y = pd.read_csv('y_label.csv')
y = y.drop('Unnamed: 0',axis=1)
y['ctc_text'] += '____'

In [51]:
paths = os.listdir('test_data')
paths_dict = {}
for file in paths:
    if file[-4:] == '.mp4':
        paths_dict[file[:-4]] = {'video':file,'array':''}
    if file[-4:] == '.npy':
        paths_dict[file.split('_')[1][:-4]]['array'] = file

In [85]:
def predict(array):
    path = 'test_data/'+array
    xtest = np.load(path)
    #xtest = np.transpose(xtest,(0,2,1,3))
    xtest = np.expand_dims(xtest, axis=0)
    #print(xtest.shape)
    spell = Spell(path='./grid.txt')
    decoder = Decoder(greedy=False, beam_width=200, postprocessors=[labels_to_text, spell.sentence])

    lipnet = LipNet(img_c=3, img_w=100, img_h=50, frames_n=75, absolute_max_string_len=32, output_size=28)
    adam = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    lipnet.model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=adam)
    lipnet.run_eagerly = True
    lipnet.model.load_weights('model_weights_9415.h5')

    y_pred = lipnet.model.predict(xtest)

    pred = decoder.decode(y_pred, [75])[0]

    wer = wer_sentence(pred.lower(), y[y['ids'] == array[:-4]]['transcriptions'].values[0].lower())
    print('True : {}'.format(pred)+'\n'+'Pred : {}'.format(y[y['ids'] == array[:-4]]['transcriptions'].values[0].lower())+'\n'+'WER : {} %'.format(str(wer)))
    #return pred,wer

In [86]:
file = list(paths_dict.keys())[0]
video = paths_dict[file]['video']
Video('test_data/'+video)

In [87]:
array = paths_dict[file]['array']
predict(array)

True : bin blue at d nine now
Pred : benblu at dean nine now
WER : 50.0 %


In [88]:
file = list(paths_dict.keys())[1]
video = paths_dict[file]['video']
Video('test_data/'+video)

In [89]:
array = paths_dict[file]['array']
predict(array)

True : bin blue at e three please
Pred : bin blue at e three please
WER : 0.0 %


In [90]:
file = list(paths_dict.keys())[2]
video = paths_dict[file]['video']
Video('test_data/'+video)

In [91]:
array = paths_dict[file]['array']
predict(array)

True : bin blue at f three please
Pred : been blue at f three please
WER : 16.666666666666664 %


In [92]:
file = list(paths_dict.keys())[3]
video = paths_dict[file]['video']
Video('test_data/'+video)

In [93]:
array = paths_dict[file]['array']
predict(array)

True : bin blue at i four now
Pred : ben bluitiporna
WER : 100.0 %


In [94]:
file = list(paths_dict.keys())[4]
video = paths_dict[file]['video']
Video('test_data/'+video)

In [95]:
array = paths_dict[file]['array']
predict(array)

True : bin blue at j four again
Pred : bin blue at je for again
WER : 33.33333333333333 %
