In [1]:
import numpy as np
import tensorflow as tf 
import pickle
import os
import glob
import tqdm
import matplotlib.pyplot as plt
import itertools
import multiprocessing
%matplotlib inline

In [64]:
# Load the alignment file as a list of raw lines (trailing newlines kept);
# the lines are split into fields in a later cell.
with open("../data/lip_reading/new_alig", "r") as alignment_file:
    labels = list(alignment_file)

In [65]:
# Tokenise every alignment line on whitespace; str.split() with no arguments
# already discards leading/trailing whitespace.
# NOTE: this rebinds `labels` from a list of strings to a list of token lists,
# so the cell cannot be re-run without re-reading the file first.
labels = [line.split() for line in labels]

In [66]:
# Maps a recording name to its transcription; filled in by the following cells.
file2words = dict()

In [67]:
# Each row of `labels` is a file name followed by its phone labels.
# Runs of identical consecutive phones are collapsed to a single phone, the
# phones are concatenated without separators, and every "SIL" marker becomes
# a space (leading/trailing spaces are trimmed).
# NOTE: groupby also merges equal phones that meet at a word boundary
# (e.g. a word ending in "n" followed by a word starting with "n").
for filename, *phones in labels:
    collapsed = "".join(phone for phone, _run in itertools.groupby(phones))
    file2words[filename] = collapsed.replace("SIL", " ").strip()

In [68]:
# Spot-check the collapsed phoneme string of one specific recording.
file2words["M0202_02_48107_Win_Dell"]

"chi1ty0r'i4vo0s'i4m'a1d'i0no0l' s'e0m'"

In [69]:
# Peek at the first stored phoneme string (materialises all values just to
# take the first one).
list(file2words.values())[0]

"vo0s'i4m'chi1ty0r'i4d'e0v'i4t'a1d'i0nshe0s't'"

In [70]:
# Read the pronunciation lexicon as raw lines (trailing newlines kept).
# The lexicon contains Cyrillic words, so the encoding is given explicitly
# instead of relying on the platform/locale default.
# NOTE(review): assumes the file is stored as UTF-8 — confirm.
with open("../data/lip_reading/lexicon", "r", encoding="utf-8") as f:
    lexicon = f.readlines()

In [71]:
# Show the raw lexicon lines: one "<word> <phone> <phone> ..." entry per line.
lexicon

['<sil> SIL\n',
 "ноль n o0 l'\n",
 "один a1 d' i0 n\n",
 'два d v a0\n',
 "три t r' i0\n",
 "четыре ch i1 t y0 r' i4\n",
 "пять p' a0 t'\n",
 "шесть sh e0 s' t'\n",
 "семь s' e0 m'\n",
 "восемь v o0 s' i4 m'\n",
 "девять d' e0 v' i4 t'\n"]

In [72]:
# Maps a lexicon word to its phones concatenated into one string;
# filled in by the next cell.
word2phonemes = dict()

In [73]:
# Skip the first lexicon line (the "<sil> SIL" entry) and, for every other
# line, store word -> phones joined without separators, i.e. the same
# representation used for the per-file phoneme strings.
word2phonemes.update(
    (word, "".join(phones))
    for word, *phones in (entry.split() for entry in lexicon[1:])
)

In [74]:
# Inspect the word -> concatenated-phones mapping built above.
word2phonemes

{'восемь': "vo0s'i4m'",
 'два': 'dva0',
 'девять': "d'e0v'i4t'",
 'ноль': "no0l'",
 'один': "a1d'i0n",
 'пять': "p'a0t'",
 'семь': "s'e0m'",
 'три': "tr'i0",
 'четыре': "chi1ty0r'i4",
 'шесть': "she0s't'"}

In [75]:
# Russian digit names listed in ascending numeric order, so the position of
# each name in the list is the digit it denotes (0-9).
word2num = {
    name: value
    for value, name in enumerate([
        "ноль",
        "один",
        "два",
        "три",
        "четыре",
        "пять",
        "шесть",
        "семь",
        "восемь",
        "девять",
    ])
}

In [76]:
from collections import OrderedDict

In [77]:
# Invert word2phonemes: concatenated phone string -> word.
phonemes2word = OrderedDict(
    (transcription, word) for word, transcription in word2phonemes.items()
)
# Hack: when "один" (ends in "n") is directly followed by "ноль" (starts with
# "n"), the duplicate-phone collapsing merges the two "n"s, so after "один" is
# substituted only "o0l'" is left over. Map that remainder to "ноль" as well.
# It is inserted last so the full "no0l'" form is replaced before it.
phonemes2word["o0l'"] = "ноль"

In [78]:
# Turn every phoneme string into a space-separated word sequence: each known
# transcription is replaced (in phonemes2word order) by its word padded with
# spaces, then whitespace runs are squeezed to single spaces.
# Only existing keys are reassigned, so the dict can be updated while iterating.
for filename, decoded in file2words.items():
    for transcription, word in phonemes2word.items():
        decoded = decoded.replace(transcription, " {} ".format(word))
    file2words[filename] = " ".join(decoded.split())

In [79]:
# Convert each file's word sequence into a list of digits via word2num.
# A file whose transcription still contains a token that is not a known digit
# word (i.e. a leftover phoneme fragment) is reported and left out of file2nums.
file2nums = {}
for filename, words in file2words.items():
    try:
        file2nums[filename] = [word2num[word] for word in words.split()]
    except KeyError:
        # Catch only the failed lookup; a bare `except:` would also hide
        # unrelated errors and swallow KeyboardInterrupt.
        print(filename, words)

In [36]:
# Preview the first 100 (file name, word sequence) pairs without copying
# every item into a list first.
list(itertools.islice(file2words.items(), 100))

[('M0613_01_8435967021_Win_Dell',
  'восемь четыре три пять девять шесть семь ноль два один'),
 ('M0384_02_2431089576_Win_SonyVAIO',
  "два четыре три один o0l' восемь девять пять семь шесть"),
 ('M0516_01_69401_iPhone_6S', 'шесть девять четыре ноль один'),
 ('M0126_02_32507_Win_Dell', 'три два пять ноль семь'),
 ('M0135_03_87610_Android_SM_Gal_J3', 'восемь семь шесть один ноль'),
 ('M0470_01_0968431572_Win_SONY',
  'ноль девять шесть восемь четыре три один пять семь два'),
 ('F0634_01_63741_Win_Dell', 'шесть три семь четыре один'),
 ('F034_01_6271853904_iPhone_6s',
  'шесть два семь один восемь пять три девять ноль четыре'),
 ('M0770_01_25713_Win_SONY', 'два пять семь один три'),
 ('M0094_02_49267_Win_Sony', 'четыре девять два шесть семь'),
 ('M0606_01_5903264178_Android_SM',
  'пять девять ноль три два шесть четыре один семь восемь'),
 ('M0198_03_1329408576_Android_SM',
  'один три два девять четыре ноль восемь пять семь шесть'),
 ('F0405_01_9170236584_Win_SONY',
  'девять один семь 

In [80]:
# Number of recordings that have a transcription.
len(file2words)

18726

In [81]:
# Persist the file name -> word sequence mapping with the highest pickle
# protocol available to this interpreter.
with open("../data/lip_reading/file2words.pickle", "wb") as out_file:
    pickle.dump(file2words, out_file, pickle.HIGHEST_PROTOCOL)

In [82]:
# Persist the file name -> digit list mapping with the highest pickle
# protocol available to this interpreter.
with open("../data/lip_reading/file2nums.pickle", "wb") as out_file:
    pickle.dump(file2nums, out_file, pickle.HIGHEST_PROTOCOL)

In [86]:
# Load one preprocessed sample. The file holds four consecutively pickled
# objects, read back in the order they were written.
# NOTE(review): pickle.load executes arbitrary code from the file — only use
# it on files produced by this project.
sample_path = "../data/lip_reading/1_video_lips/train_male/M0003_02_0173924658_Android_SM_Gal_J3.pickle"
with open(sample_path, "rb") as sample_file:
    current_video = pickle.load(sample_file)  # first object in the file
    a1 = pickle.load(sample_file)  # second object
    a2 = pickle.load(sample_file)  # third object
    a3 = pickle.load(sample_file)  # fourth object

In [95]:
# Digit labels for the same recording whose pickle was loaded above.
file2nums["M0003_02_0173924658_Android_SM_Gal_J3"]

[0, 1, 7, 3, 9, 2, 4, 6, 5, 8]

In [90]:
# Shape of the second object unpickled from the sample file.
# NOTE(review): presumably per-frame 2-D landmark points — confirm.
a1.shape

(140, 20, 2)

In [85]:
# Shape of the first object unpickled from the sample file.
# NOTE(review): presumably (frames, height, width) of the lip crops — confirm.
current_video.shape

(140, 50, 30)

In [92]:
# Show the fourth unpickled object in full together with its length.
# NOTE(review): this prints every element; a slice such as a3[:5] would keep
# the output short.
a3, len(a3)

(['0000',
  '0001',
  '0002',
  '0003',
  '0004',
  '0005',
  '0006',
  '0007',
  '0008',
  '0009',
  '0010',
  '0011',
  '0012',
  '0013',
  '0014',
  '0015',
  '0016',
  '0017',
  '0018',
  '0019',
  '0020',
  '0021',
  '0022',
  '0023',
  '0024',
  '0025',
  '0026',
  '0027',
  '0028',
  '0029',
  '0030',
  '0031',
  '0032',
  '0033',
  '0034',
  '0035',
  '0036',
  '0037',
  '0038',
  '0039',
  '0040',
  '0041',
  '0042',
  '0043',
  '0044',
  '0045',
  '0046',
  '0047',
  '0048',
  '0049',
  '0050',
  '0051',
  '0052',
  '0053',
  '0054',
  '0055',
  '0056',
  '0057',
  '0058',
  '0059',
  '0060',
  '0061',
  '0062',
  '0063',
  '0064',
  '0065',
  '0066',
  '0067',
  '0068',
  '0069',
  '0070',
  '0071',
  '0072',
  '0073',
  '0074',
  '0075',
  '0076',
  '0077',
  '0078',
  '0079',
  '0080',
  '0081',
  '0082',
  '0083',
  '0084',
  '0085',
  '0086',
  '0087',
  '0088',
  '0089',
  '0090',
  '0091',
  '0092',
  '0093',
  '0094',
  '0095',
  '0096',
  '0097',
  '0098',
  '0099',


In [93]:
# Show the third unpickled object in full together with its length.
# NOTE(review): this prints the whole array; a slice such as a2[:5] would keep
# the output short.
a2, len(a2)

(array([[   27.],
        [   67.],
        [   95.],
        [  130.],
        [  166.],
        [  203.],
        [  229.],
        [  265.],
        [  299.],
        [  332.],
        [  363.],
        [  395.],
        [  431.],
        [  466.],
        [  496.],
        [  532.],
        [  563.],
        [  596.],
        [  628.],
        [  661.],
        [  697.],
        [  730.],
        [  761.],
        [  795.],
        [  829.],
        [  863.],
        [  895.],
        [  929.],
        [  963.],
        [  997.],
        [ 1029.],
        [ 1063.],
        [ 1098.],
        [ 1131.],
        [ 1163.],
        [ 1197.],
        [ 1230.],
        [ 1265.],
        [ 1297.],
        [ 1330.],
        [ 1364.],
        [ 1398.],
        [ 1435.],
        [ 1464.],
        [ 1499.],
        [ 1533.],
        [ 1565.],
        [ 1598.],
        [ 1631.],
        [ 1668.],
        [ 1698.],
        [ 1732.],
        [ 1765.],
        [ 1804.],
        [ 1832.],
        [ 

In [96]:
# 140 matches the leading dimension of current_video and a1 for the sample.
# TODO(review): the divisor 5 is not explained anywhere in this notebook —
# document what it represents.
140/5

28.0