In [264]:
import os
import numpy as np
from tabulate import tabulate
from tqdm import tqdm
# import keras
# import keras.backend as K

In [114]:
os.listdir()

['text',
 'distance.py',
 'nbest-sym.txt',
 'nbest.txt',
 '.ipynb_checkpoints',
 'nbest classifier.ipynb',
 'words.txt']

# read data

In [2]:
enc_phone = dict()
dec_phone = dict()

with open('words.txt') as file:
    for line in file:
        ph, num = line.strip().split(' ')
        enc_phone[ph] = num
        dec_phone[num] = ph

In [115]:
text = dict()

with open('nbest-sym.txt') as file:
    lines = file.readlines()
    change = False
    name = ''
    
    for line in lines:
        line = line.strip()
        if(line != ''):
            if(line[0] in ['F', 'M']):
                change = True
                name = line      
                text[name] = []
            elif(text[name] == []):                
                target = line.split(' ')[1:]
                text[name] = target           

In [116]:
label = dict()

with open('text') as file:
    for line in file:
        spl = line.strip().split(' ')
        label[spl[0]] = []
        for ph in spl[1:]:
            label[spl[0]].append(ph)

# Model (not use now)

In [80]:
from keras.layers import Input, Dense, Embedding, Activation, dot, Flatten
from keras.models import Model

n_lexicon = int(len(text))

def get_model():
    a = Input(shape=(25,))
    b = Input(shape=(25,))
    
    emb = Embedding(n_lexicon+1, 16)
    a_emb = emb(a)        
    b_emb = emb(b)    
                
    x = dot([a_emb, b_emb], axes=1)
    x = Flatten()(x)
    x = Dense(32, activation='relu')(x)
    x = Dense(32, activation='relu')(x)
    x = Dense(1, activation='sigmoid')(x)    
    
    model = Model(inputs=[a,b], outputs=x)
    return model

In [81]:
model = get_model()
model.compile('Adam', loss='mse', metrics=['acc'])

In [82]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_49 (InputLayer)           (None, 25)           0                                            
__________________________________________________________________________________________________
input_50 (InputLayer)           (None, 25)           0                                            
__________________________________________________________________________________________________
embedding_48 (Embedding)        (None, 25, 16)       9600        input_49[0][0]                   
                                                                 input_50[0][0]                   
__________________________________________________________________________________________________
dot_25 (Dot)                    (None, 16, 16)       0           embedding_48[0][0]               
          

In [72]:
words_dict = []
keys = []
maxlen = 25

for key in label:
    phonemes = label[key][:maxlen]
    addition = maxlen-len(phonemes)
    if(addition > 0):
        phonemes.extend([enc_phone["<SIL>"]]*addition)    
    words_dict.append(phonemes)

#     for phone in phonemes:        
#         temp = np.zeros(n_lexicon,dtype=np.int)
#         temp[int(phone)] = 1
#         words_dict[-1].append(temp)
    
    keys.append(key)

In [73]:
words_dict = np.array(words_dict, dtype='int')
words_dict.shape

(599, 25)

In [74]:
X_train_emb_a = np.concatenate((words_dict,words_dict), axis=0)
X_train_emb_b = np.concatenate((words_dict,np.flip(words_dict,0)), axis=0)
y_train_emb = [1]*(len(X_train_emb_a)//2) + [0]*(len(X_train_emb_a)//2)
y_train_emb = np.array(y_train_emb, dtype='int')

In [75]:
X_train_emb_a.shape, X_train_emb_b.shape, y_train_emb.shape

((1198, 25), (1198, 25), (1198,))

In [76]:
model.fit(x=[X_train_emb_a, X_train_emb_b], y=y_train_emb, epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f28c01580f0>

# edit-distance

In [291]:
#dp edit-distance 
def distance(a,b, debug=False):
    N = len(a)
    M = len(b)
    dp = np.zeros((N+1,M+1), dtype=np.int)
    
    for i in range(N+1):
        dp[i][0] = i
        
    for j in range(M+1):
        dp[0][j] = j
    
    for i in range(N):
        for j in range(M):
            best_prev = min(dp[i][j], min(dp[i+1][j], dp[i][j+1]))
            if(a[i] == b[j]):
                dp[i+1][j+1] = dp[i][j]
            else:
                dp[i+1][j+1] = best_prev + measure_weight(a[i], b[j])
    if(debug):
        return dp
    else:
        return dp[N][M]
    
    
def debug(a = list("sunday"), b = list("suundaY")):
    table = distance(a, b, debug=True)
    a = list(a)
    b = list(b)
    tabs = []
    for i, row in enumerate(table):        
        tabs.append([([''] + a)[i]])
        tabs[-1].extend(row)

    print(tabulate(tabs, headers=['']+b))
    

def isVow(x):
    return x in ['a','aa', 'i', 'ii', 'v', 'vv', 'u', 'uu', 'e', 'ee',
                'x', 'xx', 'o', 'oo', '@', '@@', 'q', 'qq', 
                'ia', 'iia', 'va', 'vva', 'ua', 'uua']
    
    
def measure_weight(x, y):
    inter_vow_weight = 3 # a -> i
    partial_inter_vow_weight = 2 # vv -> vva, v -> va
    inside_vow_weight = 1 # v -> vv, ua -> uua
    norm_weight = 1;
#     return 1
    if(x == y): return 0
    try:
        x = dec_phone[x]
        y = dec_phone[y]
    except:
        pass
    
    if(isVow(x) or isVow(y)):
        if(len(x) > len(y)):
            x, y = y, x

        if( x[0]*2 + x[1:] == y ):
            return inside_vow_weight
        elif(x + 'a' == y):
            return partial_inter_vow_weight
        else:
            return inter_vow_weight
    else:
        return norm_weight

In [274]:
def find_closest(x):    
    possible = set()
    min_dist = 1e9
    for key, val in label.items():        
        dist = distance(x, val)
        if(dist < min_dist):
            min_dist = dist            
            possible = set([" ".join(val)])
        elif(dist == min_dist):
            possible.add(" ".join(val))
    return possible, min_dist

In [215]:
keys = list(text.keys())

In [277]:
k = keys[0]
print(' '.join(text[k]))
print(' '.join(label[k]))
print(k)

possible, min_dist = find_closest(text[k])
print(possible, min_dist)

l @@ phl aa ph @@ p a j^ j uu t^
l aa k @@ j^
F5731101921_0080
{'l aa k @@ j^', 'phl ee ng^ t @@ p a j^', 'kh @@ phl ee ng^ kl a j^', 'kh aa m^ p a j^'} 8


## test accuracy

In [None]:
%%time
acc = 0
amount = len(keys)
for key in tqdm(keys[:amount]):
    possible, min_dist = find_closest(text[key])
    ans = list(possible)[0]
    lab = " ".join(label[key])
    if(len(possible)>1):
        print(key)
        print(" ".join(text[key]))
        print(possible)
        print(lab, '\n')
    if(ans == lab): acc += 1
    
        

# print("acc {}".format(acc / amount))
acc /= amount

  3%|▎         | 15/599 [00:04<02:44,  3.54it/s]

M5730279821_0051
phl qq t^ phl ee ng^ phl ee ng^ j uu b a j^ l qq f^ j uu n^
{'l qq k^ l e n^', 'j u t^'}
p qq t^ phl ee ng^ 



  4%|▍         | 24/599 [00:06<02:32,  3.76it/s]

M5730282621_0075
phl ee ng^ phl ee ng^ phl ee ng^ j ii
{'kh aa m^ phl ee ng^ n ii', 'h aa m^ l e n^ phl ee ng^ n ii'}
l e n^ phl ee ng^ z a r a j^ k @@ d aa j^ n aa th ii 



  5%|▍         | 29/599 [00:07<02:29,  3.83it/s]

F5731101921_0055
t^ phl ee o th a m
{'', 'p qq t^ phl ee ng^ kl a j^', 'kh aa m^'}
p qq t^ phl ee ng^ 



  6%|▋         | 38/599 [00:09<02:17,  4.08it/s]

M5730279821_0048
ch a k^ c aa
{'ch a t^ d aa w^', ''}
ch a t^ d aa w^ 

M5730328921_0033
d aa k^ n ii
{'k oo w aa c ii', ''}
d a ng^ z ii k^ 



  7%|▋         | 41/599 [00:10<02:17,  4.07it/s]

M5730328921_0034
z qq l ii k^ ii
{'', 'ph qq m^ z ii k^', 'z ii k^'}
ph qq m^ z ii k^ 



  7%|▋         | 44/599 [00:10<02:13,  4.16it/s]

F5731101921_0049
phl i t^ phl ee j a m^ t^
{'l e n^ s a m^', 'p qq t^ phl ee ng^ kl a j^', 'kh aa m^'}
p i t^ s iia ng^ 



  8%|▊         | 47/599 [00:11<02:14,  4.11it/s]

M5731100221_0060
kh @@ phl ee ng^ s a br ee w i p^ n ii
{'', 'kh aa m^ phl ee ng^ n ii', 'kh @@ phl ee ng^ kl a j^'}
kh @@ phl ee ng^ 

F5731101921_0081
l e n^ phl qq t^ l ee ng^ ng^
{'l e n^ phl ee ng^', 'p qq t^ phl ee ng^', 'l qq k^ l e n^'}
 



 10%|█         | 60/599 [00:14<02:11,  4.09it/s]

M5731100221_0009
kl j^ ng a p^ r aa j^ z ee ng^ ng xx p^
{'', 'ng iia p^', 'phl ee ng^ z vv n^'}
phl ee ng^ t @@ p a j^ phl ee ng^ z a r a j^ 



 12%|█▏        | 70/599 [00:17<02:09,  4.09it/s]

F5731101921_0085
phl ee t^ phl ee ng^ ch @@ phl ee ng^ n ii
{'m a j^ ch @@ p^ phl ee ng^ n ii', 'kh @@ phl ee ng^'}
m a j^ ch @@ p^ phl ee ng^ n ii 

M5730279821_0018
l e n^ t @ m^ phl ee ng^ ng^
{'l e n^ phl ee ng^', 'r xx n^ d @ m^ phl ee ng^'}
r xx n^ d @ m^ phl ee ng^ 

M5730282621_0059
kh @@ t^ ch r aa j^ l vv n^
{'', 'ch a f i n^', 'phl ee ng^ z vv n^'}
kh @@ phl ee ng^ kl a j^ 



 13%|█▎        | 77/599 [00:19<02:09,  4.02it/s]

M5730328921_0001
k oo w aa c ii l e n^ phl ee ng^ ch ee l oo j uu n^
{'k oo w aa c ii l e n^ phl ee ng^', 'k oo w aa c ii l e n^ phl ee ng^ z aa w^', 'k oo w aa c ii l e n^ phl ee ng^ kh uu ch ii w i t^'}
k oo w aa c ii l e n^ phl ee ng^ 

M5730282621_0047
k uua j^ c ii b a ng^ l xx ng^
{'phl ee ng^ n ii khr a j^ t x ng^', 'b a w^ s iia ng^'}
k oo w aa c ii b aa j^ b aa j^ 



 14%|█▎        | 82/599 [00:19<02:05,  4.11it/s]

M5730328921_0046
p i t^ k oo kr iia ng^
{'p i t^ s iia ng^', 'p i t^ pr oo kr x m^'}
p i t^ pr oo kr x m^ 



 16%|█▋        | 98/599 [00:23<02:01,  4.13it/s]

F5730295821_0067
phl ee ng^ phl ee ng^ h @@ ng^ s ii r a p i n^ s xx ng^
{'l e n^ phl ee ng^ kh @@ ng^ s i n^ l a p i n^', 'l e n^ phl ee ng^ kh @@ ng^ s i n^ l a p i n^ s a t xx m^'}
l e n^ phl ee ng^ kh @@ ng^ s i n^ l a p i n^ s a t xx m^ 

F5730295821_0026
l ee n^ phl ee ng^ n ii w a r a j^ l i ng^
{'phl ee ng^ n ii khr a j^ t x ng^', 'phl ee ng^ n ii khr a j^ r @@ ng^', 'l e n^ phl ee ng^ n ii s a m^'}
r qq m^ phl ee ng^ n ii m a j^ 



 17%|█▋        | 101/599 [00:24<02:00,  4.14it/s]

M5730282621_0081
l t^ phl ee ng^ n ee ng^ s a n^
{'l e n^ s a m^', 'phl ee ng^ t @@ p a j^'}
 



 18%|█▊        | 109/599 [00:26<01:58,  4.14it/s]

M5730282621_0055
phl qq m^ phl ee ng^ ch a
{'p qq t^ phl ee ng^ kl a j^', 'p qq t^ phl ee ng^'}
p qq t^ phl ee ng^ 

M5730279821_0042
l t^ k @@ n^
{'l aa k @@ j^', 'j u t^ k @@ n^', 'ph @@ t^'}
j u t^ k @@ n^ 



 19%|█▉        | 115/599 [00:27<01:55,  4.18it/s]

M5730279821_0063
l v n^ phl ee ng^ k a phl ee w i t^ phl ee n^ ph uu uu k^
{'l e n^ phl ee ng^ c aa k^ ph e n^ l i t^ phl ee ng^', 'l e n^ phl ee ng^ c aa k^ ph e n^ l i t^ phl ee ng^ pr oo t^'}
l e n^ phl ee ng^ c aa k^ ph e n^ l i t^ phl ee ng^ l uu k^ th u ng^ 



 20%|█▉        | 118/599 [00:28<01:55,  4.16it/s]

F5730295821_0038
phl aa ng^ b a w^ k^ n ii
{'kh aa m^ phl ee ng^ n ii', 'b a w^ z ii k^'}
b a w^ z ii k^ 



 21%|██        | 123/599 [00:29<01:53,  4.21it/s]

F5730295821_0079
ch iia ng^ khr @ n^
{'', 'b a w^ s iia ng^ n @ j^', 'ph @@ t^'}
ch a f i n^ 



 22%|██▏       | 129/599 [00:30<01:52,  4.17it/s]

M5731100221_0051
p qq t^ phl ee ng^ z a j^ h t^ j uu z a j^ l qq f^ j uu
{'', 'p qq t^ phl ee ng^ kl a j^'}
p qq t^ phl ee ng^ 



 22%|██▏       | 132/599 [00:31<01:52,  4.16it/s]

M5731100221_0063
l ee ng^ phl ee ng^ c aa k^ phl ee ng^ l ii s ee ng^ r u ph uu
{'l e n^ phl ee ng^ c aa k^ ph e n^ l i t^ phl ee ng^ l uu k^ th u ng^', 'l e n^ phl ee ng^ c aa k^ ph e n^ l i t^ phl ee ng^', 'l e n^ phl ee ng^ c aa k^ ph e n^ l i t^ phl ee ng^ pr oo t^'}
l e n^ phl ee ng^ c aa k^ ph e n^ l i t^ phl ee ng^ l uu k^ th u ng^ 



 23%|██▎       | 135/599 [00:32<01:50,  4.19it/s]

F5730295821_0042
m j u t^ z @@ l a w^
{'j u t^ k @@ n^', 'j u t^ ch uua khr aa w^'}
j u t^ k @@ n^ 

F5731101921_0084
phl ee ng^ phl ee ng^ n ii t^
{'kh aa m^ phl ee ng^ n ii', 'l e n^ phl ee ng^ kh @@ ng^', 'l e n^ phl ee ng^', 'z ii k^', 'phl ee ng^ z vv n^', 'l e n^ phl ee ng^ n ii s a m^'}
m a j^ z a w^ phl ee ng^ n ii 



 23%|██▎       | 138/599 [00:32<01:49,  4.21it/s]

M5730282621_0009
phl ee ng^ k o w^ ph aa ng^ phl ee ng^ z a j^
{'phl ee ng^ t @@ p a j^ phl ee ng^ z a r a j^', 'p qq t^ phl ee ng^ kl a j^'}
phl ee ng^ t @@ p a j^ phl ee ng^ z a r a j^ 

M5730282621_0067
phl ee ng^ h @ ng^ th ii l e n^ p i t^ phl ee ng^
{'l e n^ phl ee ng^', 'k oo w aa c ii l e n^ s i p^ phl ee ng^'}
l e n^ phl ee ng^ kh @@ ng^ s i n^ l a p i n^ s a t xx m^ 

 24%|██▎       | 142/599 [00:33<01:48,  4.20it/s]



M5730328921_0055
p qq t^ phl ee n^ w i t^ h aa n^
{'r qq m^ phl ee ng^ n ii m a j^', 'p qq t^ phl ee ng^ kl a j^'}
p qq t^ phl ee ng^ 



 25%|██▍       | 148/599 [00:34<01:46,  4.25it/s]

M5730282621_0068
l ee ng^ kh @@ t^ phl oo l ii k @@ n^
{'l aa k @@ j^', 'ph @@ t^'}
l e n^ phl ee ng^ kh @@ ng^ s i n^ l a p i n^ c oo z ii b @ j^ 



 25%|██▌       | 151/599 [00:35<01:45,  4.24it/s]

M5730328921_0037
p a w^ k @@ n^ r @ j^
{'b a w^ s iia ng^ n @ j^', 'b a w^ b a w^ n @ j^'}
b a w^ b a w^ n @ j^ 



 26%|██▌       | 155/599 [00:36<01:44,  4.23it/s]

M5730279821_0031
phl qq m^ n e t^ phl ee n^ l i t^ phl ee ng^ k @@ n^ z aa m^
{'ph qq m^ n a j^ ph e n^ l i t^ phl ee ng^', 'phl ee ng^ t @@ p a j^', 'l e n^ phl ee ng^ k @@ n^ n aa', 'ph qq m^ n a j^ ph e n^ l i t^ phl ee ng^ pr oo t^'}
ph qq m^ n a j^ ph e n^ l i t^ phl ee ng^ 



 28%|██▊       | 165/599 [00:38<01:42,  4.25it/s]

F5731101921_0028
phl ee ph e kh xx t^
{'ph @@ l xx w^', ''}
phl ee ng^ n ii khr a j^ t x ng^ 



 29%|██▉       | 173/599 [00:40<01:40,  4.24it/s]

F5730295821_0066
phl ee ng^ kh @@ ng^ s iia z vv n^ phl ee n^ l ee ng^ l iia r iia k^
{'ng iia p^', 'pl iia n^'}
l e n^ phl ee ng^ kh @@ ng^ s i n^ l a p i n^ 

M5730282621_0091
kh oo l ee ng^ z a w^
{'kh @@ phl ee ng^ kl a j^', 'l e n^ s a m^'}
ph @@ l xx w^ 



 30%|██▉       | 178/599 [00:41<01:38,  4.26it/s]

F5730295821_0060
kh @@ p^ phl ee ng^ s ee w ee t^ n ii
{'', 'kh aa m^ phl ee ng^ n ii', 'm a j^ ch @@ p^ phl ee ng^ n ii', 'h aa m^ l e n^ phl ee ng^ n ii'}
kh @@ phl ee ng^ 

M5730282621_0045
s a ng^ phl
{'', 'kh aa m^'}
s a t @ p^ 



 30%|███       | 182/599 [00:43<01:38,  4.23it/s]

M5730279821_0001
k oo w aa c ii l e n^ phl ee ng^ ch o k^ j uu
{'k oo w aa c ii l e n^ phl ee ng^ s a t ee', 'k oo w aa c ii l e n^ phl ee ng^', 'k oo w aa c ii l e n^ phl ee ng^ z aa w^'}
k oo w aa c ii l e n^ phl ee ng^ 

M5730328921_0065
phl ee ng^ kh @@ ng^ s u k^ p qq t^ ch u ng^ n ii z a
{'', 'r qq m^ phl ee ng^ n ii m a j^'}
l e n^ phl ee ng^ kh @@ ng^ 



 31%|███       | 187/599 [00:44<01:37,  4.21it/s]

M5731100221_0053
p qq t^ phl ee ng^ s khr ee t^
{'p qq t^ phl ee ng^ kl a j^', 'p qq t^ phl ee ng^'}
p qq t^ phl ee ng^ 

F5731101921_0013
phl ee m^ l ee ng^ r a j^
{'kh @@ phl ee ng^ kl a j^', 'phl ee ng^ t @@ p a j^', 'p qq t^ phl ee ng^ kl a j^', 'l e n^ s a m^'}
l e n^ phl ee ng^ 



 32%|███▏      | 191/599 [00:45<01:36,  4.22it/s]

M5730328921_0021
d aa l xx w^ n @ j^
{'d a ng^ d a ng^ n @ j^', 'b a w^ b a w^ n @ j^'}
d a ng^ d a ng^ n @ j^ 



 33%|███▎      | 196/599 [00:46<01:35,  4.22it/s]

M5730282621_0052
phl qq phl ee ng^ r aa j^ n a j^
{'phl ee ng^ t @@ p a j^', 'kh aa m^ p a j^'}
p qq t^ phl ee ng^ kl a j^ 

F5731101921_0063
phl ee ng^ c aa k^ ph e n^ l i t^ phl ee ng^ th u ng^
{'l e n^ phl ee ng^ c aa k^ ph e n^ l i t^ phl ee ng^ l uu k^ th u ng^', 'l e n^ phl ee ng^ c aa k^ ph e n^ l i t^ phl ee ng^ pr oo t^'}
l e n^ phl ee ng^ c aa k^ ph e n^ l i t^ phl ee ng^ l uu k^ th u ng^ 



 34%|███▍      | 206/599 [00:49<01:34,  4.18it/s]

M5730328921_0064
t vv phl ee ng^ kh @@ ng^ m aa d uu m^ s @@ z aa z ii
{'k oo w aa c ii', '', 'phl ee ng^ n ii j aa w^ k ii n aa th ii'}
l e n^ phl ee ng^ kh @@ ng^ 

F5730295821_0064
khr ee ng^ phl ee ng^ kh @@ ng^ thr @@ d uu n^ p @ j^
{'', 'ph @@ t^', 'j u t^ k @@ n^', 'l e n^ phl ee ng^ kh @@ ng^'}
l e n^ phl ee ng^ kh @@ ng^ 



 35%|███▌      | 212/599 [00:50<01:32,  4.18it/s]

M5730282621_0095
m vva ng^ l ee ng^
{'', 's u m^ phl ee ng^', 'kh @@ phl ee ng^', 'l e n^ phl ee ng^', 'j u t^ phl ee ng^', 'p qq t^ phl ee ng^', 'p qq t^ n vva phl ee ng^'}
p qq t^ n vva phl ee ng^ 

M5730279821_0046
p i t^ ch vva k vva ng^
{'', 'p i t^ s iia ng^', 'p qq t^ n vva phl ee ng^'}
p i t^ pr oo kr x m^ 



 36%|███▌      | 217/599 [00:51<01:30,  4.22it/s]

M5730282621_0016
l iia ng^
{'ng iia p^', 'pl iia n^'}
pl iia n^ 



 37%|███▋      | 221/599 [00:52<01:29,  4.21it/s]

F5731101921_0003
k oo w aa c ii l e n^ phl ee ng^ r ee n^
{'k oo w aa c ii l e n^ phl ee ng^', 'k oo w aa c ii l e n^ phl ee ng^ z aa w^'}
k oo w aa c ii l e n^ phl ee ng^ s a t ee 

F5731101921_0026
phl ee ng^ phl ee ng^ n a j^ n aa t^
{'ch a t^ d aa w^', 'l e n^ phl ee ng^ n ii s a m^'}
r qq m^ phl ee ng^ n ii m a j^ 



 38%|███▊      | 227/599 [00:53<01:28,  4.21it/s]

M5730279821_0058
phl kh @@ phl ee ng^ f oo
{'kh @@ phl ee ng^ f o n^', 'kh @@ phl ee ng^'}
kh @@ phl ee ng^ f o n^ 



 40%|████      | 240/599 [00:56<01:24,  4.26it/s]

F5730295821_0034
ph qq ng^ j ii
{'', 'ph qq m^ z ii k^'}
ph qq m^ z ii k^ 

M5730282621_0043
j vv t^ ch u ng^ phl ee
{'', 's u m^ phl ee ng^', 'j u t^ phl ee ng^'}
j u t^ ch uua khr aa w^ 



 41%|████      | 243/599 [00:57<01:23,  4.25it/s]

M5731100221_0031
ph qq l e n^ l i p^ ph ee ng^ t @@ z @@ n^
{'phl ee ng^ t @@ p a j^', 'l e n^ phl ee ng^ kh @@ ng^'}
ph qq m^ n a j^ ph e n^ l i t^ phl ee ng^ 



 41%|████      | 247/599 [00:58<01:23,  4.23it/s]

F5731101921_0001
k oo w aa c ii l vv n^ l ee ng^ ch ee ng^ khr e m^ j uu
{'k oo w aa c ii l e n^ h aa phl ee ng^', 'k oo w aa c ii l e n^ s i p^ phl ee ng^', 'k oo w aa c ii l e n^ phl ee ng^ s a t ee', 'k oo w aa c ii l e n^ phl ee ng^', 'k oo w aa c ii l e n^ phl ee ng^ z aa w^'}
k oo w aa c ii l e n^ phl ee ng^ 



 42%|████▏     | 251/599 [00:59<01:22,  4.23it/s]

F5731101921_0041
j u t^ n^ phl t^
{'j u t^ k @@ n^', 'j u t^ phl ee ng^'}
j u t^ 



 43%|████▎     | 258/599 [01:00<01:20,  4.24it/s]

M5730282621_0069
phl ee ng^ c aa k^ z a r aa r aa m^ d q r iia t^
{'ng iia p^', 'pl iia n^'}
l e n^ phl ee ng^ c aa k^ z a n^ b a m^ 



 48%|████▊     | 286/599 [01:07<01:13,  4.25it/s]

F5731101921_0024
ee ng^ phl ee ng^ kh @@ z a n aa
{'phl ee ng^ t @@ p a j^', 'l e n^ phl ee ng^ k @@ n^ n aa'}
l e n^ phl ee ng^ k @@ n^ n aa 



 49%|████▉     | 294/599 [01:09<01:12,  4.23it/s]

M5731100221_0015
kh aa z aa w^
{'ch a t^ d aa w^', 'kh aa m^ p a j^', 'kh aa m^'}
kh aa m^ 



 50%|████▉     | 298/599 [01:10<01:11,  4.22it/s]

F5730295821_0002
k oo w aa c ii l e n^ phl ee ng^ ch uu k aa
{'k oo w aa c ii l e n^ phl ee ng^ s a t ee', 'k oo w aa c ii l e n^ phl ee ng^', 'k oo w aa c ii l e n^ phl ee ng^ z aa w^'}
k oo w aa c ii l e n^ phl ee ng^ 



 50%|█████     | 301/599 [01:11<01:10,  4.21it/s]

F5731101921_0051
phl ee l p^ phl ee ng^ phl ee ng^ ch ii w a j^ l a p^ j uu t^
{'phl ee ng^ n ii khr a j^ t x ng^', 'l e n^ phl ee ng^ n ii s a m^', 'phl ee ng^ n ii khr a j^ r @@ ng^', 'j u t^'}
p qq t^ phl ee ng^ 

M5731100221_0001
k oo w aa c ii j ii k^ l e n^ phl ee ng^ ch ee ng^ ph @@ p^ j uu
{'l e n^ phl ee ng^ k @@ n^ n aa', 'l e n^ phl ee ng^ kh @@ ng^'}
k oo w aa c ii l e n^ phl ee ng^ 



 51%|█████▏    | 307/599 [01:13<01:09,  4.18it/s]

M5730282621_0031
ph qq m^ n a j^ ph e n^ l i t^ phl ee ng^ k @@ n^ n @@ n^
{'ph qq m^ n a j^ ph e n^ l i t^ phl ee ng^', 'ph qq m^ n a j^ ph e n^ l i t^ phl ee ng^ pr oo t^'}
ph qq m^ n a j^ ph e n^ l i t^ phl ee ng^ 



 52%|█████▏    | 313/599 [01:14<01:08,  4.19it/s]

In [290]:
acc

0.5659432387312187