In [1]:
import os
import numpy as np
from tabulate import tabulate
from tqdm import tqdm
# import keras
# import keras.backend as K

# read data

In [1]:
enc_phone = dict()
dec_phone = dict()

with open('words.txt') as file:
    for line in file:
        ph, num = line.strip().split(' ')
        enc_phone[ph] = num
        dec_phone[num] = ph

In [91]:
pred = dict()

with open('nbest-disabilities-real.txt') as file:
    lines = file.readlines()
    change = False
    name = ''
    
    for line in lines:
        line = line.strip()
        if(line != ''):
#             if(line[0] in ['F', 'M']):
            if(line[0] in ['F', 'M', 'D']):
                change = True
                name = line      
                pred[name] = []
            elif(pred[name] == []):                
                target = line.split(' ')[1:]
                pred[name] = target           

In [92]:
label = dict()

with open('text-real') as file:
    for line in file:
        spl = line.strip().split(' ')
        label[spl[0]] = []
        for ph in spl[1:]:
            label[spl[0]].append(ph)

In [94]:
dictionary = []
with open('dictionary.txt') as file:
    for line in file:
        dictionary.append(line.strip())

# edit-distance

In [14]:
#dp edit-distance 
def distance(a, b, debug=False, weight=True):    
    # a is predict
    # b is label
    
    N = len(a)
    M = len(b)
    
    dp = np.zeros((N+1,M+1), dtype=np.int)        
    
    for i in range(N+1):
        dp[i][0] = i
        
    for j in range(M+1):
        dp[0][j] = j
    
    for i in range(N):
        for j in range(M):
            best_prev = min(dp[i][j], min(dp[i+1][j], dp[i][j+1]))
            if(a[i] == b[j]):
                dp[i+1][j+1] = dp[i][j]
            elif(not weight):
                dp[i+1][j+1] = best_prev + 1
            else:                                
                #insert
                if(dp[i][j+1] == best_prev):                        
                    dp[i+1][j+1] = best_prev + measure_weight(a[i], '')                    
                #delete
                elif(dp[i+1][j] == best_prev):
                    dp[i+1][j+1] = best_prev + measure_weight(b[j], '')                    
                #substitute 
                else:                                     
                    dp[i+1][j+1] = best_prev + measure_weight(a[i], b[j])
                
    if(debug):
        return dp
    else:
        return dp[N][M]
    
    
def debug(a = list("sunday"), b = list("suundaY")):
    table = distance(a, b, debug=True)
    a = list(a)
    b = list(b)
    tabs = []
    for i, row in enumerate(table):        
        tabs.append([([''] + a)[i]])
        tabs[-1].extend(row)

    print(tabulate(tabs, headers=['']+b))
    

def isVow(x):
    return x in ['a','aa', 'i', 'ii', 'v', 'vv', 'u', 'uu', 'e', 'ee',
                'x', 'xx', 'o', 'oo', '@', '@@', 'q', 'qq', 
                'ia', 'iia', 'va', 'vva', 'ua', 'uua']


def isFinal(x):
    return x[-1] == '^'


def isInit(x):
    return not isVow(x) and not isFinal(x)


def vow_score(x,y):    
    inter_vow_weight = 3 # a -> i
    partial_inter_vow_weight = 2 # vv -> vva, v -> va
    inside_vow_weight = 1 # v -> vv, ua -> uua
    
    if(len(x) > len(y)):
        x, y = y, x

    if( x[0]*2 + x[1:] == y ):
        return inside_vow_weight
    elif(x + 'a' == y):
        return partial_inter_vow_weight
    else:
        return inter_vow_weight
#     REF table: https://en.wikipedia.org/wiki/Thai_language#Vowels
    
    
def get_cluster_cost(x , y):
    cluster_cost = 0
    if(x[-1] in ['r', 'l']):        
        if(not y[-1] in ['r', 'l']):
            cluster_cost += 0.5        
        
    elif(x[-1] == 'w' and len(x) > 1):
        if(not y[-1] == 'w' and len(y) > 1):
            cluster_cost += 1                    

    return cluster_cost
    

def clean_cluster(x):
    if(len(x) > 1):
        if(x[-1] in ['r', 'l']):
            x = x[:-1]
        if(x[-1] == 'w'):
            x = x[-1]
    
    return x
    
    
def initc_score(x,y):
    #REF table: https://en.wikipedia.org/wiki/Thai_language#Initials
    #column: Labial, Alveolar, Palatal, Velar, Gloattal
    #row: Nasal, Plosive_voice, Plosive_tenuis, Plotsive_aspirated, Fricative, Approximant, Trill
    table = [[] for i in range(7)]
    table[0].extend(['m', 'n', '' , 'ng', ''])
    table[1].extend(['b', 'd', '' , '', ''])
    table[2].extend(['p', 't', 'c', 'k', 'z'])
    table[3].extend(['ph', 'th', 'ch', 'kh', ''])
    table[4].extend(['f', 's', '', '', 'h'])
    table[5].extend(['', 'l', 'j', 'w', ''])
    table[6].extend(['', 'r', '', '', ''])
    
    cluster_cost = get_cluster_cost(x, y)
    cluster_cost += get_cluster_cost(y, x)
    
    x = clean_cluster(x)
    y = clean_cluster(y)    
    
    for i in range(7):
        for j in range(5):
            if(x == table[i][j]):
                xi = i; xj = j;
            if(y == table[i][j]):
                yi = i; yj = j;
    

    dist = ((xi-yi)**2 + (xj-yj)**2)**0.5 + cluster_cost
    return min(dist, 3)
                
    
def finalc_score(x, y):
    #REF table: https://en.wikipedia.org/wiki/Thai_language#Finals
    #column: Labial, Alveolar, Palatal, Velar
    #row: Nasal, Plosive, Approximant
    table = [[] for i in range(3)]
    table[0].extend([['m^'], ['n^', 'l^'], [], ['ng^']])
    table[1].extend([['p^', 'f^'], ['t^','s^','ch^'], [], ['k^']])
    table[2].extend([['w^'], [], ['j^'], []])
    
    #default as (Glottis)
    xi = 1; xj = 4; yi = 1; yj = 4
    
    for i in range(3):
        for j in range(4):
            if(x in table[i][j]):
                xi = i; xj = j
            if(y in table[i][j]):
                yi = i; yj = j
        
    return min(((xi-yi)**2 + (xj-yj)**2)**0.5, 3)
    

def cross_type_score(x,y):
    return 4


def ins_del_score(x):
    if(isVow(x)):
        return 2.5
    else:
        return 1.5

    
def measure_weight(x, y):    
    
    if(x == y): return 0
    elif(y == ''): return ins_del_score(x)
    
    try:
        x = dec_phone[x]
        y = dec_phone[y]
    except:
        pass
        
    if(isVow(x) and isVow(y)):
        return vow_score(x,y)
    elif(isFinal(x) and isFinal(y)):
        return finalc_score(x,y)
    elif(isInit(x) and isInit(y)):
        return initc_score(x,y)
    else:
        return cross_type_score(x,y)

In [1]:
def find_closest(x, weight=True, dictionary=dictionary):
    possible = set()
    min_dist = 1e9
    for val in dictionary:        
        dist = distance(x, val.split(' '), weight=weight)
        if(dist < min_dist):
            min_dist = dist            
            possible = set([val])
        elif(dist == min_dist):
            possible.add(val)
    return possible, min_dist

NameError: name 'dictionary' is not defined

# test accuracy

In [250]:
keys = list(pred.keys())

In [251]:
k = keys[0]
print(' '.join(pred[k]))
print(' '.join(label[k]))
print(k)

possible, min_dist = find_closest(pred[k])
print(possible, min_dist)

d uu t^
s u t^
D00_017
{'s u t^', 'r uu', 'pr oo t^', 'm u ng^'} 4


In [271]:
%%time
acc = 0
amount = len(keys)
in_choice = 0
#amount = 5

for key in sorted(keys)[:amount]:
    possible, min_dist = find_closest(pred[key])
    possible = list(possible)
    print(key)
    print(possible)
    if(len(possible) > 0):        
        possible, min_dist = find_closest(pred[key], weight=False, dictionary=possible)
#         max_intersect = 0         
#         for pos in possible:
#             pass
#             posl = pos.split(' ')
#             intersect = 0
#             for char in pred[key]:
#                 if(char in posl):
#                     intersect += 1
#             if(intersect > max_intersect):
#                 ans = pos                

    ans = list(possible)[0]    
    lab = " ".join(label[key])
    print(key, ans == lab)
    print(" ".join(pred[key]))
    print(possible, min_dist)
    print(ans, '|', lab, '\n')
    if(ans == lab): acc += 1
    if(lab in possible): in_choice += 1
    
        

# print("acc {}".format(acc / amount))
acc

D00_000
['th ee', 'd ii', 'pr oo t^', 'p aa k^']
D00_000 False
p
{'th ee', 'd ii', 'p aa k^'} 2
th ee | p aa k^ 

D00_001
['t e n^', 'c i k^', 'th ee']
D00_001 True
t i ee ng^ t^ th
{'t e n^', 'c i k^', 'th ee'} 5
t e n^ | t e n^ 

D00_002
['d ii']
D00_002 False
k i th ii
{'d ii'} 3
d ii | c i k^ 

D00_003
['j @@']
D00_003 False
r @@ n^ ng^ ph
{'j @@'} 4
j @@ | k @@ n^ 

D00_004
['j aa w^']
D00_004 False
f aa w^ ee b
{'j aa w^'} 3
j aa w^ | z @@ j^ 

D00_005
['d ii', 'm ii']
D00_005 False
ng n ii
{'d ii', 'm ii'} 2
d ii | ph vva n^ 

D00_006
['c i k^', 'd ii']
D00_006 False
th ii ch ng^ f
{'d ii'} 4
d ii | th ee 

D00_007
['d ii', 'm ii']
D00_007 False
th uu w aa n i
{'d ii', 'm ii'} 6
d ii | ch uua j^ 

D00_008
['b a j^']
D00_008 True
m a j^
{'b a j^'} 1
b a j^ | b a j^ 

D00_009
['d ii']
D00_009 True
r th ii th ii
{'d ii'} 4
d ii | d ii 

D00_010
['b a j^']
D00_010 False
m o oo a j^
{'b a j^'} 3
b a j^ | m u ng^ 

D00_011
['m ii']
D00_011 True
m ii j ii t^
{'m ii'} 3
m ii | m ii 

D0

In [268]:
acc, acc/amount

(7, 0.2916666666666667)

In [269]:
in_choice, in_choice/amount

(8, 0.3333333333333333)

In [270]:
amount

24

# Postprocessing

In [95]:
label

{'D00_000': ['p', 'aa'],
 'D00_001': ['j', 'e', 'ng^'],
 'D00_002': ['c', 'ii'],
 'D00_003': ['k', '@@', 'n^'],
 'D00_004': ['k', '@@', 'z', 'a', 'j^'],
 'D00_005': ['p', 'vva', 'ng^'],
 'D00_006': ['th', 'ee'],
 'D00_007': ['th', 'uua', 'j^'],
 'D00_008': ['p', 'a', 'j^'],
 'D00_009': ['d', 'ii'],
 'D00_010': ['m', 'u', 'ng^'],
 'D00_011': ['m', 'ii'],
 'D00_012': ['ng', 'aa', 'm^'],
 'D00_013': ['ph', 'aa', 'j^'],
 'D00_014': ['k', 'iia', 'ng^'],
 'D00_015': ['z^', 'uu', 'p^'],
 'D00_016': ['f', 'u', 'n^'],
 'D00_017': ['t', 'u', 't^'],
 'D00_018': ['t', 'i', 'w^'],
 'D00_019': ['w', 'xx', 'n^'],
 'D00_020': ['j', '@@'],
 'D00_021': ['j', 'aa', 'w^'],
 'D00_022': ['p', 'oo', 'n^'],
 'D00_023': ['k', 'aa', 'm^']}

In [96]:
pred

{'D00_000': ['thr'],
 'D00_001': ['t', 'ee', 'ng^', 't'],
 'D00_002': ['z', 'th', 'ii', 'th', 'ii', 's^'],
 'D00_003': ['thr'],
 'D00_004': ['thr'],
 'D00_005': ['n', 'ii', 's^'],
 'D00_006': ['k', 'oo', 'ng^', 'f'],
 'D00_007': ['th', 'uu', 'w', 'aa', 's^'],
 'D00_008': ['m', 'a', 'j^'],
 'D00_009': ['thr', 'th', 'ii', 'th', 'ii'],
 'D00_010': ['thr'],
 'D00_011': ['m', 'ii', 'j', 'ii', 't^'],
 'D00_012': ['ng', 'aa', 'n^', 'thr'],
 'D00_013': ['n', 'aa', 's^'],
 'D00_014': ['thr'],
 'D00_015': ['thr'],
 'D00_016': ['s', 'u', 'ch^', 'ng^'],
 'D00_017': ['d', 'uu', 't^'],
 'D00_018': ['th', 'ii', 'th', 'xx', 'ng^'],
 'D00_019': ['thr', 'a', 'j^'],
 'D00_020': ['r', '@@', 's^'],
 'D00_021': ['j', 'aa', 'h', 'aa', 'ng^', 'thr'],
 'D00_022': ['khw', '@@', 'ng^', 'm', 'vva', 'm^'],
 'D00_023': ['n', 'aa', 'h', 'aa', 'm^']}

In [97]:
keys = sorted(list(pred.keys()))

In [98]:
for key in keys:
    print(pred[key],'\t', label[key])

['thr'] 	 ['p', 'aa']
['t', 'ee', 'ng^', 't'] 	 ['j', 'e', 'ng^']
['z', 'th', 'ii', 'th', 'ii', 's^'] 	 ['c', 'ii']
['thr'] 	 ['k', '@@', 'n^']
['thr'] 	 ['k', '@@', 'z', 'a', 'j^']
['n', 'ii', 's^'] 	 ['p', 'vva', 'ng^']
['k', 'oo', 'ng^', 'f'] 	 ['th', 'ee']
['th', 'uu', 'w', 'aa', 's^'] 	 ['th', 'uua', 'j^']
['m', 'a', 'j^'] 	 ['p', 'a', 'j^']
['thr', 'th', 'ii', 'th', 'ii'] 	 ['d', 'ii']
['thr'] 	 ['m', 'u', 'ng^']
['m', 'ii', 'j', 'ii', 't^'] 	 ['m', 'ii']
['ng', 'aa', 'n^', 'thr'] 	 ['ng', 'aa', 'm^']
['n', 'aa', 's^'] 	 ['ph', 'aa', 'j^']
['thr'] 	 ['k', 'iia', 'ng^']
['thr'] 	 ['z^', 'uu', 'p^']
['s', 'u', 'ch^', 'ng^'] 	 ['f', 'u', 'n^']
['d', 'uu', 't^'] 	 ['t', 'u', 't^']
['th', 'ii', 'th', 'xx', 'ng^'] 	 ['t', 'i', 'w^']
['thr', 'a', 'j^'] 	 ['w', 'xx', 'n^']
['r', '@@', 's^'] 	 ['j', '@@']
['j', 'aa', 'h', 'aa', 'ng^', 'thr'] 	 ['j', 'aa', 'w^']
['khw', '@@', 'ng^', 'm', 'vva', 'm^'] 	 ['p', 'oo', 'n^']
['n', 'aa', 'h', 'aa', 'm^'] 	 ['k', 'aa', 'm^']


All words in dictionary has only one syllable. 

* Step#1: convert multiple initial consonant at very first of prediction into one.
* Step#2: convert multiple vowel after step1 into one.
* Step#3: convert multiple final consonant at very first of prediction into one.
* Step#4: throw away remaining extra phones.

In [99]:
def post_processing(phones):
    c_i = list() #initial consonant
    v = list() #vowel
    c_f = list() #final consonant
    over_spelling = list()    
    state = 0
    phones = clean(phones)
    for i in range(len(phones)):
        if(state == 0):
            if(isInit(phones[i])): c_i.append(phones[i])
            elif(isVow(phones[i])): state = 1
            else: state = 2
                
        if(state == 1):
            if(isInit(phones[i])): state = 3
            elif(isVow(phones[i])): v.append(phones[i])
            else: state = 2
        
        if(state == 2):
            if(isInit(phones[i])): state = 3
            elif(isVow(phones[i])): state = 3
            else: c_f.append(phones[i])
        
        else: #state = 3
            pass
    
    return c_i, v, c_f
    
    
def remove_interfering(phones, n=1):
    state = None
    skip = 0
    i = 0
    
    while(i < len(phones)):
        ph = phones[i]
#         print(i, state, skip)
        if(state == None):
            state = ph
            skip = 0
        elif(ph != state):
            if(skip < n):
                skip += 1
            else:
                state = None               
                i = i - skip - 1               
        elif(skip > 0):
            for j in range(skip):
                phones[i-j-1] = state
            count = 0
        i += 1
        
    return phones


def remove_duplicate(phones):
    result = []
    for i in range(len(phones)):
        if(len(result) == 0):
            result.append(phones[i])
        elif(phones[i] != result[-1]):
            result.append(phones[i])
    return result


def clean(phones):
    temp = phones
#     while True:
    phones = remove_duplicate(remove_interfering(phones))
    #     if(temp == phones):
    #         break
    temp = phones
    return phones

In [102]:
remove_interfering(['aa','w','t','w','aa','aa'])

['aa', 'w', 'w', 'w', 'aa', 'aa']

In [103]:
remove_interfering(['aa','w','t','w','aa','aa']) == remove_interfering(['aa','w','t','w','aa','aa'])

True

In [None]:
post_processing(pred[k])

In [104]:
for k in keys:
    print('pred', pred[k])
    print('clean', clean(pred[k]))
    print('post', *post_processing(pred[k]))
    print('label', label[k], '\n')

pred ['thr']
clean ['thr']
post ['thr'] [] []
label ['p', 'aa'] 

pred ['t', 'ee', 'ng^', 't']
clean ['t', 'ee', 'ng^', 't']
post ['t'] ['ee'] ['ng^']
label ['j', 'e', 'ng^'] 

pred ['z', 'th', 'ii', 'th', 'ii', 's^']
clean ['z', 'th', 'ii', 's^']
post ['z', 'th'] ['ii'] ['s^']
label ['c', 'ii'] 

pred ['thr']
clean ['thr']
post ['thr'] [] []
label ['k', '@@', 'n^'] 

pred ['thr']
clean ['thr']
post ['thr'] [] []
label ['k', '@@', 'z', 'a', 'j^'] 

pred ['n', 'ii', 's^']
clean ['n', 'ii', 's^']
post ['n'] ['ii'] ['s^']
label ['p', 'vva', 'ng^'] 

pred ['k', 'oo', 'ng^', 'f']
clean ['k', 'oo', 'ng^', 'f']
post ['k'] ['oo'] ['ng^']
label ['th', 'ee'] 

pred ['th', 'uu', 'w', 'aa', 's^']
clean ['th', 'uu', 'w', 'aa', 's^']
post ['th'] ['uu'] []
label ['th', 'uua', 'j^'] 

pred ['m', 'a', 'j^']
clean ['m', 'a', 'j^']
post ['m'] ['a'] ['j^']
label ['p', 'a', 'j^'] 

pred ['thr', 'th', 'ii', 'th', 'ii']
clean ['thr', 'th', 'ii']
post ['thr', 'th'] ['ii'] []
label ['d', 'ii'] 

pred ['thr']
c

# Scoring

In [113]:
keys = sorted(list(pred.keys()))

In [114]:
def scoring(text , target):
    init, vowel, final = text     
    t_final = None
    
    if(len(target) == 3): t_init, t_vowel, t_final = target
    else: t_init, t_vowel = target
    
    correct = 0
    print(text, target)    
    
    if(t_init in init):
        correct += 1
#         print('init:', t_init)
        
    if(t_vowel in vowel):
        correct += 1
#         print('vowel:', t_vowel)
    
    if(t_final != None and t_final in final):
        correct += 1
#         print('final:', t_final)
    print(correct, end='\n\n')
    return correct

In [115]:
score = 0
for k in keys:    
    score += scoring(post_processing(pred[k]), label[k])
        
        
score / len(3*keys)

(['thr'], [], []) ['p', 'aa']
0

(['t'], ['ee'], ['ng^']) ['j', 'e', 'ng^']
1

(['z', 'th'], ['ii'], ['s^']) ['c', 'ii']
1

(['thr'], [], []) ['k', '@@', 'n^']
0

(['thr'], [], []) ['k', '@@', 'j^']
0

(['n'], ['ii'], ['s^']) ['p', 'vva', 'ng^']
0

(['k'], ['oo'], ['ng^']) ['th', 'ee']
0

(['th'], ['uu'], []) ['th', 'uua', 'j^']
1

(['m'], ['a'], ['j^']) ['p', 'a', 'j^']
2

(['thr', 'th'], ['ii'], []) ['d', 'ii']
1

(['thr'], [], []) ['m', 'u', 'ng^']
0

(['m'], ['ii'], ['t^']) ['m', 'ii']
2

(['ng'], ['aa'], ['n^']) ['ng', 'aa', 'm^']
2

(['n'], ['aa'], ['s^']) ['ph', 'aa', 'j^']
1

(['thr'], [], []) ['k', 'iia', 'ng^']
0

(['thr'], [], []) ['z^', 'uu', 'p^']
0

(['s'], ['u'], ['ch^', 'ng^']) ['f', 'u', 'n^']
1

(['d'], ['uu'], ['t^']) ['t', 'u', 't^']
1

(['th'], ['xx'], ['ng^']) ['t', 'i', 'w^']
0

(['thr'], ['a'], ['j^']) ['w', 'xx', 'n^']
0

(['r'], ['@@'], ['s^']) ['j', '@@']
1

(['j'], ['aa'], ['ng^']) ['j', 'aa', 'w^']
2

(['khw'], ['@@'], ['ng^']) ['p', 'oo', 'n^']
0

(['n'], 

0.25