In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

In [2]:
import pandas as pd

def read_zhuyin2upa_table(path="zhuyin2ipa.xlsx"):
    """Load the Zhuyin-to-IPA lookup table from an Excel workbook.

    Parameters
    ----------
    path : str or path-like, optional
        Workbook to read. Defaults to ``"zhuyin2ipa.xlsx"`` in the current
        working directory, which is the file the notebook originally
        hard-coded.

    Returns
    -------
    The object returned by ``pd.read_excel(path)``.
    """
    return pd.read_excel(path)

In [3]:
# Symbols that zhuyin2ipa treats specially when they stand alone
# (a lone symbol from this list gets 'ɨ' appended to its conversion).
SPECZ1 = 'ㄓ ㄔ ㄕ ㄖ ㄗ ㄘ ㄙ'.split()
# Symbols that may start one of the DUOZ combinations (see duo_convert).
SPECZ2 = 'ㄧ ㄨ ㄩ'.split()
# Tone marks stripped by remove_foursounds and inspected by foursounds.
FOURSOUNDS = 'ˊ ˇ ˋ ˙'.split()
# Two-symbol combinations that are looked up in the table as one unit.
DUOZ = 'ㄧㄢ ㄩㄢ ㄧㄣ ㄧㄥ ㄨㄥ ㄩㄥ'.split()

def is_ascii(check_str):
    """Return True when every character of ``check_str`` has a code point below 128.

    An empty string yields True.
    """
    for ch in check_str:
        if ord(ch) >= 128:
            return False
    return True

def foursounds(c):
    """Map the tone mark found in ``c`` to a digit string with a trailing space.

    The marks are tested in a fixed order and the first one present wins.
    When none of the marks occurs in ``c`` the result is ``"55 "``.
    """
    # Order matters: it mirrors the precedence of the checks.
    mark_to_digits = (
        ('ˋ', "51 "),
        ('ˊ', "35 "),
        ('ˇ', "21 "),
        ('˙', "0 "),
    )
    for mark, digits in mark_to_digits:
        if mark in c:
            return digits
    return "55 "
    
def convertZH(zdf, zh):
    """Look up the IPA text for a Zhuyin key in the conversion table.

    Parameters
    ----------
    zdf : pandas.DataFrame
        Lookup table; the code reads its ``'Zhuyin'`` and ``'IPA'`` columns.
    zh : str
        Zhuyin key to search for in the ``'Zhuyin'`` column.

    Returns
    -------
    str
        The matching ``IPA`` value(s) rendered by ``Series.to_string`` with
        all spaces removed, or ``''`` when no row matches.
    """
    matches = zdf.loc[zdf['Zhuyin'] == zh, 'IPA']
    # Test emptiness directly instead of comparing against the text pandas
    # happens to print for an empty Series ("Series([], )").
    if matches.empty:
        return ''
    # to_string may pad values with spaces; strip them all.
    return matches.to_string(index=False).replace(' ', '')

def remove_foursounds(zh):
    """Return ``zh`` with every tone mark listed in FOURSOUNDS removed."""
    # Each FOURSOUNDS entry is a single character, so filtering character by
    # character is equivalent to replacing each mark with ''.
    return ''.join(ch for ch in zh if ch not in FOURSOUNDS)

def is_duo(zh):
    """Return True when ``zh`` contains any of the combinations in DUOZ."""
    return any(combo in zh for combo in DUOZ)

def duo_convert(zdf, zh):
    """Convert a Zhuyin string that contains one of the DUOZ combinations.

    If the first symbol is in SPECZ2 the whole string is looked up as a
    single key. Otherwise the first symbol and the remainder are looked up
    separately and concatenated, with newlines removed from the remainder's
    result.

    Returns the converted text as a ``str``.
    """
    first, rest = zh[0], zh[1:]
    if first in SPECZ2:
        return convertZH(zdf, zh)
    return convertZH(zdf, first) + convertZH(zdf, rest).replace('\n', '')
    
def zhuyin2ipa(zdf, zhuyin):
    """Convert one Zhuyin syllable (tone marks allowed) to IPA pieces.

    Parameters
    ----------
    zdf : pandas.DataFrame
        Lookup table passed through to ``convertZH`` / ``duo_convert``.
    zhuyin : str
        Zhuyin text; tone marks are stripped before conversion.

    Returns
    -------
    str or list of str
        * a ``str`` from ``duo_convert`` when the syllable contains one of
          the DUOZ combinations;
        * a ``str`` (the symbol's conversion followed by ``'ɨ'``) when the
          syllable is a single SPECZ1 symbol;
        * otherwise a list with one converted entry per symbol, in order.
        Callers that only iterate and concatenate can treat both shapes alike.
    """
    zh = remove_foursounds(zhuyin)

    if is_duo(zh):
        return duo_convert(zdf, zh)

    # A lone SPECZ1 symbol has 'ɨ' appended to its conversion.
    if len(zh) == 1 and zh in SPECZ1:
        return convertZH(zdf, zh) + 'ɨ'

    # Every other case converts symbol by symbol. The previous index-based
    # loop peeked at zh[idx] after consuming a SPECZ1 symbol without checking
    # bounds, so a SPECZ1 symbol at the end of a longer string raised
    # IndexError; iterating directly avoids that.
    return [convertZH(zdf, symbol) for symbol in zh]


In [4]:
def chine2ipa(df, zdf, fn):
    """Append an IPA rendering to every cell of ``df`` and save it to Excel.

    For each cell, the text (with spaces removed) is passed to ``pinyin``
    with ``Style.BOPOMOFO``. ASCII items are copied through unchanged;
    other items are converted with ``zhuyin2ipa`` and followed by the digit
    string from ``foursounds``. The new cell value is the original text, a
    space, and the assembled string. The result is written with
    ``DataFrame.to_excel(fn)``; ``df`` itself is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Source sheet.
    zdf : pandas.DataFrame
        Zhuyin-to-IPA lookup table.
    fn : str
        Output path handed to ``to_excel``.

    Notes
    -----
    ``pinyin`` and ``Style`` come from the notebook's ``pypinyin`` import,
    which must have run before this function is called.
    """
    annotated = df.copy()

    for column in df.columns.values:
        for label in df.index.values:
            cell_text = str(df.loc[label, column])
            pieces = []
            for item in pinyin(cell_text.replace(' ', ''), style=Style.BOPOMOFO):
                token = item[0]
                if is_ascii(token):
                    pieces.append(token)
                else:
                    # zhuyin2ipa may return a str or a list of str; join
                    # flattens either into one string.
                    pieces.append(''.join(zhuyin2ipa(zdf, token)))
                    pieces.append(foursounds(token))
            annotated.loc[label, column] = cell_text + ' ' + ''.join(pieces)

    annotated.to_excel(fn)

In [5]:
def replace_oo_with_ipa():
    """Placeholder with no implementation yet; always returns None."""
    return None

In [6]:
def excel_page1(zdf):
    """Check the "實驗一" sheet of exp123.xlsx and print malformed cells.

    Each cell is normalized (spaces and adjacent o/x/? markers are separated
    with '|') and split on '-'. A cell is printed as ``column row segments``
    unless exactly four of its segments contain exactly one '|'.

    Parameters
    ----------
    zdf : pandas.DataFrame
        Unused here; accepted so all page functions share one signature.
    """
    # `header=0` is the documented keyword; the earlier `head=0` is not a
    # read_excel parameter and is rejected by current pandas.
    df = pd.read_excel('exp123.xlsx', sheet_name="實驗一", index_col=0, header=0)

    # Applied in this order; later rules see the output of earlier ones.
    separators = (
        (' ', '|'),
        ('oo', 'o|o'),
        ('xx', 'x|x'),
        ('??', '?|?'),
        ('ox', 'o|x'),
        ('xo', 'x|o'),
    )

    for c in df.columns.values:
        for r in df.index.values:
            text = str(df.loc[r, c]).strip()
            for old, new in separators:
                text = text.replace(old, new)
            x = text.split('-')
            paired = [seg for seg in x if seg.count('|') == 1]
            if len(paired) != 4:
                print(c, r, x)
    #newdf = df.copy()
    #
    #newdf.to_excel('實驗ㄧ.xlsx')
#excel_page1()

In [7]:
def excel_page2(zdf):
    """Check the "實驗二" sheet of exp123.xlsx and print malformed cells.

    Each cell is normalized (spaces and adjacent o/x/? markers are separated
    with '|') and split on '-'. A cell is printed as ``column row segments``
    unless exactly four of its segments contain exactly one '|'.

    Parameters
    ----------
    zdf : pandas.DataFrame
        Unused here; accepted so all page functions share one signature.
    """
    # `header=0` is the documented keyword; the earlier `head=0` is not a
    # read_excel parameter and is rejected by current pandas.
    df = pd.read_excel('exp123.xlsx', sheet_name="實驗二", index_col=0, header=0)

    # Applied in this order; later rules see the output of earlier ones.
    separators = (
        (' ', '|'),
        ('oo', 'o|o'),
        ('xx', 'x|x'),
        ('??', '?|?'),
        ('ox', 'o|x'),
        ('xo', 'x|o'),
    )

    for c in df.columns.values:
        for r in df.index.values:
            text = str(df.loc[r, c]).strip()
            for old, new in separators:
                text = text.replace(old, new)
            x = text.split('-')
            paired = [seg for seg in x if seg.count('|') == 1]
            if len(paired) != 4:
                print(c, r, x)
    #newdf = df.copy()
    #
    #newdf.to_excel('實驗ㄧ.xlsx')

#excel_page2()

In [8]:
from pypinyin import pinyin, lazy_pinyin, Style

def excel_page3(zdf):
    """Annotate the "實驗三(女)" sheet with IPA and save it to '實驗三(女).xlsx'.

    Parameters
    ----------
    zdf : pandas.DataFrame
        Zhuyin-to-IPA lookup table forwarded to ``chine2ipa``.
    """
    # `header=0` is the documented keyword; the earlier `head=0` is not a
    # read_excel parameter and is rejected by current pandas.
    df = pd.read_excel('exp123.xlsx', sheet_name="實驗三(女)", index_col=0, header=0)
    chine2ipa(df, zdf, '實驗三(女).xlsx')

#excel_page3(zdf)

In [9]:
# to read just one sheet to dataframe:

def excel_page4(zdf):
    """Annotate the "實驗三(男)" sheet with IPA and save it to '實驗三(男).xlsx'.

    Parameters
    ----------
    zdf : pandas.DataFrame
        Zhuyin-to-IPA lookup table forwarded to ``chine2ipa``.
    """
    # `header=0` is the documented keyword; the earlier `head=0` is not a
    # read_excel parameter and is rejected by current pandas.
    df = pd.read_excel('exp123.xlsx', sheet_name="實驗三(男)", index_col=0, header=0)
    chine2ipa(df, zdf, '實驗三(男).xlsx')


In [10]:
def main():
    """Load the Zhuyin-to-IPA table, then run the four sheet steps in order."""
    zdf = read_zhuyin2upa_table()
    for process_sheet in (excel_page1, excel_page2, excel_page3, excel_page4):
        process_sheet(zdf)
    
# Run the full pipeline only when executed as a script / top-level cell.
if __name__ == '__main__':
    main()  # or any other function you want to run


F1 颯穆 罵宿 散冒 曼臊 ['o|nu51', 'o|su51', 'sa51|mɑw51']
F1 搭粗 擦嘟 單糙 餐叨 ['X']
F3 岔怒 吶絀 顛鬧 難潮 ['fən51|nu51', 'o|o', 'tjɛn55|tɑw21']
F4 壩牧 罵怖 絆貿 蔓爆 ['o|o', 'o|o', 'pa51|pʰu51', 'pʰan51|o', 'x|x']
F4 塌書 沙禿 攤稍 山滔 ['san55|tʰu55', 'nan55|o', 'nan35|o', 'tʰu55']
F5 蹋注 詐吐 探趙 棧套 ['X']
F5 塌書 沙禿 攤稍 山滔 ['tʰa55|pu55', 'x|x', 'x|x']
F5 霸束 霎布 拌劭 善爆 ['X']
F5 哈甦 撒忽 鼾艘 三蒿 ['o|tʂu55', 'o|o', 'lan35|sɑw55']
F6 罵怒 那木 慢鬧 難冒 ['X']
F6 搭粗 擦嘟 單糙 餐叨 ['X']
F7 綻怒 納助 湛鬧 難兆 ['X']
M5 榨陸 臘祝 綻烙 爛趙 ['X', 'X', 'o|nan35', 'o|o']
M5 八禿 他逋 班掏 貪包 ['o|tʰɑw55', 'o|ʐən35', 'X', 'o|o']
M5 塌書 沙禿 攤稍 山滔 ['X', 'X', 'o|o', 'tʰa55|o']
M5 颯穆 罵宿 散冒 曼臊 ['su51|sa51', 'san55|nu35', 'X', 'x|x']
M5 罷瀑 怕不 辦泡 盼報 ['X', 'o|o', 'X', 'o|o']
M5 納路 辣怒 難烙 爛鬧 ['nɑw35|nu35', 'X', 'o|nu51', 'nɑw51|nɑw51']
M5 霸束 霎布 拌劭 善爆 ['X', 'X', 'X', 'sɑŋ51|o']
M5 大搙 鈉菟 淡鬧 難稻 ['o|o', 'X', 'nan51|tʰɑw51', 'o|o']
M5 搭粗 擦嘟 單糙 餐叨 ['X', 'tsʰan55|o', 'X', 'o|o']
M5 帕肅 撒曝 畔臊 散炮 ['X', 'X', 'X', 'sa55|pʰɑw51']
M5 趴初 插撲 攀超 摻拋 ['X', 'o|o', 'X', 'o|tɑw35']
M5 薩路 蠟速 散烙 爛臊 ['o|o', 'sa55|l