In [79]:
import parselmouth
import os.path
import glob
import pandas as pd
import numpy as np

In [74]:
# read .wav files and make mono .wav files 
def read_wav(wav_file):
    print("processing {}...".format(wav_file))
    snd = parselmouth.Sound(wav_file)
    if len(snd.extract_all_channels()) == 2:
        snd_A = snd.extract_all_channels()[0]
        snd_B = snd.extract_all_channels()[1]
               
        snd_A.save(os.path.splitext(wav_file)[0] + "A.wav", 'WAV')
        snd_B.save(os.path.splitext(wav_file)[0] + "B.wav", 'WAV')
        path_A = os.path.splitext(wav_file)[0] + "A.wav"
        path_B = os.path.splitext(wav_file)[0] + "B.wav"
        print(path_A, path_B)
        return snd_A, snd_B, path_A, path_B

In [80]:
# for wav_file in glob.glob("../data/*.wav"):
read_wav("../data/sw2005.wav")

processing ../data/sw2005.wav...
../data/sw2005A.wav ../data/sw2005B.wav


(<parselmouth.Sound at 0x12fd9d688>,
 <parselmouth.Sound at 0x12fe01c70>,
 '../data/sw2005A.wav',
 '../data/sw2005B.wav')

In [82]:
# read transcription files and add headers
def read_trans(path):
    trans = pd.read_csv(path, sep='\t', header=None)
    trans.columns = ["ms_id", "ldc_id", "start_time", "end_time", "tag", "ldc_trans", "ms_trans"]
    
    # drop rows which have the same start time
    trans = trans.drop_duplicates(subset="end_time")
    
    # however index cannot be reset, so convert df to nparray
    trans = trans.to_numpy()
    return trans

In [92]:
# get path list and trans list
path_list = sorted(glob.glob("../data/*.text"))
trans_list = []
for i in range(len(path_list)):
    trans_list.append(read_trans(path_list[i]))
trans_list

[array([['sw2005A-ms98-a-0001', 'A.1', 0.0, 0.8, nan, '[silence]',
         '[silence]'],
        ['sw2005A-ms98-a-0001', 'A.1', 0.8, 1.28, nan, 'Okay', 'okay'],
        ['sw2005A-ms98-a-0001', 'A.1', 1.28, 1.5, nan, 'Uh', 'uh'],
        ['sw2005A-ms98-a-0001', 'A.1', 1.5, 1.985, nan, 'first', 'first'],
        ['sw2005A-ms98-a-0001', 'A.1', 1.985, 2.48225, nan, 'um', 'um'],
        ['sw2005A-ms98-a-0001', 'A.1', 2.48225, 2.721, nan, '[silence]',
         '[silence]'],
        ['sw2005A-ms98-a-0001', 'A.1', 2.721, 2.83775, nan, 'I', 'i'],
        ['sw2005A-ms98-a-0001', 'A.1', 2.83775, 3.0, nan, 'need', 'need'],
        ['sw2005A-ms98-a-0001', 'A.1', 3.0, 3.06, nan, 'to', 'to'],
        ['sw2005A-ms98-a-0001', 'A.1', 3.06, 3.46, nan, 'know', 'know'],
        ['sw2005A-ms98-a-0001', 'A.1', 3.46, 3.73925, nan, 'uh', 'uh'],
        ['sw2005A-ms98-a-0001', 'A.1', 3.73925, 3.8145, nan, '[silence]',
         '[silence]'],
        ['sw2005A-ms98-a-0001', 'A.1', 3.8145, 3.99, '<DEL>', '---',
 

In [24]:
import sys
np.set_printoptions(threshold=sys.maxsize)

(1381, 7)
[['sw2005A-ms98-a-0001' 'A.1' 0.0 0.8 nan '[silence]' '[silence]']
 ['sw2005A-ms98-a-0001' 'A.1' 0.8 1.28 nan 'Okay' 'okay']
 ['sw2005A-ms98-a-0001' 'A.1' 1.28 1.5 nan 'Uh' 'uh']
 ['sw2005A-ms98-a-0001' 'A.1' 1.5 1.985 nan 'first' 'first']
 ['sw2005A-ms98-a-0001' 'A.1' 1.985 2.48225 nan 'um' 'um']
 ['sw2005A-ms98-a-0001' 'A.1' 2.48225 2.721 nan '[silence]' '[silence]']
 ['sw2005A-ms98-a-0001' 'A.1' 2.721 2.83775 nan 'I' 'i']
 ['sw2005A-ms98-a-0001' 'A.1' 2.83775 3.0 nan 'need' 'need']
 ['sw2005A-ms98-a-0001' 'A.1' 3.0 3.06 nan 'to' 'to']
 ['sw2005A-ms98-a-0001' 'A.1' 3.06 3.46 nan 'know' 'know']
 ['sw2005A-ms98-a-0001' 'A.1' 3.46 3.73925 nan 'uh' 'uh']
 ['sw2005A-ms98-a-0001' 'A.1' 3.73925 3.8145 nan '[silence]' '[silence]']
 ['sw2005A-ms98-a-0001' 'A.1' 3.8145 3.99 '<DEL>' '---' 'h[ow]-']
 ['sw2005A-ms98-a-0001' 'A.1' 3.99 4.287875 nan 'how' 'how']
 ['sw2005A-ms98-a-0001' 'A.1' 4.287875 4.400875 nan 'do' 'do']
 ['sw2005A-ms98-a-0001' 'A.1' 4.400875 4.602125 nan 'you' 'you']


In [93]:
# function to create a TextGrid 
def text_grid(sound, path, trans):
    tg = parselmouth.praat.call(sound, "To TextGrid", "trans", "")
    
    # call.() or parselmouth.praat.call()?
    # timeit df.index < df.shape < df.[0].count()
    for i in range(len(trans)):
        if i < len(trans)-1:
            # one time point can only insert one boundary, need to delete rows with same time points
            # parselmouth.praat.call(tg, "Insert boundary", 1, trans.start_time[i+1])
            parselmouth.praat.call(tg, "Insert boundary", 1, trans[i, 3])
            parselmouth.praat.call(tg, "Set interval text", 1, i+1, trans[i, 6])
        else:
            parselmouth.praat.call(tg, "Set interval text", 1, i+1, trans[i, 6])
        
    tg.save(os.path.splitext(path)[0] + ".TextGrid")

In [94]:
text_grid(snd_B, path_B, trans_list[1])

In [96]:
pf = pd.read_csv("/Users/the-imitation-gamer/Documents/SLP/Msc_Dissertation/praat-prosody_v0.1.1/demo/work_dir/pf_files/demo_C-pf.Tab", sep='\t')
pf

Unnamed: 0,WORD$,WAV$,SPKR_ID$,GEN$,WORD_START,WORD_END,FWORD$,FWORD_START,FWORD_END,PAUSE_START,...,AVG_PHONE_DUR_NSP,MAX_PHONE_DUR_NSP,AVG_VOWEL_DUR_Z,MAX_VOWEL_DUR_Z,AVG_VOWEL_DUR_N,MAX_VOWEL_DUR_N,AVG_VOWEL_DUR_ZSP,MAX_VOWEL_DUR_ZSP,AVG_VOWEL_DUR_NSP,MAX_VOWEL_DUR_NSP
0,UMHM,../demo/data/demo_C.wav,C,female,672,709,OKAY,1442,1464,709,...,?,?,0.576141,1.743788,1.559701,2.69403,0.2886751345948128,1.1547005383792515,1.3199999999999998,2.28
1,OKAY,../demo/data/demo_C.wav,C,female,1442,1464,I'LL,1464,1485,1464,...,?,?,-0.779463,-0.62137,0.55716,0.651163,?,?,?,?
2,I'LL,../demo/data/demo_C.wav,C,female,1464,1485,I'LL,1547,1557,1485,...,1.5897435897435899,1.8461538461538463,0.616212,0.616212,1.560976,1.560976,1.1013775835078696,1.1013775835078696,1.8461538461538463,1.8461538461538463
3,I'LL,../demo/data/demo_C.wav,C,female,1547,1557,WRITE,1557,1580,1557,...,0.8038461538461539,0.8076923076923077,-0.348294,-0.348294,0.682927,0.682927,-0.25031308716087936,-0.25031308716087936,0.8076923076923077,0.8076923076923077
4,WRITE,../demo/data/demo_C.wav,C,female,1557,1580,YOU,1608,1627,1580,...,?,?,-0.776963,-0.776963,0.292683,0.292683,-0.8510644963469899,-0.8510644963469899,0.3461538461538462,0.3461538461538462
5,YOU,../demo/data/demo_C.wav,C,female,1608,1627,TELL,1630,1652,1627,...,1.1928879310344827,1.4482758620689655,0.564076,0.564076,1.25,1.25,-0.13245323570650425,-0.13245323570650425,0.9375,0.9375
6,TELL,../demo/data/demo_C.wav,C,female,1630,1652,ME,1652,1668,1652,...,?,?,1.733081,1.733081,2.115385,2.115385,?,?,?,?
7,ME,../demo/data/demo_C.wav,C,female,1652,1668,YOU,1677,1689,1668,...,1.1014285714285714,1.5599999999999998,0.434727,0.434727,1.322034,1.322034,1.12089707663561,1.12089707663561,1.5599999999999998,1.5599999999999998
8,YOU,../demo/data/demo_C.wav,C,female,1677,1689,TELL,1689,1714,1689,...,0.7467672413793104,0.9310344827586208,-0.564076,-0.564076,0.75,0.75,-0.9271726499455304,-0.9271726499455304,0.5625,0.5625
9,TELL,../demo/data/demo_C.wav,C,female,1689,1714,ME,1714,1725,1714,...,?,?,-0.059761,-0.059761,0.961538,0.961538,?,?,?,?


In [97]:
import matplotlib.pyplot as plt
import statsmodels.graphics.api as smg

In [110]:
x = pf.MEAN_F0.to_numpy()
y = pf.MEAN_STYLFIT_ENERGY.to_numpy()
c_matrix = np.array([x, y])
corr_matrix = np.corrcoef(c_matrix)
corr_matrix
#smg.plot_corr(corr_matrix, xnames=["MEAN_F0"], ynames=["MEAN_STYLFIT_F0"])
#plt.show()

array([[1.        , 0.49323877],
       [0.49323877, 1.        ]])

In [107]:
np.array([x, y])

array([[101.53384297, 102.26540546, 103.75305264, 126.39658187,
        121.86414019, 131.75730154, 144.54508782, 135.17518986,
        136.2868336 , 150.31434577, 135.7321238 , 131.72439049,
        137.2083989 , 134.54963462, 112.70338185, 118.64241123,
        111.50074561, 132.76082917],
       [101.86814276, 102.54362436, 105.18856861, 128.14314833,
        125.72840181, 131.43813768, 141.65257321, 129.13140631,
        135.95448171, 152.71338499, 144.22366273, 135.73394047,
        129.0105336 , 125.20146982, 114.66466318, 112.1342559 ,
        115.51029954, 134.7809936 ]])