In [1]:
import parselmouth as ps
from parselmouth.praat import call as pcall  
from parselmouth import Sound
from pathlib import Path
from phonlab.tidypraat import formant2df
import audiolabel
from audiolabel import read_label
import pandas as pd
import os
from phonlab.utils import dir2df
import numpy as np
from itertools import chain
np.Inf = np.inf

In [2]:
# Non-varied parameters that are defined in the global environment and used in functions.
downtotableparams = {
    'left_edge': 0.0,
    'right_edge': 0.0,   # All times in formantpath
    'coeff_by_track': '3 3 3 3 3',
    'power': 1.25,
    'inc_frame_num': 'no',
    'inc_time': 'yes',
    'num_time_decimal': 6,
    'inc_intensity': 'yes',
    'num_intensity_decimal': 3,
    'inc_num_formants': 'yes',
    'num_freq_decimal': 3,
    'inc_bw': 'yes',
    'inc_optimal_ceil': 'yes',
    'inc_min_stress': 'yes',
}
downtotabledtype = {
    'time(s)': np.float32,
    'intensity': np.float32,
    'nformants': np.int16,
    'F1(Hz)': np.float32,
    'B1(Hz)': np.float32,
    'F2(Hz)': np.float32,
    'B2(Hz)': np.float32,
    'F3(Hz)': np.float32,
    'B3(Hz)': np.float32,
    'F4(Hz)': np.float32,
    'B4(Hz)': np.float32,
    'F5(Hz)': np.float32,
    'B5(Hz)': np.float32,
    'Ceiling(Hz)': np.float32,
    'Stress': np.float32,
}
# Cols created by 'Down to Table (optimal interval)...'
downtotablecols = list(downtotabledtype.keys())

In [36]:
# fpathparams = {
#     'time_step': 0.005,
#     'max_number_of_formants': 4,
#     'mid_formant': 5000,
#     'window_length': 0.025,
#     'pre_emphasis_from': 50,
#     'LPC_model': 'Robust',
#     'ceiling_type_size': 0.05,
#     'num_steps_up': 4,
#     'tolerance_1': 1e-6,
#     'tolerance_2': 1e-6,
#     'number_of_std_dev': 1.5,
#     'max_number_of_iterations': 5,
#     'tolerance': 0.000001, 
#     'get_sources_as_multichan': 'no'
# }

In [38]:
# fpathargs = [
#     fpathparams['time_step'],
#     fpathparams['max_number_of_formants'],
#     fpathparams['mid_formant'],
#     fpathparams['window_length'],
#     fpathparams['pre_emphasis_from'],
#     fpathparams['LPC_model'],
#     fpathparams['ceiling_type_size'],
#     fpathparams['num_steps_up'],
#     fpathparams['tolerance_1'],
#     fpathparams['tolerance_2'],
#     fpathparams['number_of_std_dev'],
#     fpathparams['max_number_of_iterations'],
#     fpathparams['tolerance'],
#     fpathparams['get_sources_as_multichan']
# ]

In [23]:
# def get_formants_for_token(row, fdf):
#     """
#     Process each token in `tg` to find the median formant values at the midpoint and three points on either side.
#     Returns a DataFrame with t1, t2, midpt, med_f1, med_f2, med_f3, and their corresponding ceiling values.
#     """
#     # Calculate the midpoint for the token
#     midpt = (row.t1 + row.t2) / 2

#     # Select and filter points around the midpoint in fdf
#     tkdf = fdf[['t1', 'F1(Hz)', 'F2(Hz)', 'F3(Hz)', 'ceiling']].copy()
#     tkdf['dist_to_midpt'] = abs(tkdf['t1'] - midpt)
#     tkdf = tkdf.nsmallest(7, 'dist_to_midpt')

#     # Calculate medians for f1, f2, and f3
#     med_f1 = tkdf['F1(Hz)'].median()
#     med_f2 = tkdf['F2(Hz)'].median()
#     med_f3 = tkdf['F3(Hz)'].median()

#     # Create a dictionary for the token's results
#     result_row = {
#         't1': row.t1,
#         't2': row.t2,
#         'speaker': row.speaker,
#         'recording': row.recording,
#         'phones': row.phones,
#         'phone_dur': row.phone_dur,
#         'midpt': midpt,
#         'prev': row.prev,
#         'nxt': row.nxt,
#         'words': row.words,
#         'med_f1': med_f1,
#         'med_f2': med_f2,
#         'med_f3': med_f3,
#         'ceiling_f1': ceiling_f1,
#         'ceiling_f2': ceiling_f2,
#         'ceiling_f3': ceiling_f3,
#     }

#     # Return the result as a DataFrame with one row
#     return pd.DataFrame([result_row])

# Single speaker test

## Single-word toy file

In [11]:
tdir = Path('/Users/ambergalvano/Downloads/test').absolute()
tspkrdf = dir2df(tdir, fnpat=r'\.wav$')
tspkrdf

Unnamed: 0,relpath,fname
0,S01,dollar-store.wav


In [12]:
vowels = ['IY','IY0', 'IY1', 'IY2', 'IH', 'IH0', 'IH1', 'IH2', 'EY', 'EY0', 'EY1', 'EY2', 'EH', 'EH0', 'EH1', 'EH2', 
          'AH', 'AH0', 'AH1', 'AE', 'AE0', 'AE1', 'AE2', 'ER', 'ER0', 'ER1', 'ER2', 'UW', 'UW0', 'UW1', 'UW2','UH', 
          'UH0', 'UH1', 'UH2', 'OW', 'OW0', 'OW1', 'OW2', 'AA', 'AA0', 'AA1', 'AA2', 'AO', 'AO0', 'AO1', 'AO2']

for row in tspkrdf.loc[[0]].itertuples():
    spkrfile = Path(tdir, row.relpath, row.fname)
    [phdf, wrdf] = read_label(spkrfile.with_suffix('.TextGrid'), ftype='praat', 
        tiers=['phones', 'words'])
    
    print(f"Columns in phdf: {phdf.columns}")
    print(f"First few rows of phdf:\n{phdf.head()}")
    
    phdf = phdf[phdf['phones']!=''].copy()
    phdf['phone_dur'] = phdf['t2']-phdf['t1'] 
    phdf['prev']=phdf['phones'].shift()
    phdf['nxt']=phdf['phones'].shift(-1)
    phdf = phdf[phdf['phones'].isin(vowels) & (phdf['phone_dur'] >= 0.05)] 
    
    tg = pd.merge_asof(
        phdf[['t1', 't2', 'phones', 'phone_dur', 'prev', 'nxt']],               
        wrdf[['t1', 'words']], 
        on='t1', 
        suffixes=['_ph', '_wd']
    )
    
    tg.insert(2, 'speaker', row.relpath)
    tg.insert(3, 'recording', row.fname) 
    
    print('Done with TG') 

tg

Columns in phdf: Index(['t1', 't2', 'phones', 'fname'], dtype='object')
First few rows of phdf:
         t1        t2 phones  \
0  0.000000  0.067296      D   
1  0.067296  0.171998     AA   
2  0.171998  0.227970      L   
3  0.227970  0.296455     ER   
4  0.296455  0.426179      S   

                                               fname  
0  /Users/ambergalvano/Downloads/test/S01/dollar-...  
1  /Users/ambergalvano/Downloads/test/S01/dollar-...  
2  /Users/ambergalvano/Downloads/test/S01/dollar-...  
3  /Users/ambergalvano/Downloads/test/S01/dollar-...  
4  /Users/ambergalvano/Downloads/test/S01/dollar-...  
Done with TG


Unnamed: 0,t1,t2,speaker,recording,phones,phone_dur,prev,nxt,words
0,0.067296,0.171998,S01,dollar-store.wav,AA,0.104702,D,L,dollar
1,0.22797,0.296455,S01,dollar-store.wav,ER,0.068484,L,S,dollar
2,0.480835,0.590805,S01,dollar-store.wav,AO,0.10997,T,R,store


In [7]:
# loop over each vowel token
snd = Sound('/Users/ambergalvano/Downloads/test/S01/dollar-store.wav')
results = []

# Iterate through each row of the TextGrid DataFrame (tg)
for _, row in tg.iterrows():
    part = snd.extract_part(row.t1 - 0.05, row.t2 + 0.05, preserve_times=True)
    fp_token = pcall(part, 'To FormantPath...', 0.005, 5, 5250, 0.025, 50, 'Robust', 0.05, 5, 
                     1e-6, 1e-6, 1.5, 5, 0.000001, 'no')
    
    opttable = pcall(fp_token, 'Down to Table (optimal interval)...', *downtotableparams.values())
    optmatrix = pcall(opttable, 'Down to Matrix')
    fmtdf = pd.DataFrame({
        c: pd.Series(optmatrix.values[:, i], dtype=downtotabledtype[c]) for i, c in enumerate(downtotablecols)
    })
    
    # fmtdf['segment'] = row['segment']  # Replace 'segment' with a meaningful identifier column from your TextGrid

    fmtdf['t1'] = row.t1
    fmtdf['t2'] = row.t2
    
    results.append(fmtdf)
    

final_fmtdf = pd.concat(results, ignore_index=True)
final_fmtdf

Unnamed: 0,time(s),intensity,nformants,F1(Hz),B1(Hz),F2(Hz),B2(Hz),F3(Hz),B3(Hz),F4(Hz),B4(Hz),F5(Hz),B5(Hz),Ceiling(Hz),Stress,t1,t2
0,0.044647,0.00006,4,430.479004,514.500977,1695.396973,981.478027,2929.024902,740.489990,3987.624023,919.161987,,,4994.000000,20.129999,0.067296,0.171998
1,0.049647,0.00100,5,674.703003,372.981995,1849.902954,773.984009,3112.315918,1028.932007,3586.970947,1894.979980,4420.027832,707.030029,4994.000000,20.129999,0.067296,0.171998
2,0.054647,0.00500,5,663.950989,270.420990,1853.202026,713.364014,3089.398926,764.244995,3863.785889,2084.045898,4411.801758,836.869995,4994.000000,20.129999,0.067296,0.171998
3,0.059647,0.00400,4,572.343018,227.395996,1679.572021,572.463989,2937.693115,334.194000,4004.259033,465.640015,,,4994.000000,20.129999,0.067296,0.171998
4,0.064647,0.00100,4,555.164001,152.682999,1519.003052,270.213013,2912.375000,301.436005,4041.623047,446.658997,,,4994.000000,20.129999,0.067296,0.171998
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,0.593320,0.00009,5,560.492981,99.955002,808.505005,491.520996,1776.784058,201.552002,2075.486084,821.101990,3898.639893,853.315979,4088.699951,116.250000,0.480835,0.590805
83,0.598320,0.00009,5,576.497009,124.785004,763.091003,507.358002,1791.120972,185.813004,1984.865967,866.460022,3794.907959,800.151978,4088.699951,116.250000,0.480835,0.590805
84,0.603320,0.00009,5,569.695007,129.587997,772.648987,504.725006,1813.791992,193.011002,1950.952026,932.094971,3768.252930,753.401001,4088.699951,116.250000,0.480835,0.590805
85,0.608320,0.00010,5,601.984985,224.612000,724.025024,538.166016,1803.836060,244.225006,1982.265015,973.611023,3779.579102,623.651001,4088.699951,116.250000,0.480835,0.590805


In [10]:
merged_df = pd.merge(tg, final_fmtdf, on=['t1', 't2'], how='left')
merged_df.columns
merged_df

Unnamed: 0,t1,t2,speaker,recording,phones,phone_dur,prev,nxt,words,time(s),...,F2(Hz),B2(Hz),F3(Hz),B3(Hz),F4(Hz),B4(Hz),F5(Hz),B5(Hz),Ceiling(Hz),Stress
0,0.067296,0.171998,S01,dollar-store.wav,AA,0.104702,D,L,dollar,0.044647,...,1695.396973,981.478027,2929.024902,740.489990,3987.624023,919.161987,,,4994.000000,20.129999
1,0.067296,0.171998,S01,dollar-store.wav,AA,0.104702,D,L,dollar,0.049647,...,1849.902954,773.984009,3112.315918,1028.932007,3586.970947,1894.979980,4420.027832,707.030029,4994.000000,20.129999
2,0.067296,0.171998,S01,dollar-store.wav,AA,0.104702,D,L,dollar,0.054647,...,1853.202026,713.364014,3089.398926,764.244995,3863.785889,2084.045898,4411.801758,836.869995,4994.000000,20.129999
3,0.067296,0.171998,S01,dollar-store.wav,AA,0.104702,D,L,dollar,0.059647,...,1679.572021,572.463989,2937.693115,334.194000,4004.259033,465.640015,,,4994.000000,20.129999
4,0.067296,0.171998,S01,dollar-store.wav,AA,0.104702,D,L,dollar,0.064647,...,1519.003052,270.213013,2912.375000,301.436005,4041.623047,446.658997,,,4994.000000,20.129999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,0.480835,0.590805,S01,dollar-store.wav,AO,0.109970,T,R,store,0.593320,...,808.505005,491.520996,1776.784058,201.552002,2075.486084,821.101990,3898.639893,853.315979,4088.699951,116.250000
83,0.480835,0.590805,S01,dollar-store.wav,AO,0.109970,T,R,store,0.598320,...,763.091003,507.358002,1791.120972,185.813004,1984.865967,866.460022,3794.907959,800.151978,4088.699951,116.250000
84,0.480835,0.590805,S01,dollar-store.wav,AO,0.109970,T,R,store,0.603320,...,772.648987,504.725006,1813.791992,193.011002,1950.952026,932.094971,3768.252930,753.401001,4088.699951,116.250000
85,0.480835,0.590805,S01,dollar-store.wav,AO,0.109970,T,R,store,0.608320,...,724.025024,538.166016,1803.836060,244.225006,1982.265015,973.611023,3779.579102,623.651001,4088.699951,116.250000


In [13]:
merged_df.to_csv('formants_dollar-store_test_12-03-24.csv')

## Whole interview

In [3]:
datadir = Path('./data').absolute()
spkrdf = dir2df(datadir, fnpat=r'\.wav$')
spkrdf

Unnamed: 0,relpath,fname
0,S01,S01_interview.wav
1,S02,S02_interview.wav
2,S03,S03_interview_1.wav
3,S03,S03_interview_2.wav
4,S05,S05_interview.wav
5,S06,S06_interview.wav
6,S07,S07_interview.wav
7,S08,S08_interview.wav
8,S09,S09_interview.wav
9,S10,S10_interview.wav


In [24]:
vowels = ['IY','IY0', 'IY1', 'IY2', 'IH', 'IH0', 'IH1', 'IH2', 'EY', 'EY0', 'EY1', 'EY2', 'EH', 'EH0', 'EH1', 'EH2', 
          'AH', 'AH0', 'AH1', 'AE', 'AE0', 'AE1', 'AE2', 'ER', 'ER0', 'ER1', 'ER2', 'UW', 'UW0', 'UW1', 'UW2','UH', 
          'UH0', 'UH1', 'UH2', 'OW', 'OW0', 'OW1', 'OW2', 'AA', 'AA0', 'AA1', 'AA2', 'AO', 'AO0', 'AO1', 'AO2']

for row in spkrdf.loc[[0]].itertuples():
    print(row.relpath)
    
    spkrfile = Path(datadir, row.relpath, row.fname)
    [phdf, wrdf] = read_label(spkrfile.with_suffix('.TextGrid'), ftype='praat', 
        tiers=['phones', 'words'])
    
    phdf = phdf[phdf['phones']!=''].copy()
    phdf['phone_dur'] = phdf['t2']-phdf['t1'] 
    phdf['prev']=phdf['phones'].shift()
    phdf['nxt']=phdf['phones'].shift(-1)
    phdf = phdf[phdf['phones'].isin(vowels) & (phdf['phone_dur'] >= 0.05)] 
    
    tg = pd.merge_asof(
        phdf[['t1', 't2', 'phones', 'phone_dur', 'prev', 'nxt']],               
        wrdf[['t1', 'words']], 
        on='t1', 
        suffixes=['_ph', '_wd']
    )
    
    tg.insert(2, 'speaker', row.relpath)
    tg.insert(3, 'recording', row.fname) 
    
    print('Done with TG') 

    wav = datadir / row.relpath / row.fname
    snd = Sound(str(wav))
    
    for _, row in tg.iterrows():
        part = snd.extract_part(row.t1 - 0.05, row.t2 + 0.05, preserve_times=True)
        fp_token = pcall(part, 'To FormantPath...', 0.005, 5, 5250, 0.025, 50, 'Robust', 0.05, 5, 
                         1e-6, 1e-6, 1.5, 5, 0.000001, 'no')
        
        opttable = pcall(fp_token, 'Down to Table (optimal interval)...', *downtotableparams.values())
        optmatrix = pcall(opttable, 'Down to Matrix')
        fmtdf = pd.DataFrame({
            c: pd.Series(optmatrix.values[:, i], dtype=downtotabledtype[c]) for i, c in enumerate(downtotablecols)
        })
            
        fmtdf['t1'] = row.t1
        fmtdf['t2'] = row.t2
        
        results.append(fmtdf)
    
    print('Done with formant extraction')
        
final_fmtdf = pd.concat(results, ignore_index=True)
final_fmtdf

S01
Done with TG
Done with formant extraction


Unnamed: 0,time(s),intensity,nformants,F1(Hz),B1(Hz),F2(Hz),B2(Hz),F3(Hz),B3(Hz),F4(Hz),B4(Hz),F5(Hz),B5(Hz),Ceiling(Hz),Stress,t1,t2
0,0.044647,0.00006,4,430.479004,514.500977,1695.396973,981.478027,2929.024902,740.489990,3987.624023,919.161987,,,4994.0,20.129999,0.067296,0.171998
1,0.049647,0.00100,5,674.703003,372.981995,1849.902954,773.984009,3112.315918,1028.932007,3586.970947,1894.979980,4420.027832,707.030029,4994.0,20.129999,0.067296,0.171998
2,0.054647,0.00500,5,663.950989,270.420990,1853.202026,713.364014,3089.398926,764.244995,3863.785889,2084.045898,4411.801758,836.869995,4994.0,20.129999,0.067296,0.171998
3,0.059647,0.00400,4,572.343018,227.395996,1679.572021,572.463989,2937.693115,334.194000,4004.259033,465.640015,,,4994.0,20.129999,0.067296,0.171998
4,0.064647,0.00100,4,555.164001,152.682999,1519.003052,270.213013,2912.375000,301.436005,4041.623047,446.658997,,,4994.0,20.129999,0.067296,0.171998
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77014,1055.994995,0.00005,4,808.184021,80.296997,1411.390015,55.686001,2707.562988,725.008972,4202.729980,171.472000,,,5250.0,50.020000,1055.800000,1055.990000
77015,1056.000000,0.00003,5,784.513977,108.293999,1451.547974,75.914001,2698.145020,909.895996,4057.861084,2445.483887,4217.457031,100.747002,5250.0,50.020000,1055.800000,1055.990000
77016,1056.005005,0.00003,5,778.262024,124.509003,1474.616943,155.272995,2647.548096,759.315979,4151.089844,2061.368896,4253.875977,244.147995,5250.0,50.020000,1055.800000,1055.990000
77017,1056.010010,0.00006,5,748.396973,157.764008,1528.812012,269.334015,2540.831055,1037.921021,3692.583008,2298.475098,4389.940918,153.194000,5250.0,50.020000,1055.800000,1055.990000


In [26]:
merged_df = pd.merge(tg, final_fmtdf, on=['t1', 't2'], how='left')
print(merged_df.columns)
merged_df

Index(['t1', 't2', 'speaker', 'recording', 'phones', 'phone_dur', 'prev',
       'nxt', 'words', 'time(s)', 'intensity', 'nformants', 'F1(Hz)', 'B1(Hz)',
       'F2(Hz)', 'B2(Hz)', 'F3(Hz)', 'B3(Hz)', 'F4(Hz)', 'B4(Hz)', 'F5(Hz)',
       'B5(Hz)', 'Ceiling(Hz)', 'Stress'],
      dtype='object')


Unnamed: 0,t1,t2,speaker,recording,phones,phone_dur,prev,nxt,words,time(s),...,F2(Hz),B2(Hz),F3(Hz),B3(Hz),F4(Hz),B4(Hz),F5(Hz),B5(Hz),Ceiling(Hz),Stress
0,10.579,10.779,S01,S01_interview.wav,EY1,0.20,D,IH1,day,10.554000,...,2286.775879,314.464996,3146.791016,549.070984,4274.974121,1007.372009,,,5519.200195,75.019997
1,10.579,10.779,S01,S01_interview.wav,EY1,0.20,D,IH1,day,10.559000,...,2175.763916,218.878006,3128.771973,589.028015,4202.811035,641.856018,,,5519.200195,75.019997
2,10.579,10.779,S01,S01_interview.wav,EY1,0.20,D,IH1,day,10.564000,...,2191.938965,215.955002,3034.591064,468.304993,4266.092773,659.435974,,,5519.200195,75.019997
3,10.579,10.779,S01,S01_interview.wav,EY1,0.20,D,IH1,day,10.569000,...,2202.987061,408.235992,2912.492920,289.042999,4454.335938,657.020020,,,5519.200195,75.019997
4,10.579,10.779,S01,S01_interview.wav,EY1,0.20,D,IH1,day,10.574000,...,2236.481934,285.800995,2898.188965,538.234985,4527.219238,414.220001,,,5519.200195,75.019997
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76927,1055.800,1055.990,S01,S01_interview.wav,AO1,0.19,L,T,lot,1055.994995,...,1411.390015,55.686001,2707.562988,725.008972,4202.729980,171.472000,,,5250.000000,50.020000
76928,1055.800,1055.990,S01,S01_interview.wav,AO1,0.19,L,T,lot,1056.000000,...,1451.547974,75.914001,2698.145020,909.895996,4057.861084,2445.483887,4217.457031,100.747002,5250.000000,50.020000
76929,1055.800,1055.990,S01,S01_interview.wav,AO1,0.19,L,T,lot,1056.005005,...,1474.616943,155.272995,2647.548096,759.315979,4151.089844,2061.368896,4253.875977,244.147995,5250.000000,50.020000
76930,1055.800,1055.990,S01,S01_interview.wav,AO1,0.19,L,T,lot,1056.010010,...,1528.812012,269.334015,2540.831055,1037.921021,3692.583008,2298.475098,4389.940918,153.194000,5250.000000,50.020000


In [27]:
merged_df.to_csv('formants_S01_test_12-03-24.csv')

In [12]:
#pd.DataFrame(optmatrix.values)

# Multiple speaker loop

In [31]:
vowels = ['IY', 'IY0', 'IY1', 'IY2', 'IH', 'IH0', 'IH1', 'IH2', 'EY', 'EY0', 'EY1', 'EY2', 'EH', 'EH0', 'EH1', 'EH2', 
          'AH', 'AH0', 'AH1', 'AE', 'AE0', 'AE1', 'AE2', 'ER', 'ER0', 'ER1', 'ER2', 'UW', 'UW0', 'UW1', 'UW2','UH', 
          'UH0', 'UH1', 'UH2', 'OW', 'OW0', 'OW1', 'OW2', 'AA', 'AA0', 'AA1', 'AA2', 'AO', 'AO0', 'AO1', 'AO2']
datadir = Path('./data').absolute()
spkrdf = dir2df(datadir, fnpat=r'\.wav$')
spkr_list = []

for row in spkrdf.itertuples():
    print(row.relpath)
    
    spkrfile = Path(datadir, row.relpath, row.fname)
    [phdf, wrdf] = read_label(spkrfile.with_suffix('.TextGrid'), ftype='praat', 
        tiers=['phones', 'words'])
    
    phdf = phdf[phdf['phones']!=''].copy()
    phdf['phone_dur'] = phdf['t2']-phdf['t1'] 
    phdf['prev']=phdf['phones'].shift()
    phdf['nxt']=phdf['phones'].shift(-1)
    phdf = phdf[phdf['phones'].isin(vowels) & (phdf['phone_dur'] >= 0.05)] 
    
    tg = pd.merge_asof(
        phdf[['t1', 't2', 'phones', 'phone_dur', 'prev', 'nxt']],               
        wrdf[['t1', 'words']], 
        on='t1', 
        suffixes=['_ph', '_wd']
    )
    
    tg.insert(2, 'speaker', row.relpath)
    tg.insert(3, 'recording', row.fname) 
    
    print('Done with TG') 

    wav = datadir / row.relpath / row.fname
    snd = Sound(str(wav))
    
    for _, row in tg.iterrows():
        part = snd.extract_part(row.t1 - 0.05, row.t2 + 0.05, preserve_times=True)
        fp_token = pcall(part, 'To FormantPath...', 0.005, 5, 5250, 0.025, 50, 'Robust', 0.05, 5, 
                         1e-6, 1e-6, 1.5, 5, 0.000001, 'no')
        
        opttable = pcall(fp_token, 'Down to Table (optimal interval)...', *downtotableparams.values())
        optmatrix = pcall(opttable, 'Down to Matrix')
        fmtdf = pd.DataFrame({
            c: pd.Series(optmatrix.values[:, i], dtype=downtotabledtype[c]) for i, c in enumerate(downtotablecols)
        })
            
        fmtdf['t1'] = row.t1
        fmtdf['t2'] = row.t2
        
        spkr_list.append(fmtdf)
        
    final_fmtdf = pd.concat(spkr_list, ignore_index=True)
    print('Done with formant extraction')
        
    merged_df = pd.merge(tg, final_fmtdf, on=['t1', 't2'], how='left')
    spkr_list.append(merged_df)

fulldf = pd.concat(spkr_list, ignore_index=True)
print('Done with all interviews')

fulldf

S01
Done with TG
Done with formant extraction
S02
Done with TG
Done with formant extraction
S03
Done with TG
Done with formant extraction
S03
Done with TG
Done with formant extraction
S05
Done with TG
Done with formant extraction
S06
Done with TG
Done with formant extraction
S07
Done with TG
Done with formant extraction
S08
Done with TG
Done with formant extraction
S09
Done with TG
Done with formant extraction
S10
Done with TG
Done with formant extraction
S11
Done with TG
Done with formant extraction
S12
Done with TG
Done with formant extraction
S13
Done with TG
Done with formant extraction
S14
Done with TG
Done with formant extraction
S15
Done with TG
Done with formant extraction
S16
Done with TG
Done with formant extraction
S17
Done with TG
Done with formant extraction
S19
Done with TG
Done with formant extraction
S20
Done with TG
Done with formant extraction
S21
Done with TG
Done with formant extraction
S22
Done with TG
Done with formant extraction
S23
Done with TG
Done with formant

  fp_token = pcall(part, 'To FormantPath...', 0.005, 5, 5250, 0.025, 50, 'Robust', 0.05, 5,
  fp_token = pcall(part, 'To FormantPath...', 0.005, 5, 5250, 0.025, 50, 'Robust', 0.05, 5,
  fp_token = pcall(part, 'To FormantPath...', 0.005, 5, 5250, 0.025, 50, 'Robust', 0.05, 5,
  fp_token = pcall(part, 'To FormantPath...', 0.005, 5, 5250, 0.025, 50, 'Robust', 0.05, 5,
  fp_token = pcall(part, 'To FormantPath...', 0.005, 5, 5250, 0.025, 50, 'Robust', 0.05, 5,
  fp_token = pcall(part, 'To FormantPath...', 0.005, 5, 5250, 0.025, 50, 'Robust', 0.05, 5,
  fp_token = pcall(part, 'To FormantPath...', 0.005, 5, 5250, 0.025, 50, 'Robust', 0.05, 5,
  fp_token = pcall(part, 'To FormantPath...', 0.005, 5, 5250, 0.025, 50, 'Robust', 0.05, 5,
  fp_token = pcall(part, 'To FormantPath...', 0.005, 5, 5250, 0.025, 50, 'Robust', 0.05, 5,
  fp_token = pcall(part, 'To FormantPath...', 0.005, 5, 5250, 0.025, 50, 'Robust', 0.05, 5,
  fp_token = pcall(part, 'To FormantPath...', 0.005, 5, 5250, 0.025, 50, 'Robust

Done with formant extraction
S37
Done with TG
Done with formant extraction
S39
Done with TG
Done with formant extraction
S41
Done with TG
Done with formant extraction
S42
Done with TG
Done with formant extraction
S43
Done with TG
Done with formant extraction
S44
Done with TG
Done with formant extraction
S45
Done with TG
Done with formant extraction
S46
Done with TG
Done with formant extraction
S47
Done with TG
Done with formant extraction
S48
Done with TG
Done with formant extraction
Done with all interviews


Unnamed: 0,t1,t2,speaker,recording,phones,phone_dur,prev,nxt,words,time(s),...,F2(Hz),B2(Hz),F3(Hz),B3(Hz),F4(Hz),B4(Hz),F5(Hz),B5(Hz),Ceiling(Hz),Stress
0,10.579,10.779,S01,S01_interview.wav,EY1,0.20,D,IH1,day,10.554000,...,2286.775879,314.464996,3146.791016,549.070984,4274.974121,1007.372009,,,5519.200195,75.019997
1,10.579,10.779,S01,S01_interview.wav,EY1,0.20,D,IH1,day,10.559000,...,2175.763916,218.878006,3128.771973,589.028015,4202.811035,641.856018,,,5519.200195,75.019997
2,10.579,10.779,S01,S01_interview.wav,EY1,0.20,D,IH1,day,10.564000,...,2191.938965,215.955002,3034.591064,468.304993,4266.092773,659.435974,,,5519.200195,75.019997
3,10.579,10.779,S01,S01_interview.wav,EY1,0.20,D,IH1,day,10.569000,...,2202.987061,408.235992,2912.492920,289.042999,4454.335938,657.020020,,,5519.200195,75.019997
4,10.579,10.779,S01,S01_interview.wav,EY1,0.20,D,IH1,day,10.574000,...,2236.481934,285.800995,2898.188965,538.234985,4527.219238,414.220001,,,5519.200195,75.019997
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2790734,1671.510,1671.620,S48,S48_interview.wav,AA1,0.11,N,N,non,1671.622559,...,1072.671997,928.635986,2870.916016,119.194000,3774.926025,130.998993,5104.848145,1055.343994,5802.100098,58.880001
2790735,1671.510,1671.620,S48,S48_interview.wav,AA1,0.11,N,N,non,1671.627441,...,1029.883057,772.820984,2836.319092,198.953003,3793.440918,133.615005,5118.627930,980.356995,5802.100098,58.880001
2790736,1671.510,1671.620,S48,S48_interview.wav,AA1,0.11,N,N,non,1671.632446,...,1098.662964,1068.869019,2820.029053,265.444000,3795.241943,368.496002,5059.099121,1120.906982,5802.100098,58.880001
2790737,1671.510,1671.620,S48,S48_interview.wav,AA1,0.11,N,N,non,1671.637451,...,1048.411011,1329.093994,2867.510010,215.617004,3804.004883,860.091980,4979.767090,952.260986,5802.100098,58.880001


In [32]:
#fulldf.to_csv('formants_interviews_12-03-24.csv')

# Map tasks

In [3]:
datadir = Path('./data_partial').absolute()
spkrdf = dir2df(datadir, fnpat=r'\.wav$')
#spkrdf

In [5]:
vowels = ['IY', 'IY0', 'IY1', 'IY2', 'IH', 'IH0', 'IH1', 'IH2', 'EY', 'EY0', 'EY1', 'EY2', 'EH', 'EH0', 'EH1', 'EH2', 
          'AH', 'AH0', 'AH1', 'AE', 'AE0', 'AE1', 'AE2', 'ER', 'ER0', 'ER1', 'ER2', 'UW', 'UW0', 'UW1', 'UW2','UH', 
          'UH0', 'UH1', 'UH2', 'OW', 'OW0', 'OW1', 'OW2', 'AA', 'AA0', 'AA1', 'AA2', 'AO', 'AO0', 'AO1', 'AO2']
datadir = Path('./data_partial').absolute()
spkrdf = dir2df(datadir, fnpat=r'\.wav$')
spkr_list = []

for row in spkrdf.itertuples():
    print(row.relpath)

    spkrfile = Path(datadir, row.relpath, row.fname)
    textgrid_file = spkrfile.with_suffix('.TextGrid')
    wav_file = spkrfile.with_suffix('.wav')

    # Extract speaker IDs from the filename
    speakers = row.fname.split('_')[:2]  # Assuming format is 'S01_S02_map_task'

    for spkr in speakers:
        tier1_name = f"{spkr} - phones"  # Phones tier for the current speaker
        tier2_name = f"{spkr} - words"   # Words tier for the current speaker
        print(f"Processing {spkr}")
    
        # Read the specific tiers
        try:
            [phdf, wrdf] = read_label(textgrid_file, ftype='praat', tiers=[tier1_name, tier2_name])
        except KeyError:
            print(f"Tier {tier1_name} or {tier2_name} not found in {textgrid_file}. Skipping.")
            continue

        phdf.rename(columns={'label': 'phones'}, inplace=True)
        wrdf.rename(columns={'label': 'words'}, inplace=True)
        
        phdf = phdf[phdf['phones'] != ''].copy()
        phdf['phone_dur'] = phdf['t2'] - phdf['t1']
        phdf['prev'] = phdf['phones'].shift()
        phdf['nxt'] = phdf['phones'].shift(-1)
        phdf = phdf[phdf['phones'].isin(vowels) & (phdf['phone_dur'] >= 0.05)]

        # Merge with word tier
        tg = pd.merge_asof(
            phdf[['t1', 't2', 'phones', 'phone_dur', 'prev', 'nxt']],
            wrdf[['t1', 'words']],
            on='t1',
            suffixes=['_ph', '_wd']
        )
        
        tg.insert(2, 'speaker', spkr)  # Add speaker ID
        tg.insert(3, 'recording', row.fname)

        print('Done with TG')

        snd = Sound(str(wav_file))
        results = []
        for _, phone_row in tg.iterrows():
            part = snd.extract_part(phone_row.t1 - 0.05, phone_row.t2 + 0.05, preserve_times=True)
            fp_token = pcall(part, 'To FormantPath...', 0.005, 5, 5250, 0.025, 50, 'Robust', 0.05, 5,
                             1e-6, 1e-6, 1.5, 5, 0.000001, 'no')

            opttable = pcall(fp_token, 'Down to Table (optimal interval)...', *downtotableparams.values())
            optmatrix = pcall(opttable, 'Down to Matrix')
            fmtdf = pd.DataFrame({
                c: pd.Series(optmatrix.values[:, i], dtype=downtotabledtype[c]) for i, c in enumerate(downtotablecols)
            })

            fmtdf['t1'] = phone_row.t1
            fmtdf['t2'] = phone_row.t2
            results.append(fmtdf)

        if results:
            final_fmtdf = pd.concat(results, ignore_index=True)
            print('Done with formant extraction')

            merged_df = pd.merge(tg, final_fmtdf, on=['t1', 't2'], how='left')
            spkr_list.append(merged_df)

fulldf = pd.concat(spkr_list, ignore_index=True)
print('Done with all interviews')

fulldf

S01+S02
Processing S01
Done with TG
Done with formant extraction
Processing S02
Done with TG
Done with formant extraction
S01+S02
Processing S01
Done with TG
Done with formant extraction
Processing S02
Done with TG
Done with formant extraction
S03+S04
Processing S03
Done with TG
Done with formant extraction
Processing S04
Done with TG
Done with formant extraction
S05+S06
Processing S05
Done with TG
Done with formant extraction
Processing S06
Done with TG
Done with formant extraction
S05+S06
Processing S05
Done with TG
Done with formant extraction
Processing S06
Done with TG
Done with formant extraction
S07+S08
Processing S07
Done with TG
Done with formant extraction
Processing S08
Done with TG
Done with formant extraction
S07+S08
Processing S07
Done with TG
Done with formant extraction
Processing S08
Done with TG
Done with formant extraction
S09+S10
Processing S09
Done with TG
Done with formant extraction
Processing S10
Done with TG
Done with formant extraction
S11+S12
Processing S11
D

Unnamed: 0,t1,t2,speaker,recording,phones,phone_dur,prev,nxt,words,time(s),...,F2(Hz),B2(Hz),F3(Hz),B3(Hz),F4(Hz),B4(Hz),F5(Hz),B5(Hz),Ceiling(Hz),Stress
0,12.459,12.519,S01,S01_S02_map_task_1.wav,EH1,0.06,DH,N,then,12.434000,...,1176.969971,217.636993,2208.204102,517.508972,3314.841064,757.875977,4320.865234,832.067017,4994.000000,64.809998
1,12.459,12.519,S01,S01_S02_map_task_1.wav,EH1,0.06,DH,N,then,12.439000,...,1194.199951,227.375000,2187.873047,502.295013,3365.418945,931.122986,4230.558105,829.630005,4994.000000,64.809998
2,12.459,12.519,S01,S01_S02_map_task_1.wav,EH1,0.06,DH,N,then,12.444000,...,1192.911011,397.169006,2102.447021,609.593994,3294.070068,1106.786011,4150.896973,592.426025,4994.000000,64.809998
3,12.459,12.519,S01,S01_S02_map_task_1.wav,EH1,0.06,DH,N,then,12.449000,...,1153.598022,449.730988,2087.538086,556.864990,3334.962891,972.411011,4213.023926,649.867004,4994.000000,64.809998
4,12.459,12.519,S01,S01_S02_map_task_1.wav,EH1,0.06,DH,N,then,12.454000,...,1112.297974,398.701996,2056.122070,483.557007,3256.337891,966.838989,4290.160156,631.763977,4994.000000,64.809998
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
802208,623.770,623.970,S48,S47_S48_map_task_2.wav,AH1,0.20,D,N,done,623.974976,...,1741.725952,441.480011,2753.904053,116.959999,3887.655029,550.534973,5446.374023,1091.661011,6412.399902,77.510002
802209,623.770,623.970,S48,S47_S48_map_task_2.wav,AH1,0.20,D,N,done,623.979980,...,1720.797974,311.416992,2749.358887,152.815994,3836.992920,462.904999,5543.412109,862.195984,6412.399902,77.510002
802210,623.770,623.970,S48,S47_S48_map_task_2.wav,AH1,0.20,D,N,done,623.984985,...,1708.181030,210.610001,2744.299072,191.151993,3771.196045,419.816010,5545.449219,688.142029,6412.399902,77.510002
802211,623.770,623.970,S48,S47_S48_map_task_2.wav,AH1,0.20,D,N,done,623.989990,...,1693.086060,182.240005,2835.042969,263.972992,3773.063965,522.596985,5521.381836,693.107971,6412.399902,77.510002


In [6]:
fulldf.to_csv('formants_maps_12-08-24.csv')

# Parallel processing

In [None]:
from concurrent.futures import ProcessPoolExecutor
import pandas as pd
from pathlib import Path

# List of vowels
vowels = ['IY', 'IY0', 'IY1', 'IY2', 'IH', 'IH0', 'IH1', 'IH2', 'EY', 'EY0', 'EY1', 'EY2', 'EH', 'EH0', 'EH1', 'EH2', 
          'AH', 'AH0', 'AH1', 'AE', 'AE0', 'AE1', 'AE2', 'ER', 'ER0', 'ER1', 'ER2', 'UW', 'UW0', 'UW1', 'UW2', 'UH', 
          'UH0', 'UH1', 'UH2', 'OW', 'OW0', 'OW1', 'OW2', 'AA', 'AA0', 'AA1', 'AA2', 'AO', 'AO0', 'AO1', 'AO2']

datadir = Path('./data').absolute()
spkrdf = dir2df(datadir, fnpat=r'\.wav$')  # Assuming dir2df function exists

# Function to process a single speaker row
def process_speaker(row_data):
    row = pd.Series(row_data)  # Convert list back to Series
    tglist = []
    try:
        print(f"Processing: {row.relpath}")
        
        spkrfile = Path(datadir, row.relpath, row.fname)
        
        # TextGrid portion
        print(f"Reading TextGrid for {row.relpath}")
        [phdf, wrdf] = read_label(spkrfile.with_suffix('.TextGrid'), ftype='praat', tiers=['phones', 'words'])
        phdf = phdf[phdf['phones'] != ''].copy()
        phdf['phone_dur'] = phdf['t2'] - phdf['t1']
        phdf['prev'] = phdf['phones'].shift()
        phdf['nxt'] = phdf['phones'].shift(-1)
        phdf = phdf[phdf['phones'].isin(vowels) & (phdf['phone_dur'] >= 0.05)]
        
        tg = pd.merge_asof(
            phdf[['t1', 't2', 'phones', 'phone_dur', 'prev', 'nxt']],
            wrdf[['t1', 'words']],
            on='t1',
            suffixes=['_ph', '_wd']
        )
        tg.insert(0, 'speaker', row.relpath)
        tg.insert(1, 'recording', row.fname)
        print('Done with TextGrid processing.')

        # wav portion
        wav = datadir / row.relpath / row.fname
        print(f"Processing wav file: {wav}")
        snd = Sound(str(wav))
        fp = pcall(snd, 'To FormantPath...', *fpathargs)
        print(f"Ceilings used for {row.relpath}: {pcall(fp, 'List ceiling frequencies')}")
        
        opttable = pcall(fp, 'Down to Table (optimal interval)...', *downtotableparams.values())
        optmatrix = pcall(opttable, 'Down to Matrix')
        fmtdf = pd.DataFrame({
            c: pd.Series(optmatrix.values[:, i], dtype=downtotabledtype[c]) for i, c in enumerate(downtotablecols)
        })
        fmtdf.rename(columns={'time(s)': 't1', 'Ceiling(Hz)': 'ceiling'}, inplace=True)
        print('Done with wav processing.')

        # Get median formants for each token
        print(f"Getting median formants for tokens in {row.relpath}")
        for trow in tg.itertuples(index=False):
            token_df = get_formants_for_token(trow, fmtdf)
            tglist.append(token_df)
        
    except Exception as e:
        print(f"Error processing {row.relpath}: {e}")
    
    # Concatenate all results for this speaker, if any
    return pd.concat(tglist, ignore_index=True) if tglist else pd.DataFrame()

# Prepare the data as a list of dictionaries for each speaker row (this avoids the pickling issue)
row_data_list = spkrdf.to_dict(orient='records')

# Run parallel processing
with ProcessPoolExecutor() as executor:
    print("Starting parallel processing...")
    results = list(executor.map(process_speaker, row_data_list))

# Combine all results into a single DataFrame
fulldf = pd.concat(results, ignore_index=True)

print("Processing complete.")
print(f"Full data frame shape: {fulldf.shape}")