# Get Spanish Formant trajectories

In [35]:
import parselmouth
from parselmouth.praat import call
import os
import re
import csv
import pandas as pd
from audiolabel import read_label

### First get data for female speakers with max Hz set to 5500 Hz

In [36]:
# directory of MFA'ed TextGrids
tg_path = 'data/spanish/female/'
# directory of wav files
wav_path = 'data/spanish/female/'
# directory to save vowel measurement data
data_path = 'data/'


# formant analysis parameters
time_step = 0.1
maximum_number_of_formants = 5
maximum_formant = 5500 # Hz for adult female
window_length = 0.025 # seconds
preemphasis_from = 50

# take formant measurements every X seconds?
segment_window = 0.005 # 5 milliseconds

In [37]:
# list (of dictionaries) to save results to
data = []

# counter to keep track of how long this script takes to run
file_counter = 1

for file in os.listdir(wav_path):
    if file.endswith('.wav'):
        
        # Check if corresponding TextGrid exists
        if os.path.exists(tg_path + file[:-4] + '.TextGrid'):
            
            # Print out which file we're currently working on
            print(file_counter, 'Processing file', file)
            file_counter += 1
            
            if file.startswith("s"):
                # names of tiers with phonemes of interest
                phones_tiers = ['phone']
            
            else:
                # names of tiers with phonemes of interest
                phones_tiers = ['default - phones']
            
            # Create sound object
            wav = parselmouth.Sound(wav_path + file)
            
            # Create formant object
            formant = wav.to_formant_burg(time_step, maximum_number_of_formants, maximum_formant, window_length, preemphasis_from)
            
            # Open textgrid
            tg = parselmouth.Data.read(tg_path + file[:-4] + '.TextGrid')
            
            # Iterate over the tiers and find the ones w/ phonemes
            numTiers = call(tg, 'Get number of tiers')
            for tierIdx in range(1, numTiers + 1): # Praat counts from 1, not 0
                tierName = call(tg, 'Get tier name', tierIdx)
                
                if tierName in phones_tiers:
                    numIntervals = call(tg, 'Get number of intervals', tierIdx)
                    
                    for intervalIdx in range(1, numIntervals + 1):
                        label = call(tg, 'Get label of interval', tierIdx, intervalIdx)
                        
                        # check if interval label is a vowel,
                        # i.e., it is not blank, and it ends in a digit
                        vowel_list = ["a", "a+", "e", "e+", "i", "i+", "o", "o+", "u", "u+", "aI", "aU", "oI", "j", "w"]
                        if label and label in vowel_list:
                            start = call(tg, 'Get starting point', tierIdx, intervalIdx)
                            end = call(tg, 'Get end point', tierIdx, intervalIdx)
                            dur = end - start
                            
                            # number of X millisecond segments to take measurements from
                            numSegments = int(dur / segment_window) # note: this must be an integer
                            
                            # loop over segments and calculate F1/F2
                            for segmentIdx in range(1, numSegments + 1):
                                spot = start + (segmentIdx * segment_window)
                                tim_int = spot - start
                                
                                f1 = call(formant, 'Get value at time', 1, spot, 'Hertz', 'Linear')
                                f2 = call(formant, 'Get value at time', 2, spot, 'Hertz', 'Linear')
                                f3 = call(formant, 'Get value at time', 3, spot, 'Hertz', 'Linear')
                                
                                # add to our data set
                                data.append({'Video ID': file[:-4],
                                             'Vowel': label,
                                             't1_ph': start,
                                             'Time_of_formant_measurements': spot,
                                             'Time_from_vowel_onset': tim_int,
                                             'F1': f1,
                                             'F2': f2,
                                             'F3': f3,
                                             'Tier': tierName,
                                            })
        
print('Done')

1 Processing file p111_spanish1.wav
2 Processing file p111_spanish2.wav
3 Processing file p111_spanish3.wav
4 Processing file p113_spanish1.wav
5 Processing file p113_spanish2.wav
6 Processing file p113_spanish3.wav
7 Processing file p114_spanish1.wav
8 Processing file p114_spanish2.wav
9 Processing file p114_spanish3.wav
10 Processing file p117_spanish1.wav
11 Processing file p117_spanish2.wav
12 Processing file p117_spanish3.wav
13 Processing file p118_spanish1.wav
14 Processing file p118_spanish2.wav
15 Processing file p118_spanish3.wav
16 Processing file p120_spanish1.wav
17 Processing file p120_spanish2.wav
18 Processing file p120_spanish3.wav
19 Processing file p121_spanish1.wav
20 Processing file p121_spanish2.wav
21 Processing file p121_spanish3.wav
22 Processing file p122_spanish1.wav
23 Processing file p122_spanish2.wav
24 Processing file p122_spanish3.wav
25 Processing file p123_spanish1.wav
26 Processing file p123_spanish2.wav
27 Processing file p123_spanish3.wav
28 Process

Make sure to rename data for males so they can be run separately

In [38]:
data[0]

{'Video ID': 'p111_spanish1',
 'Vowel': 'a',
 't1_ph': 8.178,
 'Time_of_formant_measurements': 8.183000000000002,
 'Time_from_vowel_onset': 0.005000000000000782,
 'F1': 579.9256822556335,
 'F2': 1318.6660546903286,
 'F3': 2603.4668149064714,
 'Tier': 'default - phones'}

### Now the male data, where max Hz set to 5000

In [39]:
# directory of MFA'ed TextGrids
tg_path = 'data/spanish/male/'
# directory of wav files
wav_path = 'data/spanish/male/'
# directory to save vowel measurement data
data_path = 'data/'


# formant analysis parameters
time_step = 0.1
maximum_number_of_formants = 5
maximum_formant = 5000 # Hz for adult male
window_length = 0.025 # seconds
preemphasis_from = 50

# take formant measurements every X seconds?
segment_window = 0.005 # 5 milliseconds

In [40]:
# list (of dictionaries) to save results to
data_male = []

# counter to keep track of how long this script takes to run
file_counter = 1

for file in os.listdir(wav_path):
    if file.endswith('.wav'):
        
        # Check if corresponding TextGrid exists
        if os.path.exists(tg_path + file[:-4] + '.TextGrid'):
            
            # Print out which file we're currently working on
            print(file_counter, 'Processing file', file)
            file_counter += 1
            
            if file.startswith("s"):
                # names of tiers with phonemes of interest
                phones_tiers = ['phone']
            
            else:
                # names of tiers with phonemes of interest
                phones_tiers = ['default - phones']
            
            # Create sound object
            wav = parselmouth.Sound(wav_path + file)
            
            # Create formant object
            formant = wav.to_formant_burg(time_step, maximum_number_of_formants, maximum_formant, window_length, preemphasis_from)
            
            # Open textgrid
            tg = parselmouth.Data.read(tg_path + file[:-4] + '.TextGrid')
            
            # Iterate over the tiers and find the ones w/ phonemes
            numTiers = call(tg, 'Get number of tiers')
            for tierIdx in range(1, numTiers + 1): # Praat counts from 1, not 0
                tierName = call(tg, 'Get tier name', tierIdx)
                
                if tierName in phones_tiers:
                    numIntervals = call(tg, 'Get number of intervals', tierIdx)
                    
                    for intervalIdx in range(1, numIntervals + 1):
                        label = call(tg, 'Get label of interval', tierIdx, intervalIdx)
                        
                        # check if interval label is a vowel,
                        # i.e., it is not blank, and it ends in a digit
                        vowel_list = ["a", "a+", "e", "e+", "i", "i+", "o", "o+", "u", "u+", "aI", "aU", "oI", "j", "w"]
                        if label and label in vowel_list:
                            start = call(tg, 'Get starting point', tierIdx, intervalIdx)
                            end = call(tg, 'Get end point', tierIdx, intervalIdx)
                            dur = end - start
                            
                            # number of X millisecond segments to take measurements from
                            numSegments = int(dur / segment_window) # note: this must be an integer
                            
                            # loop over segments and calculate F1/F2
                            for segmentIdx in range(1, numSegments + 1):
                                spot = start + (segmentIdx * segment_window)
                                tim_int = spot - start
                                
                                f1 = call(formant, 'Get value at time', 1, spot, 'Hertz', 'Linear')
                                f2 = call(formant, 'Get value at time', 2, spot, 'Hertz', 'Linear')
                                f3 = call(formant, 'Get value at time', 3, spot, 'Hertz', 'Linear')
                                
                                # add to our data set
                                data_male.append({'Video ID': file[:-4],
                                             'Vowel': label,
                                             't1_ph': start,
                                             'Time_of_formant_measurements': spot,
                                             'Time_from_vowel_onset': tim_int,
                                             'F1': f1,
                                             'F2': f2,
                                             'F3': f3,
                                             'Tier': tierName,
                                            })
        
print('Done')

1 Processing file p119_spanish1.wav
2 Processing file p119_spanish2.wav
3 Processing file p119_spanish3.wav
4 Processing file s00101.wav
5 Processing file s00102.wav
6 Processing file s00103.wav
7 Processing file s00104.wav
8 Processing file s00105.wav
9 Processing file s00106.wav
10 Processing file s00107.wav
11 Processing file s00108.wav
12 Processing file s00109.wav
13 Processing file s00110.wav
14 Processing file s00111.wav
15 Processing file s00112.wav
16 Processing file s00113.wav
17 Processing file s00114.wav
18 Processing file s00115.wav
19 Processing file s00116.wav
20 Processing file s00117.wav
21 Processing file s00118.wav
22 Processing file s00119.wav
23 Processing file s00120.wav
24 Processing file s00121.wav
25 Processing file s00122.wav
26 Processing file s00123.wav
27 Processing file s00124.wav
28 Processing file s00125.wav
29 Processing file s00126.wav
30 Processing file s00127.wav
31 Processing file s00128.wav
32 Processing file s00129.wav
33 Processing file s00130.wa

In [41]:
data_male[0]

{'Video ID': 'p119_spanish1',
 'Vowel': 'a',
 't1_ph': 64.827,
 'Time_of_formant_measurements': 64.832,
 'Time_from_vowel_onset': 0.0049999999999954525,
 'F1': 638.6783281436159,
 'F2': 1223.5057006037123,
 'F3': 2640.21525844693,
 'Tier': 'default - phones'}

### Combine formant data and create DataFrame

In [42]:
formant_list = data + data_male
formantdf = pd.DataFrame(formant_list)
formantdf.sample(25)

Unnamed: 0,Video ID,Vowel,t1_ph,Time_of_formant_measurements,Time_from_vowel_onset,F1,F2,F3,Tier
98961,p118_spanish1,a,76.198,76.243,0.045,659.990375,1506.636368,2657.549528,default - phones
351577,s00128,i,1.463,1.503,0.04,293.938955,2055.809116,2901.571643,phone
165402,p121_spanish3,a,275.024,275.089,0.065,853.630167,1714.294645,2789.465939,default - phones
254936,p126_spanish3,j,185.428,185.453,0.025,679.285254,2310.842689,2962.021673,default - phones
353844,s00138,o,0.522,0.532,0.01,426.420652,1432.112157,2642.818171,phone
346199,s00104,a,1.424,1.499,0.075,1055.060125,2335.000175,2953.909775,phone
151828,p121_spanish2,e,151.401,151.516,0.115,517.968364,1735.031739,2369.876739,default - phones
325357,p119_spanish1,o,128.789,129.009,0.22,1019.400107,1878.236408,2931.310383,default - phones
77251,p117_spanish1,a,115.374,115.449,0.075,601.960162,943.709018,2359.306308,default - phones
137410,p120_spanish3,o,273.987,274.127,0.14,786.945541,1786.27018,2238.905263,default - phones


In [43]:
# save formants to csv
formantdf.to_csv("data/formants_spa.csv", index = False)

### Merge with tg df

In [44]:
tgdf = pd.read_csv("data/tg_data.csv")
tgdf.head()

Unnamed: 0,t1_ph,t2_ph,fname,dur_ph,Participant,Corpus,t1_wd,t2_wd,word,is_wdinit_ph,is_wdfin_ph,is_stress,GP_ph,is_vowel
0,8.068,8.178,data/tgs/p111_spanish1.TextGrid,0.11,p111,CBAS,8.068,8.558,bajo,True,False,1,b,0
1,8.178,8.278,data/tgs/p111_spanish1.TextGrid,0.1,p111,CBAS,8.068,8.558,bajo,False,False,1,a,1
2,8.278,8.368,data/tgs/p111_spanish1.TextGrid,0.09,p111,CBAS,8.068,8.558,bajo,False,False,0,x,0
3,8.368,8.558,data/tgs/p111_spanish1.TextGrid,0.19,p111,CBAS,8.068,8.558,bajo,False,True,0,o,1
4,10.477,10.567,data/tgs/p111_spanish1.TextGrid,0.09,p111,CBAS,10.477,10.957,zombi,True,False,1,s,0


Create new column `Video ID` that has rows of form `p111_spanish1`

In [45]:
tgdf['Video ID'] = tgdf['fname'].apply(lambda x: x[9:-9])
tgdf.head()

Unnamed: 0,t1_ph,t2_ph,fname,dur_ph,Participant,Corpus,t1_wd,t2_wd,word,is_wdinit_ph,is_wdfin_ph,is_stress,GP_ph,is_vowel,Video ID
0,8.068,8.178,data/tgs/p111_spanish1.TextGrid,0.11,p111,CBAS,8.068,8.558,bajo,True,False,1,b,0,p111_spanish1
1,8.178,8.278,data/tgs/p111_spanish1.TextGrid,0.1,p111,CBAS,8.068,8.558,bajo,False,False,1,a,1,p111_spanish1
2,8.278,8.368,data/tgs/p111_spanish1.TextGrid,0.09,p111,CBAS,8.068,8.558,bajo,False,False,0,x,0,p111_spanish1
3,8.368,8.558,data/tgs/p111_spanish1.TextGrid,0.19,p111,CBAS,8.068,8.558,bajo,False,True,0,o,1,p111_spanish1
4,10.477,10.567,data/tgs/p111_spanish1.TextGrid,0.09,p111,CBAS,10.477,10.957,zombi,True,False,1,s,0,p111_spanish1


Now the two dataframes can be merged on `Video ID` and `t1_ph`

In [46]:
spa_tgformants = tgdf.merge(formantdf, on=['Video ID', 't1_ph'])
spa_tgformants.head(50)

Unnamed: 0,t1_ph,t2_ph,fname,dur_ph,Participant,Corpus,t1_wd,t2_wd,word,is_wdinit_ph,...,GP_ph,is_vowel,Video ID,Vowel,Time_of_formant_measurements,Time_from_vowel_onset,F1,F2,F3,Tier
0,8.178,8.278,data/tgs/p111_spanish1.TextGrid,0.1,p111,CBAS,8.068,8.558,bajo,False,...,a,1,p111_spanish1,a,8.183,0.005,579.925682,1318.666055,2603.466815,default - phones
1,8.178,8.278,data/tgs/p111_spanish1.TextGrid,0.1,p111,CBAS,8.068,8.558,bajo,False,...,a,1,p111_spanish1,a,8.188,0.01,610.155201,1308.96484,2600.082277,default - phones
2,8.178,8.278,data/tgs/p111_spanish1.TextGrid,0.1,p111,CBAS,8.068,8.558,bajo,False,...,a,1,p111_spanish1,a,8.193,0.015,640.38472,1299.263625,2596.697739,default - phones
3,8.178,8.278,data/tgs/p111_spanish1.TextGrid,0.1,p111,CBAS,8.068,8.558,bajo,False,...,a,1,p111_spanish1,a,8.198,0.02,670.614239,1289.562411,2593.313201,default - phones
4,8.178,8.278,data/tgs/p111_spanish1.TextGrid,0.1,p111,CBAS,8.068,8.558,bajo,False,...,a,1,p111_spanish1,a,8.203,0.025,700.843758,1279.861196,2589.928663,default - phones
5,8.178,8.278,data/tgs/p111_spanish1.TextGrid,0.1,p111,CBAS,8.068,8.558,bajo,False,...,a,1,p111_spanish1,a,8.208,0.03,731.073276,1270.159981,2586.544125,default - phones
6,8.178,8.278,data/tgs/p111_spanish1.TextGrid,0.1,p111,CBAS,8.068,8.558,bajo,False,...,a,1,p111_spanish1,a,8.213,0.035,761.302795,1260.458767,2583.159587,default - phones
7,8.178,8.278,data/tgs/p111_spanish1.TextGrid,0.1,p111,CBAS,8.068,8.558,bajo,False,...,a,1,p111_spanish1,a,8.218,0.04,791.532314,1250.757552,2579.775049,default - phones
8,8.178,8.278,data/tgs/p111_spanish1.TextGrid,0.1,p111,CBAS,8.068,8.558,bajo,False,...,a,1,p111_spanish1,a,8.223,0.045,821.761833,1241.056337,2576.390511,default - phones
9,8.178,8.278,data/tgs/p111_spanish1.TextGrid,0.1,p111,CBAS,8.068,8.558,bajo,False,...,a,1,p111_spanish1,a,8.228,0.05,851.991352,1231.355123,2573.005973,default - phones


In [47]:
spa_tgformants.shape

(355115, 22)

Add in language dominance information.

In [48]:
langdom = pd.DataFrame({"Participant": ['p111', 'p113', 'p114', 'p117','p118', 'p119', 'p120', 'p121', 'p122', 'p123', 'p124', 'p126', 's051', 's053', 's055', 's056', 's001', 's002'],
                       "Dom": ["L2", "biling", "biling", "L2", "biling", "biling", "biling", "L2", "biling", "L2", "biling", "L2", "mono", "mono", "mono", "mono", "mono", "mono"]})

In [49]:
spa_tgformants = spa_tgformants.merge(langdom, on = ["Participant"])
spa_tgformants.sample(10)

Unnamed: 0,t1_ph,t2_ph,fname,dur_ph,Participant,Corpus,t1_wd,t2_wd,word,is_wdinit_ph,...,is_vowel,Video ID,Vowel,Time_of_formant_measurements,Time_from_vowel_onset,F1,F2,F3,Tier,Dom
254678,159.417,159.607,data/tgs/p126_spanish1.TextGrid,0.19,p126,CBAS,159.037,159.607,convoy,False,...,1,p126_spanish1,oI,159.472,0.055,656.511153,1224.573849,2948.632876,default - phones,L2
258575,37.84,38.14,data/tgs/p126_spanish2.TextGrid,0.3,p126,CBAS,37.84,38.14,hoy,True,...,1,p126_spanish2,oI,38.025,0.185,585.12014,1601.267958,2748.333918,default - phones,L2
140663,146.172,146.272,data/tgs/p120_spanish1.TextGrid,0.1,p120,CBAS,145.902,146.612,baliza,False,...,1,p120_spanish1,i,146.242,0.07,302.370388,2245.727048,3090.625502,default - phones,biling
79545,291.28,291.35,data/tgs/p117_spanish1.TextGrid,0.07,p117,CBAS,291.28,291.97,amargas,True,...,1,p117_spanish1,a,291.29,0.01,601.696188,1366.345256,2954.026484,default - phones,L2
176518,292.738,292.978,data/tgs/p121_spanish2.TextGrid,0.24,p121,CBAS,292.398,292.978,llave,False,...,1,p121_spanish2,e,292.903,0.165,764.375014,2326.8205,2972.874795,default - phones,L2
89227,43.2,43.41,data/tgs/p117_spanish3.TextGrid,0.21,p117,CBAS,42.86,43.41,harto,False,...,1,p117_spanish3,o,43.345,0.145,804.70576,1846.427142,3384.405734,default - phones,L2
117830,144.852,144.902,data/tgs/p119_spanish1.TextGrid,0.05,p119,CBAS,144.742,145.302,águilas,False,...,1,p119_spanish1,i,144.867,0.015,451.552872,2060.199878,2458.221435,default - phones,biling
201378,85.322,85.492,data/tgs/p122_spanish3.TextGrid,0.17,p122,CBAS,84.862,85.492,convicto,False,...,1,p122_spanish3,o,85.482,0.16,778.399504,1679.287926,2757.961692,default - phones,biling
292736,1.326,1.375,data/tgs/s00233.TextGrid,0.049,s002,DIMEx100,1.255,1.375,no,False,...,1,s00233,o,1.361,0.035,640.010069,1490.40408,2788.3839,phone,mono
15237,189.2,189.36,data/tgs/p111_spanish2.TextGrid,0.16,p111,CBAS,188.72,189.36,alfombra,False,...,1,p111_spanish2,a,189.24,0.04,699.419593,1619.67436,2678.488907,default - phones,L2


Create column for average vowel duration, by Participant.

In [50]:
avg_dur = pd.DataFrame(spa_tgformants.groupby(["Participant"])["dur_ph"].mean())
avg_dur = avg_dur.rename(columns={"dur_ph": "avg_dur"})
avg_dur = avg_dur.reset_index(drop = False)
avg_dur.head()

Unnamed: 0,Participant,avg_dur
0,p111,0.12047
1,p113,0.136585
2,p114,0.158571
3,p117,0.110605
4,p118,0.113432


In [51]:
spa_tgformants = spa_tgformants.merge(avg_dur, on = ["Participant"])
spa_tgformants.sample(10)

Unnamed: 0,t1_ph,t2_ph,fname,dur_ph,Participant,Corpus,t1_wd,t2_wd,word,is_wdinit_ph,...,Video ID,Vowel,Time_of_formant_measurements,Time_from_vowel_onset,F1,F2,F3,Tier,Dom,avg_dur
11470,460.377,460.417,data/tgs/p111_spanish1.TextGrid,0.04,p111,CBAS,460.167,460.897,presidente,False,...,p111_spanish1,i,460.387,0.01,391.273433,1689.053077,2819.876091,default - phones,L2,0.12047
45525,64.528,64.608,data/tgs/p113_spanish3.TextGrid,0.08,p113,CBAS,64.378,64.608,muy,False,...,p113_spanish3,j,64.583,0.055,475.978958,2018.910884,2896.605957,default - phones,biling,0.136585
154925,156.273,156.423,data/tgs/p120_spanish3.TextGrid,0.15,p120,CBAS,156.193,156.873,limbo,False,...,p120_spanish3,i,156.373,0.1,319.632891,632.41242,2693.291462,default - phones,biling,0.140621
178923,74.6,74.7,data/tgs/p121_spanish3.TextGrid,0.1,p121,CBAS,73.89,74.89,alumbrador,False,...,p121_spanish3,o,74.655,0.055,615.236577,1140.460094,2825.200258,default - phones,L2,0.153949
206334,33.016,33.086,data/tgs/p123_spanish1.TextGrid,0.07,p123,CBAS,32.946,33.586,carbono,False,...,p123_spanish1,a,33.036,0.02,677.317915,1696.055083,2488.44346,default - phones,L2,0.131626
128441,242.877,242.927,data/tgs/p119_spanish2.TextGrid,0.05,p119,CBAS,242.667,243.367,revocar,False,...,p119_spanish2,e,242.892,0.015,363.57493,1827.486108,2252.279105,default - phones,biling,0.141896
102074,257.162,257.292,data/tgs/p118_spanish1.TextGrid,0.13,p118,CBAS,256.812,257.492,vainilla,False,...,p118_spanish1,i,257.207,0.045,377.417689,1651.281411,3155.685305,default - phones,biling,0.113432
214343,36.21,36.28,data/tgs/p123_spanish2.TextGrid,0.07,p123,CBAS,35.61,36.61,ubicaciones,False,...,p123_spanish2,o,36.255,0.045,737.614428,1510.343168,2620.050315,default - phones,L2,0.131626
70070,171.659,171.859,data/tgs/p114_spanish3.TextGrid,0.2,p114,CBAS,171.419,171.859,pedí,False,...,p114_spanish3,i+,171.859,0.2,458.005003,2357.068575,3236.598929,default - phones,biling,0.158571
263400,192.196,192.266,data/tgs/p126_spanish2.TextGrid,0.07,p126,CBAS,191.726,192.616,labios,False,...,p126_spanish2,o,192.231,0.035,537.481546,1859.949349,2884.633526,default - phones,L2,0.165465


In [52]:
spa_tgformants.to_csv("data/spanish_vowels.csv", index = False)

In [1]:
import pandas as pd
df = pd.read_csv("data/spanish_vowels.csv")
df.groupby(['Participant','t1_ph'])["Vowel"].count().shape

(18645,)