# Extract formant measures from .wav file

This notebook is adapted from a script by [Isaac Bleaman](https://www.isaacbleaman.com/) and utilizes `parselmouth` to extract formant measures from a .wav file with corresponding TextGrid. To demonstrate the functionality, this notebook uses audio and corresponding phone-aligned TextGrids from four female speakers from the Corpus of Bay Area Spanish (CBAS), reading from a wordlist and participating in a sociolinguistic interview.

Since the audio files are in Spanish, we can use regular expressions to isolate labels of a, e, o, i, and u. For English data, with transcriptions generated from the CMU pronunciation dictionary (where vowels are labels with numbers to indicate stress), we can substitute the regular expression <pre>r'\d+$'</pre>

In [1]:
# pip install praat-parselmouth

In [2]:
import parselmouth
from parselmouth.praat import call
import os
import re
import pandas as pd

In [3]:
# directory of TextGrids
tg_path = 'data/'
# directory of wav files
wav_path = 'data/'
# directory to save vowel measurement data
data_path = 'data/'


# names of tiers with phonemes of interest
phones_tiers = ['phones']

# formant analysis parameters
time_step = 0.1
maximum_number_of_formants = 5
maximum_formant = 5500 # Hz
window_length = 0.025 # seconds
preemphasis_from = 50

# take formant measurements every X seconds?
segment_window = 0.005 # 5 milliseconds

In [4]:
# list (of dictionaries) to save results to
data = []

# counter to keep track of how long this script takes to run
file_counter = 1

for file in os.listdir(wav_path):
    if file.endswith('.wav'):
        
        # Check if corresponding TextGrid exists
        if os.path.exists(tg_path + file[:-4] + '.TextGrid'):
            
            # Print out which file we're currently working on
            print(file_counter, 'Processing file', file)
            file_counter += 1
            
            # Create sound object
            wav = parselmouth.Sound(wav_path + file)
            
            # Create formant object
            formant = wav.to_formant_burg(time_step, maximum_number_of_formants, maximum_formant, window_length, preemphasis_from)
            
            # Open textgrid
            tg = parselmouth.Data.read(tg_path + file[:-4] + '.TextGrid')
            
            # Iterate over the tiers and find the ones w/ phonemes
            numTiers = call(tg, 'Get number of tiers')
            for tierIdx in range(1, numTiers + 1): # Praat counts from 1, not 0
                tierName = call(tg, 'Get tier name', tierIdx)
                
                if tierName in phones_tiers:
                    numIntervals = call(tg, 'Get number of intervals', tierIdx)
                    
                    for intervalIdx in range(1, numIntervals + 1):
                        label = call(tg, 'Get label of interval', tierIdx, intervalIdx)
                        
                        # check if interval label is a vowel,
                        # i.e., it is not blank, and it is a, e, i, o or u (or with +), also glides j and w
                        if label and re.search(r'^[a,e,i,o,u,j,w]\+?', label):
                            start = call(tg, 'Get starting point', tierIdx, intervalIdx)
                            end = call(tg, 'Get end point', tierIdx, intervalIdx)
                            dur = end - start
                            
                            word_tier_idx = 1
                            midpoint = (end + start)/2
                            word_interval = call(tg, 'Get interval at time', word_tier_idx, midpoint)
                            word = call(tg, 'Get label of interval', word_tier_idx, word_interval)        
                            
                            # number of X millisecond segments to take measurements from
                            numSegments = int(dur / segment_window) # note: this must be an integer
                            
                            # loop over segments and calculate F1/F2
                            for segmentIdx in range(1, numSegments + 1):
                                spot = start + (segmentIdx * segment_window)
                                tim_int = spot - start
                                
                                f1 = call(formant, 'Get value at time', 1, spot, 'Hertz', 'Linear')
                                f2 = call(formant, 'Get value at time', 2, spot, 'Hertz', 'Linear')
                                
                                # add to our data set
                                data.append({'Video ID': file[:-4],
                                             'Vowel': label[0], # removes + from vowel
                                             'Vowel_onset_time': start,
                                             'Time_of_formant_measurements': spot,
                                             'Time_from_vowel_onset': tim_int,
                                             'F1': f1,
                                             'F2': f2,
                                             'Participant': file[:4], 
                                             'Task': file[5:-4],
                                             'Word': word if word else 'PROBLEM',
                                            })
        
print('Done')

1 Processing file p113_int.wav
2 Processing file p113_wd.wav
3 Processing file p115_int.wav
4 Processing file p115_wd.wav
5 Processing file p120_int.wav
6 Processing file p120_wd.wav
7 Processing file p124_int.wav
8 Processing file p124_wd.wav
Done


In [5]:
# Preview the data
data[0]

{'Video ID': 'p113_int',
 'Vowel': 'o',
 'Vowel_onset_time': 0.19,
 'Time_of_formant_measurements': 0.195,
 'Time_from_vowel_onset': 0.0050000000000000044,
 'F1': 438.50735691139005,
 'F2': 1371.7079676448034,
 'Participant': 'p113',
 'Task': 'int',
 'Word': 'no'}

In [6]:
len(data)

115522

In [7]:
formants = pd.DataFrame.from_dict(data, "columns")
formants.head()

Unnamed: 0,Video ID,Vowel,Vowel_onset_time,Time_of_formant_measurements,Time_from_vowel_onset,F1,F2,Participant,Task,Word
0,p113_int,o,0.19,0.195,0.005,438.507357,1371.707968,p113,int,no
1,p113_int,o,0.19,0.2,0.01,438.708664,1345.533822,p113,int,no
2,p113_int,o,0.19,0.205,0.015,438.90997,1319.359676,p113,int,no
3,p113_int,o,0.19,0.21,0.02,439.111277,1293.18553,p113,int,no
4,p113_int,o,0.19,0.215,0.025,439.312583,1267.011384,p113,int,no


In [8]:
formants.to_csv("data/formants.csv", index = False)