# Formant measurements

This notebook presents a function that allows us to obtain formant values. The function makes use of the [Parselmouth](https://buildmedia.readthedocs.org/media/pdf/parselmouth/latest/parselmouth.pdf) and [audiolabel](https://github.com/rsprouse/audiolabel) packages.

### Libraries

In [None]:
import os  # For some basic file functionality (creating/removing/parsing file paths)
import re  # For regular expessions
import numpy as np  # For advanced numerical calculations, multi-dimensional arrays

# Import audiolabel
from audiolabel import read_label  # For reading TextGrid files
from phonlab.utils import dir2df  # For finding directories and pulling data
import parselmouth  # For incorporating Praat features in this notebook

# Obtain general Praat functionality where native features don't exist
from parselmouth.praat import call as pcall

import pandas as pd  # For creating managing dataframes

### Functions

In [None]:
def findMiddle(input_list):
    '''
    Finds the middle item of a list. The central element is returned for a list with an odd number of 
    items. The mean of the two central elements is returned for a list with an even number of items. 
    Adapted from an answer by Kyle Baker to a Stackoverflow inquiry:
    https://stackoverflow.com/questions/38130895/find-middle-of-a-list.
    
    Parameters
    ----------
    input_list: a non-empty list (or other indexable sequence) of numeric values
    
    Returns
    -------
    The element at the center of the list (odd length), or the mean of the two central elements as 
    computed by np.mean (even length).
    
    Raises
    ------
    IndexError
        If input_list is empty.
    '''
    
    size = len(input_list)
    half = size // 2
    
    # Parity must be tested on the length itself. Testing it on len/2 sends
    # lengths 2, 6, 10, ... down the odd-length branch.
    if size % 2:
        return input_list[half]
    return np.mean([input_list[half - 1], input_list[half]])

In [None]:
def get_formants(row, psnd):
    '''
    Measures F1, F2, F3, and F4 for one vowel token. The formant tracks are sampled at each glottal 
    pulse found in the (possibly padded) vowel interval. For each formant this function stores the mean 
    across pulses and the value at the middle of the per-pulse list, as computed by findMiddle.
    
    
    Parameters
    ----------
    row: DataFrame row
        (Needs 't1_ph' and 't2_ph' attributes marking the start and end point of the vowel, and a 'sex' 
        attribute. A 'sex' of 'male' selects the male pitch floor and maximum formant; any other value 
        selects the default (female) settings.)
    
    psnd: Parselmouth Sound
       (The sound from which the interval 't1_ph' to 't2_ph' is extracted.)
    
    
    Returns
    -------
    The same row, with 't1_ph'/'t2_ph' widened when the vowel was padded, and with these entries added:
    
    f1_mean, f2_mean, f3_mean, f4_mean
    f1_mid, f2_mid, f3_mid, f4_mid
    
    A formant with no defined value at any pulse gets the string 'empty' for both entries.
    '''
    
    # Add buffer time to short vowels, to obtain more accurate pitch values.
    # Each pair is (duration upper bound, padding added on each side); only
    # the first matching band is applied. Vowels of .45 s or more are left as is.
    duration = row.t2_ph - row.t1_ph
    for upper_bound, padding in ((.15, 0.2), (.25, 0.15), (.35, 0.1), (.45, 0.05)):
        if duration < upper_bound:
            row.t1_ph = row.t1_ph - padding
            row.t2_ph = row.t2_ph + padding
            break

    # Select the audio portion of interest
    s = psnd.extract_part(row.t1_ph, row.t2_ph)
    
    # Speaker-dependent analysis settings (defaults are for female speakers)
    is_male = row.sex == 'male'
    pitch_floor = 70 if is_male else 100
    max_fq = 5000 if is_male else 5500
    
    # From sound 's', obtain pitch object
    pitch = pcall(
        s,
        'To Pitch (cc)...',
        0.001,  # Time step (s)
        pitch_floor,  # Pitch floor (Hz)
        15,  # Max. number of candidates
        0,  # Very accurate (0 = off)
        0.03,  # Silence threshold
        0.45,  # Voicing threshold
        0.01,  # Octave cost
        0.35,  # Octave-jump cost
        0.14,  # Voiced/unvoiced cost
        250.0,  # Pitch ceiling (Hz)
        )
    
    # From sound 's' and 'pitch' object, obtain PointProcess object (i.e. 'pulses')
    pulses = pcall([s, pitch], 'To PointProcess (cc)')
    
    # From sound 's', obtain the formant values
    formants = pcall(
        s, 
        "To Formant (burg)", 
        0.001,  # Time step (s)
        5,  # Max. number of formants
        max_fq,  # Maximum formant (Hz)
        0.025,  # Window length (s)
        50  # Pre-emphasis from (Hz)
    )
    
    # From the PointProcess object (i.e., 'pulses'), obtain number of points.
    numPoints = pcall(pulses, "Get number of points")
    
    # One list of per-pulse values for each formant number
    formant_numbers = (1, 2, 3, 4)
    values_by_formant = {number: [] for number in formant_numbers}
    
    # Measure formants only at glottal pulses (Praat point indices start at 1).
    # Undefined measurements come back as NaN and are skipped.
    for point in range(1, numPoints + 1):
        t = pcall(pulses, "Get time from index", point)
        for number in formant_numbers:
            value = pcall(formants, "Get value at time", number, t, 'Hertz', 'Linear')
            if not np.isnan(value):
                values_by_formant[number].append(value)
    
    # Mean of each formant across pulses. Each formant is checked against its
    # own list, so one empty track cannot mislabel another.
    for number in formant_numbers:
        values = values_by_formant[number]
        row['f{}_mean'.format(number)] = np.mean(values) if values else 'empty'
    
    # Middle value of each formant's per-pulse list. These are written after
    # all the means so the added entries keep the order mean x4, then mid x4.
    for number in formant_numbers:
        values = values_by_formant[number]
        row['f{}_mid'.format(number)] = findMiddle(values) if values else 'empty'
    
    return row

### Measurements dataframe

In [None]:
# For more information on using audiolabel and managing directories, refer to Ronald Sprouse's documentation
# on audiolabel (https://github.com/rsprouse/audiolabel)

# Identify location of data
datadir = './Data'
# Named group 'subject' captures "S" followed by digits. Raw strings keep the
# regex backslashes from being read as (invalid) Python escape sequences.
dirpat = r'(?P<subject>S\d+)'

# Identify files of interest: every .wav file under datadir
fdf = dir2df(datadir, dirpat=dirpat, fnpat=r'\.wav$', addcols=['barename', 'dirname'])

# Attach the speaker information to each file row by subject; rows with no
# match in the CSV are kept (left join)
speaker_sex = pd.read_csv('./speaker_sex.csv', encoding='utf-8')
fdf = fdf.merge(speaker_sex, on='subject', how='left')

# Identify data (wav and textgrid files)
wav_suffix = '_words.wav'
tg_suffix = '_words_Spanish_aligned.TextGrid'

In [None]:
def func_executor(participant):
    '''
    Runs the formant measurements for one participant.
    
    Loads the participant's wav file and TextGrid, keeps the phone intervals labelled 'i', pairs each 
    one with a word interval, and applies get_formants to every resulting row.
    
    Parameters
    ----------
    participant: DataFrame row
        (Needs a 'relpath' attribute, used both as the sub-directory of datadir and as the file-name 
        prefix, and a 'sex' attribute that is copied onto every phone row.)
    
    Returns
    -------
    A DataFrame with one row per 'i' phone, carrying the phone and word timing/label columns, 'sex', 
    and the entries added by get_formants.
    '''
    
    print(participant)  # Print participant row to follow progress and isolate potential errors
    
    wav_file = os.path.join(datadir, participant.relpath, participant.relpath + wav_suffix)
    tg_file = os.path.join(datadir, participant.relpath, participant.relpath + tg_suffix)

    psnd = parselmouth.Sound(wav_file)

    phdf, wddf = read_label(tg_file, 'praat')

    # NOTE(review): this function used to read './word_info.csv' and merge it
    # with the word tier on every call, but the merged frame was never used.
    # That dead work has been removed. If word-level metadata is wanted in the
    # output, merge it onto wddf here, before the merge_asof below.

    # Keep only the vowel of interest
    phdf = phdf[phdf.label == 'i']
    
    # Pair each phone with the last word interval starting at or before it
    phones = phdf.rename({'t1': 't1_ph', 't2': 't2_ph', 'label': 'label_ph'}, axis='columns')
    words = wddf.drop('fname', axis='columns') \
        .rename({'t1': 't1_wd', 't2': 't2_wd', 'label': 'label_wd'}, axis='columns')
    phwddf = pd.merge_asof(phones, words, left_on='t1_ph', right_on='t1_wd')

    # get_formants reads 'sex' from each row to choose its analysis settings
    phwddf = phwddf.assign(sex=participant.sex)
    
    phwddf = phwddf.apply(get_formants, args=(psnd,), axis=1)
        
    return phwddf

In [None]:
# Run func_executor on every row of fdf and stack the per-participant
# dataframes into a single measurements table

formants_df = pd.concat(
    fdf.apply(func_executor, axis=1).tolist()
)

In [None]:
# Identify rows with missing measurements -- an empty df means there were no issues.
# get_formants marks a formant with no measurements with the string 'empty'
# rather than NaN, so look for that marker as well as for NaN.

formants_df[formants_df.isna().any(axis=1) | formants_df.eq('empty').any(axis=1)]

In [None]:
# Save measurement results as 'formants.csv' under a folder called 'Results'

os.makedirs('Results', exist_ok=True)  # to_csv does not create a missing folder
# The row index is not written to the file. to_csv returns None when given a
# path, so export_csv holds no data.
export_csv = formants_df.to_csv('Results/formants.csv', index=False, header=True)