# Generating Spanish phonetic correspondence with prosody

The syltippy package (https://github.com/nur-ag/syltippy) will be used to generate syllabified (stress-indicated) outputs for each word found in the transcriptions. Then, the corresponding vowels in the TextGrid-formant dataframes will be marked as either stressed or unstressed.

In [1]:
import pandas as pd
import csv
import re
import numpy as np
from syltippy import syllabize

In [2]:
# syllabify a word and flag its stressed syllable by writing that syllable in upper case

def stress(word):
    """Return ``word`` as comma-separated syllables with the stressed one upper-cased.

    ``syllabize`` supplies the list of syllables together with the position
    of the stressed syllable; every other syllable is passed through as-is.
    """
    parts, stressed_pos = syllabize(word)
    marked = []
    for pos, part in enumerate(parts):
        if stressed_pos != pos:
            marked.append(part)
        else:
            marked.append(part.upper())
    return ','.join(marked)

# example call: the stressed syllable of the word is returned in upper case
stress("guerra")

'GUE,rra'

In [27]:
# load the vowel table: one row per vowel, with formant measurements plus the
# word / phone information taken from the TextGrids
vowels = pd.read_csv("data/allvowels.csv")
vowels.head()

Unnamed: 0,Participant,Vowel,F1.50,F2.50,F3.50,F1.25,F2.25,F3.25,F1.75,F2.75,...,t2_ph,fname,dur_ph,t1_wd,t2_wd,word,is_wdinit_ph,is_wdfin_ph,prev_ph,next_ph
0,p112,a,695.790976,1139.953282,2667.258621,689.324612,840.183174,2738.685252,672.665361,1108.588353,...,0.92,textgrids/cbas/p112.TextGrid,0.1,0.71,1.14,bajo,False,False,b,x
1,p112,o,423.620505,737.502557,2374.974921,438.92995,803.823366,2369.001274,442.314537,785.356913,...,1.14,textgrids/cbas/p112.TextGrid,0.14,0.71,1.14,bajo,False,True,x,sp
2,p112,o,445.805709,917.216321,2507.73028,436.340553,1010.369866,2699.04295,364.458469,830.657803,...,3.146,textgrids/cbas/p112.TextGrid,0.11,2.966,3.416,zombi,False,False,T,m
3,p112,i,299.435659,2081.124207,2566.561621,311.097282,2053.111869,2770.196303,263.790218,1054.379506,...,3.416,textgrids/cbas/p112.TextGrid,0.15,2.966,3.416,zombi,False,True,b,sp
4,p112,i,420.864073,2097.038034,2663.600739,387.560503,2010.116277,2511.430306,372.817182,2205.681484,...,5.191,textgrids/cbas/p112.TextGrid,0.09,4.811,5.401,básico,False,False,s,k


In [28]:
# syllabify every transcribed word; the stressed syllable is written in upper case

vowels["stress"] = vowels["word"].map(lambda entry: stress(str(entry)))
vowels.sample(5)

Unnamed: 0,Participant,Vowel,F1.50,F2.50,F3.50,F1.25,F2.25,F3.25,F1.75,F2.75,...,fname,dur_ph,t1_wd,t2_wd,word,is_wdinit_ph,is_wdfin_ph,prev_ph,next_ph,stress
5664,s053,i,347.381322,1698.057017,2222.267623,353.748001,1529.468746,2094.67531,306.984137,1567.940622,...,textgrids/dime/s05306.TextGrid,0.038,1.789,2.085,primer,False,False,p,m,"pri,MER"
1431,p115,u,514.707414,1816.673541,3068.168973,864.814125,2121.698145,3290.340823,524.661649,1776.184029,...,textgrids/cbas/p115.TextGrid,0.05,225.831,226.531,literatura,False,False,t,rf,"li,te,ra,TU,ra"
5544,s053,e,443.353296,2150.068464,2992.718109,440.780399,2127.740776,2966.490743,397.901926,2115.501728,...,textgrids/dime/s05302.TextGrid,0.052,3.086,3.845,secretarías,False,False,r(,t,"se,cre,ta,RÍ,as"
1094,p113,a,767.063033,1522.129592,2620.072429,738.865595,1392.519959,2675.696603,740.987673,1677.268343,...,textgrids/cbas/p113.TextGrid,0.08,312.326,313.256,abastecer,False,False,b,s,"a,bas,te,CER"
6474,s053,a,668.649374,1797.975441,3160.799935,712.618809,1724.276959,3214.843177,552.215218,1788.216076,...,textgrids/dime/s05341.TextGrid,0.081,3.02,3.633,problemas,False,False,m,s,"pro,BLE,mas"


In [29]:
# reduce each syllabified word to its vowel letters, keeping the commas between syllables

vowels["syll_vowels"] = vowels["stress"].map(
    lambda syllabified: re.sub(r'[^,aeiouAEIOUáéíóúÁÉÍÓÚ]', '', syllabified)
)
vowels.sample(5)

Unnamed: 0,Participant,Vowel,F1.50,F2.50,F3.50,F1.25,F2.25,F3.25,F1.75,F2.75,...,dur_ph,t1_wd,t2_wd,word,is_wdinit_ph,is_wdfin_ph,prev_ph,next_ph,stress,syll_vowels
1324,p115,e,573.661819,1119.646093,2157.133393,553.335524,1067.942056,2023.539251,511.850918,1228.449461,...,0.06,150.266,150.896,convalecer,False,False,l,s,"con,va,le,CER","o,a,e,E"
8415,s056,o,436.488665,1398.611869,2874.773627,440.886412,1283.883828,2807.745868,392.447493,1494.943163,...,0.045,2.784,2.995,con,False,False,k,n,CON,O
5524,s053,i,337.882511,2382.600422,2845.622334,308.482857,2665.804695,2869.470763,373.546425,1964.549301,...,0.064,0.386,1.032,activación,False,False,t,b,"ac,ti,va,CIÓN","a,i,a,IÓ"
5342,s051,o,493.791173,1814.799452,2971.592257,483.096573,1780.416983,2975.060736,502.987929,1855.444468,...,0.02,2.445,2.712,tanto,False,True,t,e,"TAN,to","A,o"
5800,s053,i,302.262669,2688.342134,3116.415706,289.358539,2640.277414,2707.898593,353.705975,2594.760703,...,0.061,0.201,0.92,finalizar,False,False,f,n,"fi,na,li,ZAR","i,a,i,A"


In [30]:
# locate the stressed syllable inside a comma-separated string of per-syllable vowels

def is_stress(word):
    """Return the 0-based position of the upper-case (stressed) item in ``word``.

    ``word`` is a comma-separated string such as ``"o,e,I,o"``. If several
    items are upper case the last one decides, and it is looked up by its
    first occurrence in the list. When no item is upper case the result is 0.
    """
    nuclei = word.split(",")
    uppercase_items = [item for item in nuclei if item.isupper()]
    if not uppercase_items:
        return 0
    return nuclei.index(uppercase_items[-1])

# example call: the upper-case item is the second one, so the result is index 1
is_stress("e,I")

1

In [31]:
# for every row, record which syllable (0-based) of its word carries the stress
vowels["stress_vowel"] = vowels["syll_vowels"].apply(is_stress)
print(vowels.shape[0])
vowels.sample(5)

9289


Unnamed: 0,Participant,Vowel,F1.50,F2.50,F3.50,F1.25,F2.25,F3.25,F1.75,F2.75,...,t1_wd,t2_wd,word,is_wdinit_ph,is_wdfin_ph,prev_ph,next_ph,stress,syll_vowels,stress_vowel
3099,s001,e,375.74171,2008.750521,2679.453065,355.747066,2060.107292,2733.197591,371.508798,1971.223299,...,2.072,2.139,en,True,False,i,n,EN,E,0
5423,s051,o,678.204733,1374.126619,3167.147612,655.247491,1266.051641,3109.658513,626.129143,1462.794952,...,0.052,0.769,contenido,False,False,k,n,"con,te,NI,do","o,e,I,o",2
7902,s055,i,466.114306,2414.65445,3094.553511,457.118151,2325.494176,2978.996522,444.124643,2175.635525,...,1.691,1.76,y,True,True,s,l,Y,,0
772,p113,e,657.462978,2089.019007,2991.228968,611.695394,2063.827401,3001.536061,633.237193,2083.249791,...,17.994,18.764,vez,False,False,b,s,VEZ,E,0
4785,s051,o,670.50765,1382.829902,2482.134864,290.333837,1033.690283,2963.788634,661.550651,1657.612365,...,1.236,2.01,definitivo,False,True,b,.sil,"de,fi,ni,TI,vo","e,i,i,I,o",3


Now all that's left to do is mark each vowel in the textgrid dataframe according to its index in the word it is produced in. Then, for each vowel index that matches the index given in `stress_vowel`, the vowel will be marked as `stressed`.

In [32]:
# number the vowels of each word token 0, 1, 2, ... in row order.
# `fname` is part of the key because word onsets (`t1_wd`) appear to be measured from
# the start of each TextGrid file: a participant with several files can have two
# different words with the same onset, and those must not be counted as one word.
vowels["vowel_ind"] = vowels.groupby(["Participant", "fname", "t1_wd"]).cumcount()
vowels = vowels.reset_index(drop = True)
print(len(vowels))
vowels.head(5)

9289


Unnamed: 0,Participant,Vowel,F1.50,F2.50,F3.50,F1.25,F2.25,F3.25,F1.75,F2.75,...,t2_wd,word,is_wdinit_ph,is_wdfin_ph,prev_ph,next_ph,stress,syll_vowels,stress_vowel,vowel_ind
0,p112,a,695.790976,1139.953282,2667.258621,689.324612,840.183174,2738.685252,672.665361,1108.588353,...,1.14,bajo,False,False,b,x,"BA,jo","A,o",0,0
1,p112,o,423.620505,737.502557,2374.974921,438.92995,803.823366,2369.001274,442.314537,785.356913,...,1.14,bajo,False,True,x,sp,"BA,jo","A,o",0,1
2,p112,o,445.805709,917.216321,2507.73028,436.340553,1010.369866,2699.04295,364.458469,830.657803,...,3.416,zombi,False,False,T,m,"ZOM,bi","O,i",0,0
3,p112,i,299.435659,2081.124207,2566.561621,311.097282,2053.111869,2770.196303,263.790218,1054.379506,...,3.416,zombi,False,True,b,sp,"ZOM,bi","O,i",0,1
4,p112,i,420.864073,2097.038034,2663.600739,387.560503,2010.116277,2511.430306,372.817182,2205.681484,...,5.401,básico,False,False,s,k,"BÁ,si,co","Á,i,o",0,0


In [33]:
# a vowel is stressed when its position in the word equals the stressed-syllable position;
# note that this overwrites the syllabified strings previously stored in 'stress'
matches_stress = vowels['stress_vowel'] == vowels['vowel_ind']
vowels["stress"] = np.where(matches_stress, "stressed", "unstressed")
print(vowels.shape[0])
vowels.head(5)

9289


Unnamed: 0,Participant,Vowel,F1.50,F2.50,F3.50,F1.25,F2.25,F3.25,F1.75,F2.75,...,t2_wd,word,is_wdinit_ph,is_wdfin_ph,prev_ph,next_ph,stress,syll_vowels,stress_vowel,vowel_ind
0,p112,a,695.790976,1139.953282,2667.258621,689.324612,840.183174,2738.685252,672.665361,1108.588353,...,1.14,bajo,False,False,b,x,stressed,"A,o",0,0
1,p112,o,423.620505,737.502557,2374.974921,438.92995,803.823366,2369.001274,442.314537,785.356913,...,1.14,bajo,False,True,x,sp,unstressed,"A,o",0,1
2,p112,o,445.805709,917.216321,2507.73028,436.340553,1010.369866,2699.04295,364.458469,830.657803,...,3.416,zombi,False,False,T,m,stressed,"O,i",0,0
3,p112,i,299.435659,2081.124207,2566.561621,311.097282,2053.111869,2770.196303,263.790218,1054.379506,...,3.416,zombi,False,True,b,sp,unstressed,"O,i",0,1
4,p112,i,420.864073,2097.038034,2663.600739,387.560503,2010.116277,2511.430306,372.817182,2205.681484,...,5.401,básico,False,False,s,k,stressed,"Á,i,o",0,0


Now, create a function that wraps the steps above. It takes as input a dataframe of vowel productions (from formants and TextGrid) with the columns `word`, `Participant`, and `t1_wd`, with rows in the order the vowels were produced (e.g., sorted by `t1_ph`), and returns it with a `stress` column marking each vowel as `stressed` or `unstressed`. The function requires that the formant dataframe only include vowel formants (i.e., non-vowels are excluded).

In [34]:
def get_stress(vowels, syllabizer=None):
    """Label every vowel row of a formant/TextGrid dataframe as stressed or unstressed.

    Parameters
    ----------
    vowels : pandas.DataFrame
        One row per vowel production. Must contain the columns ``word``,
        ``Participant`` and ``t1_wd``. When an ``fname`` column is present it
        is also used to tell word tokens apart, so that words from different
        files of the same participant that share an onset time are not
        counted as one word.
        Vowels are numbered within a word in row order, so rows are assumed
        to be ordered by production and to contain vowels only.
        NOTE(review): the comparison below assumes one vowel row per
        syllable; confirm how diphthongs are segmented in the TextGrids.
    syllabizer : callable, optional
        Function mapping a word (``str``) to ``(syllables, stress_index)``.
        Defaults to ``syltippy.syllabize``.

    Returns
    -------
    pandas.DataFrame
        A new dataframe with a fresh ``0..n-1`` index and three added
        columns: ``stress_vowel`` (0-based position of the stressed syllable
        in the word), ``vowel_ind`` (0-based position of the row's vowel in
        its word token) and ``stress`` (``"stressed"`` / ``"unstressed"``).
        The dataframe passed in is left unchanged.
    """
    # imports stay local so the function can be pasted into another notebook as-is
    import re
    import numpy as np

    if syllabizer is None:
        from syltippy import syllabize as syllabizer

    # a syllable only counts as a possible stress bearer if it contains one of these letters
    has_vowel = re.compile(r'[aeiouAEIOUáéíóúÁÉÍÓÚ]')

    def stressed_syllable_index(word):
        # Use the stress position reported by the syllabizer directly, instead
        # of upper-casing the syllable and searching for it afterwards; the
        # search picks the wrong syllable when the word already contains
        # upper-case letters (e.g. a capitalised vowel-initial word).
        syllables, stressed = syllabizer(word)
        for position, syllable in enumerate(syllables):
            if position == stressed and has_vowel.search(syllable):
                return position
        # no vowel-bearing stressed syllable found: fall back to the first one
        return 0

    # reset_index returns a new dataframe, so the caller's object is never modified
    result = vowels.reset_index(drop=True)

    # position of the stressed syllable in the word each row belongs to
    result["stress_vowel"] = result["word"].apply(
        lambda entry: stressed_syllable_index(str(entry))
    )

    # position of each vowel inside its word token
    token_keys = ["Participant", "t1_wd"]
    if "fname" in result.columns:
        token_keys.insert(1, "fname")
    result["vowel_ind"] = result.groupby(token_keys).cumcount()

    # a vowel is stressed when it sits at the stressed position of its word
    result["stress"] = np.where(
        result["stress_vowel"] == result["vowel_ind"], "stressed", "unstressed"
    )

    return result