In [None]:
import glob
import math
import os
import re

import conll
import numpy as np
import parselmouth
import tgt

from nltk.corpus import cmudict

### SLAM features

In [None]:
def dico2FeatureString(dico):
    """Serialise a feature dictionary as a CoNLL ``key=value|key=value`` string.

    Values are converted with ``str``; pairs appear in the dictionary's
    iteration order. An empty dictionary yields an empty string.
    """
    return "|".join(name + "=" + str(value) for name, value in dico.items())

def build_feature_dico(misc_features_string):
    """Parse a CoNLL MISC string (``key=value|key=value``) into a dictionary.

    Only the first ``=`` of each entry separates the key from the value, so
    values may themselves contain ``=``. Entries without any ``=`` (such as
    the ``_`` placeholder of an empty MISC column, or an empty string) carry
    no feature and are skipped instead of raising.

    Returns a dict mapping feature names to their (string) values.
    """
    feature_dico = {}
    for feature in misc_features_string.split("|"):
        key, separator, value = feature.partition("=")
        if not separator:
            # Not a key=value pair ("_" or ""): nothing to record.
            continue
        feature_dico[key] = value
    return feature_dico

def confirm_alignment(ref_interval, target_interval):
    """Report whether two labelled time intervals overlap, and how.

    Both arguments must expose ``start_time`` and ``end_time``; the reference
    interval must also expose ``text``. All time differences are rounded to
    the millisecond before being compared.

    Returns a tuple ``(aligned, left_overlap, right_overlap)``:
    * ``aligned`` is False when the reference has an empty label (a pause) or
      when the two intervals share no more than 1 ms;
    * ``left_overlap`` is True when the target starts at least 10 ms before
      the reference;
    * ``right_overlap`` is True when the target ends at least 10 ms after the
      reference.
    Both overlap flags are False whenever ``aligned`` is False.
    """
    not_aligned = (False, False, False)

    # Unlabelled reference intervals are pauses and never align.
    if ref_interval.text == "":
        return not_aligned

    # Target ends before (or within 1 ms of) the start of the reference.
    if round(target_interval.end_time - ref_interval.start_time, 3) <= 0.001:
        return not_aligned

    # Target starts after (or within 1 ms of) the end of the reference.
    if round(target_interval.start_time - ref_interval.end_time, 3) >= -0.001:
        return not_aligned

    starts_before_ref = round(ref_interval.start_time - target_interval.start_time, 3) >= 0.01
    ends_after_ref = round(target_interval.end_time - ref_interval.end_time, 3) >= 0.01
    return True, starts_before_ref, ends_after_ref



def vowel_final(word_string, dico):
    """Tell whether a word's canonical pronunciation ends in a vowel.

    Takes as entry:
    * An orthographic word (word_string)
    * A pronunciation dictionary mapping lower-cased words to a list of
      pronunciations, each a list of phoneme symbols (dico)

    The first pronunciation listed for the word is used, and its last phoneme
    counts as a vowel when its symbol starts with one of ``aeiouAEIOU``.
    When the word cannot be looked up in the dictionary, the last letter of
    the spelling is tested instead. An empty word is never vowel-final.
    """
    vowels = "aeiouAEIOU"
    if word_string == "":
        return False
    try:
        # Use the dictionary that was passed in, not a module-level global.
        phonemes = dico[word_string.lower()][0]
        return phonemes[-1][0] in vowels
    except (KeyError, IndexError, TypeError):
        # Unknown word or unusable entry: fall back on the orthography.
        return word_string[-1] in vowels
        
def extractProsodicAnnotation(textgrid, label_tier, label_tier2, token_tier, word_text_tier, syllable_transcription_tier):
    """
    Extracts prosodic information from a .TextGrid file containing SLAM annotations and returns a dictionary of annotations.

    Takes as entry:
    * A textgrid file containing tiers corresponding to the following elements.
    * Name of the tier containing phonetic transcriptions of syllables (syllable_transcription_tier)
    * Names of two tiers containing global (label_tier) and local (label_tier2) SLAM annotations of syllables 
    * Name of the tier containing a token-level alignment and numeric ID in format 2:13 (utterance two, token 13) (token_tier)
    * Name of tier containing orthographic transcription of token (word_text_tier)

    Returns a dictionary keyed by the label of each non-empty interval of the token tier.
    Each value is a dictionary of string features: "AlignBegin"/"AlignEnd" (token bounds in ms)
    and, for the n-th syllable overlapping the token, "Syl<n>" (transcription or "FUSED"),
    "Syl<n>Glo"/"Syl<n>Loc" (SLAM labels, or "X" when the global label is empty),
    "Syl<n>Duration", "Syl<n>AlignBegin"/"Syl<n>AlignEnd" (ms), "Syl<n>ExternalOnset",
    the features returned by contourToFeatures, and "SyllableCount".

    The three syllable tiers are indexed with the same position, and the two word tiers
    likewise, so each group is assumed to share one segmentation (TODO confirm on the data).
    """

    # English pronunciation dictionary, used to decide whether a word ends in a vowel.
    pronunciation = cmudict.dict()
    dico = {}
    
    # Try UTF-8 first and fall back on UTF-16.
    # NOTE(review): the bare except also hides unrelated errors (e.g. a missing file).
    try: textgrid_object = tgt.read_textgrid(textgrid, include_empty_intervals=True, encoding="utf-8")
    except: textgrid_object = tgt.read_textgrid(textgrid, include_empty_intervals=True, encoding="utf-16")
    label_obj = textgrid_object.get_tier_by_name(label_tier)
    label_obj2 = textgrid_object.get_tier_by_name(label_tier2)
    token_object = textgrid_object.get_tier_by_name(token_tier)  
    word_text_object = textgrid_object.get_tier_by_name(word_text_tier)
    syllable_transcription_object = textgrid_object.get_tier_by_name(syllable_transcription_tier)
    
    # i is a cursor into the syllable tiers that is shared by all tokens;
    # last_i remembers where the last successfully aligned token started.
    i=0
    last_i = 0
    for index, token in enumerate(token_object): 
        
        skip_features = False
        
        # NOTE(review): alignment_found is assigned but never read.
        alignment_found = False
        syl_number = 1
        
        # Unlabelled token intervals carry no token ID.
        if not token.text: continue
        
        dico[token.text] = {}
        dico[token.text]["AlignBegin"] = str(int(token.start_time*1000))
        dico[token.text]["AlignEnd"] = str(int(token.end_time*1000))
        
        # Move the cursor forward until a syllable overlapping the token is found.
        alignment, left_overlap, right_overlap = confirm_alignment(token, label_obj[i])
        while not alignment and i < len(label_obj) - 1:
            i+=1 
            alignment, left_overlap, right_overlap = confirm_alignment(token, label_obj[i])

        if not alignment:
            # If no alignment is found, skip the token and move on to the next one
            i = last_i
            continue

        last_i = i
        # Consume every consecutive syllable that overlaps the token.
        while alignment:
            skip_features = False
            if left_overlap:
                # The syllable starts before the token: it is either shared with
                # the previous token (FUSED) or has an onset outside the token.
                try: 
                    if vowel_final(word_text_object[index-1].text, pronunciation): 
                        dico[token.text]["Syl"+str(syl_number)] = "FUSED"
                        skip_features = True

                    ### Handles rare cases of triple fusions between syllables

                    elif dico[token_object[index-1].text]["Syl1"] == "FUSED" and "Syl2" not in dico[token_object[index-1].text]:
                        dico[token.text]["Syl"+str(syl_number)] = "FUSED"
                        skip_features = True

                    # The previous token's first syllable starts at the same time as this syllable.
                    elif "Syl1AlignBegin" in dico[token_object[index-1].text] and str(dico[token_object[index-1].text]['Syl1AlignBegin']) == str(int(label_obj[i].start_time*1000)):
                        dico[token.text]["Syl"+str(syl_number)] = "FUSED"
                        skip_features = True

                    else:
                        dico[token.text]["Syl"+str(syl_number)+"ExternalOnset"] = "True"
                # NOTE(review): bare except; any failure above (e.g. a missing key
                # for the previous token) is only reported, not raised.
                except:
                    print("problem on {}, confirm output".format(label_obj[i]))

            if right_overlap:
                # The syllable runs past the end of the token: unless the word is
                # vowel-final, a syllable that starts inside the token is skipped here.
                if vowel_final(word_text_object[index].text, pronunciation) != True and label_obj[i].start_time > token.start_time:  
                    skip_features = True

            if skip_features == False:
            
                if label_obj[i].text:
                    dico[token.text]["Syl"+str(syl_number)+"Glo"] = label_obj[i].text
                    dico[token.text]["Syl"+str(syl_number)+"Loc"] = label_obj2[i].text
                    dico[token.text]["Syl"+str(syl_number)+"Duration"] = str(int((label_obj[i].end_time - label_obj[i].start_time)*1000))
                    
                    # Categorical features derived from the global and local labels.
                    feature_dico = contourToFeatures(label_obj[i].text, "Glo")
                    feature_dico.update(contourToFeatures(label_obj2[i].text, "Loc"))

                    for feature in feature_dico.keys():
                        dico[token.text]["Syl"+str(syl_number)+feature] = feature_dico[feature]
                    dico[token.text]["SyllableCount"] = str(syl_number)

                else:
                    # Empty SLAM label: mark the contour as unknown, except for
                    # words whose text contains "#".
                    if "#" not in word_text_object[index].text:
                        dico[token.text]["Syl"+str(syl_number)+"Glo"] = "X"
                        dico[token.text]["Syl"+str(syl_number)+"Loc"] = "X"
                    
                if "#" not in word_text_object[index].text:
                    dico[token.text]["Syl"+str(syl_number)+"AlignBegin"] = str(int(label_obj[i].start_time*1000))
                    dico[token.text]["Syl"+str(syl_number)+"AlignEnd"] = str(int(label_obj[i].end_time*1000))

                    dico[token.text]["Syl"+str(syl_number)] = syllable_transcription_object[i].text


            syl_number+=1
            i+=1
            if i == len(label_obj):
                break
                
            alignment, left_overlap, right_overlap = confirm_alignment(token, label_obj[i])

        
        # Step back one syllable so the next token re-examines the last one
        # looked at, which may overlap it.
        i-=1
        

                
    return dico

def translate_code(code):
    """Convert a textual SLAM label into numeric values.

    The first two characters are pitch levels (L, l, m, h, H mapped to 1..5);
    characters that are not a level letter are skipped. When the label is
    longer than two characters and its third character is a level letter, a
    tuple ``(level, int(fourth character))`` is appended.

    Returns the list of translated values.
    """
    levels = {"L": 1, "l": 2, "m": 3, "h": 4, "H": 5}

    translated_code = [levels[letter] for letter in code[0:2] if letter in levels]

    # Optional third element: a level letter followed by a digit.
    if len(code) > 2 and code[2] in levels:
        translated_code.append((levels[code[2]], int(code[3])))

    return translated_code
            
def contourToFeatures(contour_label, suffix=""):
    """Derive categorical prosodic features from a textual SLAM label.

    Takes as entry:
    * A SLAM label, translated with translate_code (contour_label)
    * A string appended to every feature name (suffix)

    Returns a dictionary with three features:
    * "Slope": "Flat", "Rise" or "Fall", comparing the first two levels;
    * "AvgHeight": "H" (mean level >= 3.5), "L" (< 2.5) or "M", where the mean
      also includes the level of the third element when the label has one;
    * "PitchRange": "L" (difference of the first two levels <= 1),
      "H" (>= 3) or "M".

    A label that does not provide two valid level letters (for example an
    empty label) cannot be described; an empty dictionary is returned for it
    instead of raising.
    """
    dico = {}

    code = translate_code(contour_label)

    # Need two numeric levels; a tuple in second position means one of the
    # first two letters was not a valid level.
    if len(code) < 2 or isinstance(code[1], tuple):
        return dico

    start_level, end_level = code[0], code[1]

    if start_level == end_level:
        dico["Slope"+suffix] = "Flat"
    elif start_level < end_level:
        dico["Slope"+suffix] = "Rise"
    else:
        dico["Slope"+suffix] = "Fall"

    # The third element, when present, is a (level, position) tuple.
    levels = [start_level, end_level]
    if len(code) == 3:
        levels.append(code[2][0])
    height = sum(levels) / len(levels)

    if height >= 3.5:
        dico["AvgHeight"+suffix] = "H"
    elif height < 2.5:
        dico["AvgHeight"+suffix] = "L"
    else:
        dico["AvgHeight"+suffix] = "M"

    amplitude = abs(start_level - end_level)
    if amplitude <= 1:
        dico["PitchRange"+suffix] = "L"
    elif amplitude >= 3:
        dico["PitchRange"+suffix] = "H"
    else:
        dico["PitchRange"+suffix] = "M"

    return dico

In [None]:
def extract_trees_and_metadata(file_path: str) -> tuple:
    """Load the trees of a CoNLL file together with per-tree metadata.

    Returns a tuple ``(trees, file_name, metadata)`` where ``trees`` is the
    result of ``conll.conllFile2trees``, ``file_name`` is the base name of
    ``file_path`` and ``metadata`` is a list of ``(tree, sent_id, words)``
    tuples. ``sent_id`` is taken from the ``# sent_id = ...`` line of the
    tree's string form, or "_" when there is none.
    """
    trees = conll.conllFile2trees(file_path)

    def _sent_id(tree):
        found = re.search(r"# sent_id = (.+)", str(tree))
        return found.group(1) if found else "_"

    metadata = [(tree, _sent_id(tree), tree.words) for tree in trees]

    return trees, os.path.basename(file_path), metadata

In [None]:
def extract_values_from_pitchtier(
    file_content: list, align_begin: int, align_end: int
) -> tuple:
    """
    Extracts pitch values from a pitchtier file.
    
    Takes as entry:
    * A list of lines from a pitchtier file
    * The beginning and end of the alignment interval, compared directly with
      the "number =" entries of the file (the bounds are inclusive)

    Each "value =" line is paired with the most recent "number =" line. A
    "value =" line that appears before any "number =" line has no time stamp
    and is ignored.

    Returns a tuple (values, numbers) of two parallel lists of floats.
    """
    values = []
    numbers = []
    number = None  # time of the current point; unknown until a "number =" line is read

    for ligne in file_content:
        if "number =" in ligne:
            number = float(ligne.split("=")[1])
        elif "value =" in ligne:
            if number is None:
                continue
            value = float(ligne.split("=")[1])
            if align_begin <= number <= align_end:
                values.append(value)
                numbers.append(number)
    return values, numbers

def is_number(s: str) -> bool:
    """Checks if a value can be converted into a float.

    Returns False, rather than raising, for values that ``float`` rejects
    because of their type (such as ``None``) as well as for strings that do
    not spell a number.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        return False
    return True
    
def extract_pitchtier_infos(sent_begin: float, sent_end: float, tok_tree: dict, pitchtier_file: str) -> tuple:
    """
    Extracts pitch values from a pitchtier file for a sentence and for the syllables of one token.
    
    Takes as entry:
    * The beginning and end of the alignment interval for the sentence, in
      milliseconds (numbers or numeric strings)
    * A token tree, whose "misc" entry holds the token's feature string
    * The path to a pitchtier file

    Returns a tuple (Syls_infos, sent_pitchtier_infos):
    * Syls_infos maps "Syl<n>" to a dict with "Alignbegin", "Alignend" (in
      seconds), "values" and "numbers" for every syllable of the token that
      has both a Syl<n>AlignBegin and a Syl<n>AlignEnd feature and starts
      after time 0. Each entry keeps the syllable's own number <n>, so a token
      whose first syllable has no alignment (e.g. a FUSED syllable) does not
      shift the data of the following syllables.
    * sent_pitchtier_infos holds the same four keys for the sentence.

    When the sentence bounds are missing or not numeric, the pitchtier file is
    not read: Syls_infos is empty and the sentence bounds are returned as given.
    """
    sent_align_begin = sent_begin
    sent_align_end = sent_end
    file_content = []
    sent_form_values = []
    sent_form_numbers = []

    # Only read the file when the sentence bounds are usable numbers.
    if sent_align_begin and sent_align_end and is_number(sent_align_begin) and is_number(sent_align_end):
        sent_align_begin = float(sent_align_begin) / 1000
        sent_align_end = float(sent_align_end) / 1000

        with open(pitchtier_file, "r") as file:
            file_content = file.readlines()

        sent_form_values, sent_form_numbers = extract_values_from_pitchtier(
            file_content, sent_align_begin, sent_align_end
        )

    misc_dict = build_feature_dico(tok_tree["misc"])

    # Collect (syllable number, syllable key, begin, end) for every aligned
    # syllable, whatever the number of syllables in the token.
    syllable_bounds = []
    for key, begin in misc_dict.items():
        matched = re.fullmatch(r"(Syl(\d+))AlignBegin", key)
        if matched is None:
            continue
        syl_key = matched.group(1)
        end = misc_dict.get(f"{syl_key}AlignEnd", "_")
        if begin == "_" or end == "_":
            continue
        syllable_bounds.append(
            (int(matched.group(2)), syl_key, float(begin) / 1000, float(end) / 1000)
        )
    syllable_bounds.sort()

    Syls_infos = {}
    if file_content:  # Ensure file_content is not empty before processing
        for _, syl_key, align_begin, align_end in syllable_bounds:
            if align_begin > 0:
                values, numbers = extract_values_from_pitchtier(
                    file_content, align_begin, align_end
                )
                Syls_infos[syl_key] = {
                    "Alignbegin": align_begin,
                    "Alignend": align_end,
                    "values": values,
                    "numbers": numbers,
                }

    sent_pitchtier_infos = {
        "Alignbegin": sent_align_begin,
        "Alignend": sent_align_end,
        "values": sent_form_values,
        "numbers": sent_form_numbers,
    }

    return Syls_infos, sent_pitchtier_infos

### Semitones

In [None]:
def semitones_between(frequency1: float, frequency2: float) -> float:
    """Return the interval, in semitones, of frequency1 relative to frequency2.

    The result is ``12 * log2(frequency1 / frequency2)`` rounded to three
    decimals: positive when frequency1 is the higher of the two. Equal
    frequencies give 0, and ``None`` is returned when either frequency is
    zero or negative.
    """
    has_non_positive = frequency1 <= 0 or frequency2 <= 0
    if has_non_positive:
        return None

    if frequency1 != frequency2:
        return round(12 * math.log2(frequency1 / frequency2), 3)

    return 0

def hertz_semiton_data(syls_infos: dict, sent_pitchtier_infos: dict, token_data) -> dict:
    """Add mean F0 and distance to the utterance mean to each syllable of a token.

    For every syllable in ``syls_infos`` this sets "MeanF0" (mean of its
    "values", rounded to three decimals) and "SemitonesFromUtteranceMean"
    (result of semitones_between for that mean and the utterance mean); both
    are 0 when the syllable has no values. The utterance mean (0 when the
    sentence has no values) is stored in ``sent_pitchtier_infos`` under
    "MoyenneSentHertz", and also, rounded, under "UtteranceMeanF0" when
    "root" is among the values of ``token_data``.

    Both dictionaries are modified in place and returned as a tuple.
    """
    # Both sentence entries are looked up, as before, although only the
    # values are used below.
    sentence_f0, _sentence_times = (
        sent_pitchtier_infos["values"],
        sent_pitchtier_infos["numbers"],
    )

    if sentence_f0:
        utterance_mean = sum(sentence_f0) / len(sentence_f0)
    else:
        utterance_mean = 0

    for syllable in syls_infos.values():
        syllable_f0 = syllable.get("values", [])
        if not syllable_f0:
            syllable["MeanF0"] = 0
            syllable["SemitonesFromUtteranceMean"] = 0
            continue

        syllable_mean = sum(syllable_f0) / len(syllable_f0)
        syllable["MeanF0"] = round(syllable_mean, 3)
        syllable["SemitonesFromUtteranceMean"] = semitones_between(
            syllable_mean, utterance_mean
        )

    sent_pitchtier_infos["MoyenneSentHertz"] = utterance_mean

    if "root" in token_data.values():
        sent_pitchtier_infos["UtteranceMeanF0"] = round(float(utterance_mean), 3)

    return syls_infos, sent_pitchtier_infos

### Slope

In [None]:
def slope_data(syls_infos: dict) -> dict:
    """Classify the pitch movement of each syllable from its first and last pitch points.

    For every syllable in ``syls_infos`` (each with "numbers", the times in
    seconds, and "values", the pitch in Hz) this sets:
    * "Coordonnee": (first time, last time, first value, last value);
    * "Semiton": interval in semitones of the last point relative to the
      first, so that a positive value is an upward movement;
    * "Slope": "Rise" or "Fall" when the interval exceeds the glissando
      threshold of 0.16 / duration semitones, "X" when the interval and the
      threshold are both 0 (e.g. a single pitch point), "Flat" otherwise.

    All three are "X" when the syllable has no pitch points or when the
    interval cannot be computed (semitones_between returned None).

    The dictionary is modified in place and returned.
    """
    for syl_data in syls_infos.values():
        sec = syl_data["numbers"]
        hertz = syl_data["values"]

        semiton = None
        if sec and hertz:
            # Last point relative to the first: positive means the pitch goes up.
            semiton = semitones_between(hertz[-1], hertz[0])

        if semiton is None:
            syl_data["Coordonnee"] = "X"
            syl_data["Semiton"] = "X"
            syl_data["Slope"] = "X"
            continue

        coordonnee = (sec[0], sec[-1], hertz[0], hertz[-1])
        duration = sec[-1] - sec[0]
        seuil_glissando = 0.16 / duration if duration != 0 else 0

        if abs(semiton) > seuil_glissando and semiton > 0:
            slope = "Rise"
        elif abs(semiton) > seuil_glissando and semiton < 0:
            slope = "Fall"
        elif semiton == 0 and seuil_glissando == 0:
            slope = "X"
        else:
            slope = "Flat"

        syl_data["Coordonnee"] = coordonnee
        syl_data["Semiton"] = semiton
        syl_data["Slope"] = slope

    return syls_infos

### Amplitude

In [None]:
def calculate_amplitudes(audio_file: str, start_time: float, end_time: float):
    """Return the maximum and mean intensity of an audio segment.

    ``audio_file`` is a path, or a list whose first element is the path.
    The segment between ``start_time`` and ``end_time`` is extracted with
    parselmouth and converted to an intensity contour; the maximum and the
    mean of that contour are returned, rounded to three decimals.

    This is best-effort: any error (unreadable file, times outside the
    sound, ...) results in ``(0, 0)`` instead of an exception.
    """
    try:
        # Accept a list of candidate paths and use the first one.
        path = audio_file[0] if isinstance(audio_file, list) else audio_file

        sound = parselmouth.Sound(path)

        # Reject segments that fall outside the sound.
        if start_time < 0 or end_time > sound.get_total_duration():
            raise ValueError("Start time or end time is out of the sound file's duration.")

        intensity = sound.extract_part(
            from_time=start_time, to_time=end_time
        ).to_intensity().values.T
        loudest = np.max(intensity)
        average = np.mean(intensity)

        return round(loudest, 3), round(average, 3)
    except Exception:
        return 0, 0


def amplitude_data(syls_infos: dict, sent_infos: dict, audio_file: str):
    """Add amplitude measures to the syllables of a token and to the token span.

    Every syllable that has both "Alignbegin" and "Alignend" receives
    "MaxAmplitude" and "AvgAmplitude" from calculate_amplitudes. The span from
    the start of the first such syllable to the end of the last one is
    measured too and stored in ``sent_infos`` as "TokenMaxAmplitude" and
    "TokenAvgAmplitude".

    Both dictionaries are modified in place and returned as a tuple.
    """
    token_start_time = None
    token_end_time = None

    for syl_key, syl_data in syls_infos.items():
        if not ("Alignbegin" in syl_data and "Alignend" in syl_data):
            continue

        start_time = syl_data["Alignbegin"]
        end_time = syl_data["Alignend"]

        # The token span runs from the first aligned syllable to the last one.
        if token_start_time is None:
            token_start_time = start_time
        token_end_time = end_time

        result = calculate_amplitudes(audio_file, start_time, end_time)
        if not result:
            print(f"Failed to calculate amplitudes for syllable {syl_key}")
            continue
        syl_data["MaxAmplitude"], syl_data["AvgAmplitude"] = result

    if token_start_time is not None and token_end_time is not None:
        result = calculate_amplitudes(audio_file, token_start_time, token_end_time)
        if not result:
            print("Failed to calculate amplitudes for the entire token")
        else:
            sent_infos["TokenMaxAmplitude"], sent_infos["TokenAvgAmplitude"] = result

    return syls_infos, sent_infos

# Definition of parameters

In [None]:
# Pronunciation dictionary: to modify according to the language chosen
pronunciation = cmudict.dict()

# Names of the TextGrid tiers read from the SLAM output
syl_tier = "SyllablesStyleGlo"  # Tier containing global contour
syl_tier2 = "SyllablesStyleLoc"  # Tier containing local contour
word_tier = "Word-ID"  # Tier containing numeric ID for token
word_text_tier = "Word-Text"  # Tier containing the text of the token
syllable_text_tier = "Syllables"  # Tier containing syllabic transcriptions

# Input files
slam_files = glob.glob("SLAM_output/*.TextGrid")  # Folder containing SLAM labels in TextGrid format
conll_infiles = glob.glob("CONLL_files/*.conllu")  # Folder containing CONLLU files to which prosodic information will be added

conll_outfolder = "CONLL_outfiles/" # Folder where the new CONLLU files will be saved

pitchtier_infiles = glob.glob("PITCHTIER/*.PitchTier")  # Folder containing pitchtier files
audio_infiles = glob.glob("WAV/*.wav")  # Folder containing audio files

# If running this script a second time, useful for renaming features
feature_rename_dict = {"OldFeatname": "NewFeatname"}

# Create the output folder if needed; exist_ok avoids failing when it already exists.
os.makedirs(conll_outfolder, exist_ok=True)

# Fill CONLLU files

In [None]:
def _misc_value(misc_string, key):
    """Return the value of *key* in a ``k=v|k=v`` MISC string, or None when absent."""
    for feature in misc_string.split("|"):
        name, separator, value = feature.partition("=")
        if separator and name == key:
            return value
    return None


# For every SLAM TextGrid that has a matching pitchtier and audio file:
#   1. copy the SLAM annotations into the MISC column of the matching CoNLL-U
#      files and write them to the output folder;
#   2. re-read those output files and add pitch, slope and amplitude features.
for slam_file in sorted(slam_files):
    basename = os.path.basename(slam_file)[:-9]  # drop the 9-character ".TextGrid" extension
    pitchtier_files = [file for file in pitchtier_infiles if basename in file]
    if not pitchtier_files:
        # print(f"No pitchtier file found for {basename}")
        continue
    pitchtier_file = pitchtier_files[0]

    # Kept as a list: calculate_amplitudes uses its first element.
    audio_file = [file for file in audio_infiles if basename in file]
    if not audio_file:
        # print(f"No audio file found for {basename}")
        continue

    print("treating", basename)
    # Tier names come from the parameter cell rather than being repeated here.
    annotations = extractProsodicAnnotation(
        slam_file,
        syl_tier,
        syl_tier2,
        word_tier,
        word_text_tier,
        syllable_text_tier,
    )

    # list of conllu files to be treated
    conllu_outfiles = []

    # --- Pass 1: SLAM annotations -------------------------------------------
    for infile in conll_infiles:
        if os.path.basename(infile)[:len(basename)] != basename:
            continue

        trees, _, metadata = extract_trees_and_metadata(infile)
        for treei, (tree, _, _) in enumerate(metadata):
            for token in tree:
                # Token IDs in the TextGrid have the form "<sentence>:<token>".
                identifier = str(treei+1)+":"+str(token)
                feature_dico = build_feature_dico(tree[token]['misc'])

                features_to_delete = []
                new_feature_dico = {}
                for feature in feature_dico.keys():
                    # Syllable features from a previous run are replaced,
                    # except the amplitude ones.
                    if re.match("Syl[0-9].*", feature):
                        if "Amplitude" not in feature:
                            features_to_delete.append(feature)

                    # Rename features whose name, minus its 4-character
                    # prefix (e.g. "Syl1"), is listed in feature_rename_dict.
                    if feature[4:] in feature_rename_dict.keys():
                        newfeat = feature.replace(feature[4:], feature_rename_dict[feature[4:]])
                        new_feature_dico[newfeat] = feature_dico[feature]
                        features_to_delete.append(feature)

                for feature in new_feature_dico.keys():
                    feature_dico[feature] = new_feature_dico[feature]

                for feature in features_to_delete:
                    if feature in feature_dico.keys():
                        del feature_dico[feature]

                # MISC is only rewritten for tokens that have SLAM annotations.
                if identifier in annotations.keys():
                    for item in annotations[identifier]:
                        feature_dico[item] = annotations[identifier][item]

                    # A fused first syllable has no measurements of its own.
                    if "Syl1" in feature_dico.keys() and feature_dico["Syl1"] == "FUSED":
                        if "Syl1Duration" in feature_dico.keys():
                            del feature_dico["Syl1Duration"]
                        if "Syl1MeanF0" in feature_dico.keys():
                            del feature_dico["Syl1MeanF0"]
                        if "Syl1SemitonesFromUtteranceMean" in feature_dico.keys():
                            del feature_dico["Syl1SemitonesFromUtteranceMean"]

                    feature_string = dico2FeatureString(feature_dico)
                    tree[token]['misc'] = feature_string

        output_file = os.path.join(conll_outfolder, os.path.basename(infile))
        conll.trees2conllFile(trees, output_file)
        conllu_outfiles.append(output_file)

    # --- Pass 2: pitch, slope and amplitude ---------------------------------
    for infile in conllu_outfiles:
        if os.path.basename(infile)[: len(basename)] != basename:
            continue

        trees, _, metadata = extract_trees_and_metadata(infile)

        for treei, (tree, _, _) in enumerate(metadata):
            tokens = [tree[tok] for tok in tree]
            if not tokens:
                continue

            # Sentence bounds: start of the first token, end of the last one.
            # Looked up by feature name, so other MISC features placed before
            # AlignBegin/AlignEnd do not yield wrong bounds; None when absent.
            sent_align_begin = _misc_value(tokens[0].get("misc", ""), "AlignBegin")
            sent_align_end = _misc_value(tokens[-1].get("misc", ""), "AlignEnd")

            for token_data in tokens:
                feature_dico = build_feature_dico(token_data["misc"])
                if "PUNCT" not in token_data["tag"]:
                    syls_infos, sent_infos = extract_pitchtier_infos(
                        sent_align_begin,
                        sent_align_end,
                        token_data,
                        pitchtier_file,
                    )

                    syls_infos, sent_infos = hertz_semiton_data(syls_infos, sent_infos, token_data["gov"])
                    syls_infos = slope_data(syls_infos)
                    syls_infos, sent_infos = amplitude_data(syls_infos, sent_infos, audio_file)

                    # Raw data and bounds are not exported as features.
                    for syl_key, syl_info in syls_infos.items():
                        for key, value in syl_info.items():
                            if key not in ["Coordonnee", "values", "numbers", "Alignbegin", "Alignend"]:
                                feature_dico[f"{syl_key}{key}"] = value

                    # Utterance-level features are attached to the root token.
                    if token_data["gov"] == {0: "root"}:
                        # NOTE(review): amplitude_data in this file stores
                        # "TokenMaxAmplitude"/"TokenAvgAmplitude", so these two
                        # keys are never present - confirm which is intended.
                        if "MaxAmplitude" in sent_infos:
                            feature_dico["MaxAmplitude"] = sent_infos["MaxAmplitude"]
                        if "AvgAmplitude" in sent_infos:
                            feature_dico["AvgAmplitude"] = sent_infos["AvgAmplitude"]
                        if "UtteranceMeanF0" in sent_infos:
                            feature_dico["UtteranceMeanF0"] = sent_infos["UtteranceMeanF0"]

                feature_string = dico2FeatureString(feature_dico)
                token_data["misc"] = feature_string

        conll.trees2conllFile(trees, infile)

print('done')