In [1]:
import glob
import os
from textgrid import TextGrid

In [2]:
def extract_phones(tg_path):
    """Return the non-empty phone labels from the phones tier of a TextGrid.

    Args:
        tg_path: Path of the TextGrid file, handed to ``TextGrid.fromFile``.

    Returns:
        list: The ``mark`` of every interval in the selected tier whose mark is
        not empty or whitespace-only, in tier order.

    Raises:
        ValueError: If no tier has a name starting with "phones"
            (case-insensitive).
    """
    tg = TextGrid.fromFile(tg_path)
    # Select the first tier whose name starts with "phones" (case-insensitive).
    # `t.name or ""` tolerates unnamed tiers, and the None default keeps a
    # missing tier from surfacing as a bare StopIteration.
    tier = next(
        (t for t in tg.tiers if (t.name or "").lower().startswith("phones")),
        None,
    )
    if tier is None:
        raise ValueError(f"No 'phones' tier found in {tg_path!r}")
    return [iv.mark for iv in tier.intervals if iv.mark.strip()]

def parse_accent(basename):
    """
    Extract the accent code from an alignment file's base name.

    The accent is whatever follows the final underscore, e.g.
    'mistletoe_en1056809_en.scot' -> 'en.scot'. A name without any
    underscore is returned unchanged.
    """
    _head, _sep, accent = basename.rpartition("_")
    return accent

def compare_accents(word, aligned_dir):
    """
    Print a table of phone sequences for `word`, one row per accent.

    Scans `aligned_dir` for files named ``{word}_*.TextGrid``, derives the
    accent of each from its base name via `parse_accent`, reads its phones via
    `extract_phones`, and prints the accents in sorted order.

    When several files share an accent, only the first one in sorted path
    order is shown. If nothing matches, a "No alignments found" message is
    printed instead. Returns None in both cases.
    """
    # Escape glob metacharacters so `word` and `aligned_dir` match literally.
    pattern = os.path.join(
        glob.escape(aligned_dir), f"{glob.escape(word)}_*.TextGrid"
    )
    results = {}

    # glob order is arbitrary; sorting makes the "first file per accent wins"
    # choice reproducible between runs.
    for path in sorted(glob.glob(pattern)):
        # splitext drops only the trailing extension, leaving dotted accent
        # codes such as "en.uk.rp" intact.
        base, _ext = os.path.splitext(os.path.basename(path))
        accent = parse_accent(base)
        if accent in results:
            continue  # already have a sequence for this accent; skip parsing
        results[accent] = extract_phones(path)

    if not results:
        print(f"No alignments found for word '{word}' in {aligned_dir}")
        return

    rows = [(accent, " ".join(results[accent])) for accent in sorted(results)]

    # Size the columns from the rendered text: phone labels can be longer than
    # one character, so counting phones does not give the printed width.
    accent_width = max(20, max(len(accent) for accent, _ in rows))
    phones_width = max(len("Phones"), max(len(phones) for _, phones in rows))

    print(f"{'Accent':<{accent_width}} {'Phones':<{phones_width}}")
    # +1 accounts for the single space separating the two columns.
    print("-" * (accent_width + 1 + phones_width))
    for accent, phones_str in rows:
        print(f"{accent:<{accent_width}} {phones_str:<{phones_width}}")



In [3]:
# Directory scanned for aligned TextGrid files; the path is relative, so it is
# resolved against the notebook's current working directory.
aligned_dir = "output/en_corpus_example_phones"

# Print the per-accent phone table for the word "carbon".
compare_accents("carbon", aligned_dir)

Accent               Phones    
------------------------------
en.Irish             k ɑː b ə n
en.Jamaica           k ɑː b ə n
en.scot              k ɑː b ə n
en.uk.Yorkshire      k ɑː b ə n
en.uk.general        k ɑː b ə n
en.uk.rp             k ɑː b ə n
en.us.south          k ɑː b ə n
en.us.us             k ɑː b ə n


-------------------------------------

In [1]:
# Play the file:
# /home/fvalentini/misc/audio_pronunciations/output/wordreference_words/en/chase/audio.en.uk.rp.en014235.mp3

In [6]:
# Audio builds an inline audio player; as the cell's last expression it is
# displayed as the cell output.
from IPython.display import Audio

# Alternative example files, currently disabled:
# file_audio = "/home/fvalentini/misc/audio_pronunciations/output/wordreference_words/en/chase/audio.en.uk.rp.en014235.mp3"
# file_audio = "/home/fvalentini/misc/audio_pronunciations/output/wordreference_words/es/curso/audio.es.Castellano.es056373.mp3"
# NOTE(review): absolute, machine-specific path — this cell only runs where
# this exact file exists.
file_audio = "/home/fvalentini/misc/audio_pronunciations/output/words_wav/en_examples/sr16000/extraordinary_en032163_en.Irish.wav"
# NOTE(review): rate=16000 presumably mirrors the "sr16000" directory; IPython
# may only use `rate` for raw sample arrays, not file paths — confirm.
Audio(file_audio, rate=16000)