In [1]:
from collections import defaultdict

from scipy.io import wavfile
from textgrid import TextGrid as TG
from textgrid import Interval
from tqdm import tqdm

import os

In [2]:
root_folder = '../../allwavs/allvowl'

all_files = os.listdir(root_folder)
all_lower_files = [file.lower() for file in all_files]
corpus = defaultdict(list)
final_corpus = {}

variant = True

for file in all_files:
    file_l = file.lower().replace('_1', '').replace('cut', '').replace('-corrected', '')
    if file_l.endswith('wav'):
        corpus[file_l[:-4]].append(file)
    if file_l.endswith('textgrid'):
        corpus[file_l[:-9]].append(file)

for k, v in corpus.items():
    if not all(not x.lower().endswith('textgrid') for x in v) or all(not x.lower().endswith('wav') for x in v):
        wavs = [file for file in v if file.lower().endswith('wav')]
        if len(wavs) > 1:
            wavs = [x for x in wavs if 'cut' in x]
        wav = wavs[0]
        textgrids = [file for file in v if file.lower().endswith('textgrid')]
        if len(textgrids) > 1:
            if variant:
                textgrids = [x for x in textgrids if '_1' in x]
            else:
                textgrids = [x for x in textgrids if 'mono.' in x]
            if k == 'om1_mono':
                textgrids = ['om1_mono.TextGrid']
        textgrid = textgrids[0]
        final_corpus[k] = [wav, textgrid]
    else:
        print(k, v)

IndexError: list index out of range

In [2]:
root_folder = '../../allwavs/allvowl/corrected'

all_files = os.listdir(root_folder)
final_corpus = {}

variant = True

for file in all_files:
    if file.endswith('wav'):
        file_l = file.replace('cut', '')
        final_corpus[file_l.replace('.wav', '')] = (file, file.replace('wav', 'textgrid'))

In [4]:
marks = set()

vowels = {'2', '9', '@', 'E', 'E/', 'O', 'O/', 'U~/', 'a', 'a~', 'e', 'i', 'o', 'o~', 'u', 'y'}

info_dict = {
    'ac1': 'fe',
    'ac2': 'fe',
    'agnes1': 'fE',
    'agnes2': 'fE',
    'al1': 'fe',
    'al2': 'fe',
    'al': 'fe',
    'ang1': 'fe',
    'ang2': 'fe',
    'be1': 'mE',
    'be2': 'mE',
    'cg1': 'fE',
    'cg2': 'fE',
    'clement': 'me',
    'df': 'mE',
    'dj1': 'mE',
    'dj2': 'me',
    'dorian': 'mE',
    'em1': 'mE',
    'em2': 'mE',
    'ev1': 'mE',
    'ev2': 'mE',
    'hzc1': 'fe',
    'hzc2': 'fe',
    'ib1': 'fe',
    'ib2': 'fe',
    'isabel22': 'fE',
    'isabel2': 'fE',
    'jb1': 'mE',
    'jb2': 'mE',
    'kd': 'me',
    'ken1': 'me',
    'laure': 'fe',
    'lb1': 'fE',
    'lb2': 'fE',
    'lp1': 'me',
    'lp2': 'me',
    'lucas': 'me',
    'mat1': 'mE',
    'mathieu2': 'mE',
    'melanie1': 'fE',
    'melanie2': 'fE',
    'mob': 'me',
    'om1': 'fE',
    'om2': 'fE',
    'or1': 'fE',
    'or2': 'fE',
    'oriane-re2': 'fE',
    'oriane1': 'fE',
    'phil1': 'mE',
    'phil2': 'mE',
    'simon1': 'mE',
    'simon2': 'mE',
    'unk1': 'fe',
    'unk2': 'fe',
    'vb1': 'fE',
    'vb2': 'fE',
    'vc1': 'mE',
    'vc2': 'mE',
    'vg': 'mE',
    'vibl1': 'fE',
    'vibl2': 'fE',
    'ya1': 'mE',
    'ya2': 'mE',
    'yf1': 'me',
    'yf2': 'me',
    'yl': 'mE'
}

def trim_wav(in_wav, out_wav, start, end):
    sample_rate, wave_data = wavfile.read(in_wav)
    start_sample = int(start * sample_rate)
    end_sample = int(end * sample_rate)
    wavfile.write(out_wav, sample_rate, wave_data[start_sample:end_sample])

def __hash__(self):
    return hash((self.minTime, self.maxTime, self.mark))

Interval.__hash__ = __hash__

trimmed = False

for k, (wav, textgrid) in tqdm(final_corpus.items()):
    try:
        tg = TG.fromFile(root_folder + '/' + textgrid)
    except AttributeError:
        #print(textgrid)
        continue
    
    prev_p = ''
    two_mode = False
    out_phones = defaultdict(list)
    gender, E_pron = info_dict[k.replace('_mono', '')]
    if len(tg) == 2:
        #print(k)
        #print(k.ljust(20), wav.ljust(20), textgrid)
        phontier, wordtier = tg
        for phone in phontier:
            if phone.mark != '#':
                best_overlap_duration = 0
                best_word = None
                for word in wordtier:
                    overlap_duration = max(0, min(phone.maxTime, word.maxTime) - max(phone.minTime, word.minTime))
                    if overlap_duration > best_overlap_duration:
                        best_overlap_duration = overlap_duration
                        best_word = word
                out_phones[best_word].append(phone)
        for word, phones in out_phones.items():
            word2 = word.mark
            if 'One' in word.mark and word.mark.startswith('t'):
                word2 = word2.replace('One', '1')
                phones = phones[:len(prev)]
                for i, phone in enumerate(phones):
                    phone.mark = prev[i].mark
            elif 'Two' in word.mark:
                phones = phones[:2]
                word2 = word2.replace('Two', '0')

            phonemes = '_'.join([phone.mark for phone in phones])
            if phonemes == prev_p:
                phonemes = phonemes[0] + '1' + phonemes[1:]
                word2 += '1'
                if word.mark == 'tant':
                    two_mode = True
            elif two_mode:
                phonemes = phonemes[0] + '2' + phonemes[1:]
                word2 =  word2.replace('1', '') + '2'
            min_d = phones[0].minTime
            max_d = phones[-1].maxTime

            if phones[1:2]:
                phones = phones[1:2]
            elif phones[0].mark not in vowels:
                continue
            
            phone = phones[0]
            phone.mark = phone.mark.replace('/', '+')
            if k in ['cg1', 'cg2', 'ib1', 'ib2', 'mathieu2', 'melanie1', 'melanie2', 'vb1']:
                if any(x in word.mark for x in ['pé', 'les', 'mes', 'ses', 'tes']):
                    phone.mark = 'E'
                    if 'pé' in word.mark and ('cg' in k or 'melanie' in k or 'vb1' in k) or \
                    ('ses' in word.mark or 'tes' in word.mark) and 'mathieu' in k:
                        phone.mark = 'e'
            if any(x in word.mark for x in ['paix', 'lait', 'mais', 'sait', 'taie']):
                phone.mark = E_pron
            phonemes = phonemes.replace('/', '+')
            add_path = 'trimmed/' if trimmed else ''
            trim_wav(f'{root_folder}/{wav}',
                     f'{root_folder}/extracted/{add_path}{phone.mark}__{gender}__{phone.minTime - min_d:.5f}__{phone.maxTime - min_d:.5f}__{phonemes}__{word2}__{wav}',
                     phone.minTime if trimmed else min_d,
                     phone.maxTime if trimmed else max_d)
            prev = phones
            prev_p = phonemes
            for phone in phones:
                phone.mark = phone.mark.replace('+', '/')

  sample_rate, wave_data = wavfile.read(in_wav)
100%|███████████████████████████████████████████████████████████████████████████████████| 67/67 [03:35<00:00,  3.21s/it]
