In [None]:
#| eval: false

# !pip install praatio --upgrade
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
#| eval: false

from praatio import textgrid

# A Textgrid is created without arguments -- whatever attributes it needs are
# taken from the tiers that are added to it.
tg = textgrid.Textgrid()

# IntervalTier and PointTier constructors both receive four arguments, in this order.
wordTier = textgrid.IntervalTier(
    'words',        # tier name
    [(0,1,'a')],    # list of intervals (for a PointTier: list of points)
    0,              # starting time
    1.0,            # ending time
)


In [None]:
#| eval: false

import os
from os.path import join
from pathlib import Path
import re

from praatio import textgrid
from praatio import audio

from tqdm import tqdm


# Locations of the input audio/transcripts and of the generated TextGrids,
# all below the current user's ~/.cache/panda directory.
inputPath = {
    'audios': f'{Path.home()}/.cache/panda/audios/',
    'transcripts': f'{Path.home()}/.cache/panda/transcripts/',
}
outputPath = f'{Path.home()}/.cache/panda/textgrids/'

# makedirs(..., exist_ok=True) also creates missing parent directories and is a
# no-op when the directory is already there, so the cell can be re-run safely
# (no check-then-create race as with `os.path.exists` + `os.mkdir`).
os.makedirs(outputPath, exist_ok=True)

# Build one TextGrid per .wav file from the transcript that shares its base name.
#
# Every non-blank transcript line must contain a bracketed tag; the text inside
# the first bracket pair (colons removed) is used as the tier name, suffixed
# with the per-file counter `v_ix`. '[unsure:]' is masked before the search so
# it is never picked up as the tier name.
# NOTE(review): presumably the bracketed tag identifies the speaker
# (e.g. "[name:] some words") -- confirm against the transcript format.
#
# Intervals are NOT real timings: utterances are laid out one after another,
# each in a 0.3-long slot followed by a 0.1 gap, in transcript order.

# Raw strings: '\[' and '\.' are invalid escape sequences in a normal string
# literal and trigger warnings on current Python versions.
speaker_tag_re = re.compile(r'\[(.+?)\]')
markup_re = re.compile(r'\[.+?\]|-|\.')

v_ix = 0
for fn in tqdm(os.listdir(inputPath['audios'])):
    v_ix += 1  # counts every directory entry, including skipped non-.wav files
    name, ext = os.path.splitext(fn)
    if ext != ".wav":
        continue
    duration = audio.getDuration(join(inputPath['audios'], fn))
    with open(join(inputPath['transcripts'], f"{name}.txt"), 'r') as f:
        text = f.read()

    # Drop blank lines once, up front, so that `voices` and `utterances` are
    # built from the same lines and stay index-aligned when zipped below.
    raw_lines = [u for u in text.split('\n') if u.strip()]
    try:
        voices = [
            speaker_tag_re.search(u.replace('[unsure:]', '---')).group(1).replace(':', '')
            for u in raw_lines
        ]
    except AttributeError:
        # search() returned None: some line carries no bracketed tag.
        # Dump the transcript for inspection and stop processing.
        print(raw_lines)
        for u in raw_lines:
            print(u)
            print(speaker_tag_re.search(u.replace('[unsure:]', '---')))
        break
    voices = [f"{v}_{v_ix}" for v in voices]
    # Strip bracketed tags, hyphens and periods from the spoken text.
    utterances = [markup_re.sub('', u).strip() for u in raw_lines]

    # Group the placeholder intervals by tier name.
    ix = 0
    tiers = {}
    for k, utt in zip(voices, utterances):
        end = ix+0.3
        tiers.setdefault(k, []).append((ix, end, utt))
        ix = end + 0.1

    tg = textgrid.Textgrid()
    for k, utt in tiers.items():  # utt: list of (start, end, label) tuples for tier k
        tg.addTier(textgrid.IntervalTier(k, utt, 0, duration))
    try:
        tg.save(join(outputPath, name + ".TextGrid"), format="short_textgrid", includeBlankSpaces=False)
    except Exception as exc:
        # Not a bare `except:` so that KeyboardInterrupt still interrupts the run.
        # Report the failure together with the data that led to it, then stop.
        print(repr(exc))
        print(outputPath, name)
        print(text.split('\n'))
        print(f"utt: {utt}")
        print(f"voices: {voices}")
        print(f"utterances: {utterances}")
        break

# Post-run pass over the output directory, looking at the .TextGrid files.
# Nothing is reported per file (the listing below is disabled); the only
# visible effect is the final 'done' marker. A missing output directory
# would surface here as an error from os.listdir.
for fn in os.listdir(outputPath):
    ext = os.path.splitext(fn)[-1]
    if ext == ".TextGrid":
        pass  # print(fn)
print('done')

100%|██████████| 3412/3412 [00:01<00:00, 2977.04it/s]

done





In [None]:
#| eval: false

# Locations of the input audio/transcripts and of the cleaned plain-text
# transcripts, all below the current user's ~/.cache/panda directory.
inputPath = {
    'audios': f'{Path.home()}/.cache/panda/audios/',
    'transcripts': f'{Path.home()}/.cache/panda/transcripts/',
}
outputPath = f'{Path.home()}/.cache/panda/txts/'

# makedirs(..., exist_ok=True) also creates missing parent directories and is a
# no-op when the directory is already there, so the cell can be re-run safely.
os.makedirs(outputPath, exist_ok=True)

# For every .wav file, write a single-line plain-text version of its transcript:
# bracketed tags, hyphens and periods are removed from each line and the
# remaining lines are joined with single spaces.

# Raw string: '\[' and '\.' are invalid escape sequences in a normal string
# literal and trigger warnings on current Python versions.
cleanup_re = re.compile(r'\[.+?\]|-|\.')

for fn in tqdm(os.listdir(inputPath['audios'])):
    name, ext = os.path.splitext(fn)
    if ext != ".wav":
        continue
    with open(join(inputPath['transcripts'], f"{name}.txt"), 'r') as f:
        text = f.read()
    utterances = [cleanup_re.sub('', u).strip() for u in text.split('\n')]
    # Skip lines that are empty after cleaning (blank lines, a trailing newline,
    # tag-only lines) so the joined text has no doubled or trailing spaces.
    utterance = ' '.join(u for u in utterances if u)
    with open(join(outputPath, name + ".txt"), 'w') as f:
        f.write(utterance)




100%|██████████| 3412/3412 [00:00<00:00, 9347.73it/s]


In [None]:
#| eval: false

from glob import glob
from pydub import AudioSegment

# Cut every recording into one audio clip per TextGrid interval and write the
# interval's label next to it as a .txt file.
audio_output = f'{Path.home()}/.cache/panda/audio_slices'
trans_output = f'{Path.home()}/.cache/panda/trans_slices'
# Neither AudioSegment.export nor open(..., 'w') creates directories, so make
# sure both destinations exist before writing into them.
for out_dir in (audio_output, trans_output):
    os.makedirs(out_dir, exist_ok=True)

grids = glob('output/*.TextGrid')
for grid in grids:
    name = Path(grid).stem
    tg = textgrid.openTextgrid(grid, includeEmptyIntervals=False)
    # Bound to `recording` rather than `audio`: an earlier cell binds `audio`
    # to the `praatio.audio` module, and the same name should not change meaning
    # between cells.
    recording = AudioSegment.from_wav(f"{Path.home()}/.cache/panda/audios/{name}.wav")
    for ix, (k, v) in enumerate(tg.tierDict.items()):
        # Tiers whose name contains 'phones' are skipped (they still consume an index).
        if 'phones' in k:
            continue
        for s, e, t in v.entryList:
            # Pad the interval by 0.1 s on each side, clamp it to the recording,
            # and convert seconds to the milliseconds used for slicing.
            start = max(s-0.1, 0)*1000
            end = min(e+0.1, recording.duration_seconds)*1000
            # NOTE(review): the file name is built from the label, the tier index
            # and the recording name only, so a label that occurs twice in one
            # tier of the same recording overwrites the earlier slice. Labels are
            # also used verbatim as path components. Confirm this is intended.
            audio_slice = recording[start:end]
            audio_slice.export(f'{audio_output}/{t}_{ix}_{name}.wav', format="wav")
            with open(f'{trans_output}/{t}_{ix}_{name}.txt', 'w') as f:
                f.write(t)
