In [None]:
#| default_exp loading

In [None]:
#| hide
from nbdev.showdoc import *

# loading
> This module contains the functions that load the audio, phones, and durations used by the `TTSDataset`.

In [None]:
#| export 
from fastai.data.transforms import FileGetter
from pathlib import Path
import tgt
import librosa

In [None]:
#| hide
from pathlib import Path

In [None]:
#| hide
# Relative path to the LJSpeech-1.1 dataset root used by the example cells in this notebook.
path = Path("../../data/LJSpeech-1.1")

## audio

In [None]:
#| export
def get_audio_files(dir_path: str): # Directory that holds the `.wav` files
    "Return the paths of the `.wav` files that fastai's `FileGetter` finds for `dir_path`."
    # Build the getter first, then apply it to the directory.
    find_wavs = FileGetter(extensions='.wav')
    return find_wavs(dir_path)

In [None]:
# Collect the `.wav` files under the dataset root and keep the first one as the running example.
file_paths = get_audio_files(path)
file_path = file_paths[0]
file_path, len(file_paths)

(Path('../../data/LJSpeech-1.1/wavs/LJ028-0259.wav'), 13082)

In [None]:
#| export
def load_audio(file_path: str, # Location of the `.wav` file to read
               sr: int): # Sampling rate handed to `librosa.load`
    "Read an audio file with librosa and return only the waveform."
    # `librosa.load` returns a (waveform, sampling rate) pair; the rate is discarded.
    return librosa.load(file_path, sr=sr)[0]

In [None]:
# Load the example clip with sr=22050 and show the shape of the returned waveform.
wav = load_audio(file_path, 22050)
wav.shape

(144029,)

## phones and durations

In [None]:
#| export
def replace_extension(path: str, # File path
                      extension: str): # New extension; the leading dot is optional, `""` removes the suffix
    "Return `path` as a `Path` whose final suffix is replaced by `extension`."
    # `Path.with_suffix` raises ValueError for a non-empty suffix without a leading
    # dot, so add it here to accept both ".TextGrid" and "TextGrid".
    if extension and not extension.startswith("."):
        extension = f".{extension}"
    return Path(path).with_suffix(extension)

In [None]:
# Derive the `.TextGrid` path that shares the example `.wav` file's directory and stem.
tgt_path = replace_extension(file_path, ".TextGrid")
tgt_path

Path('../../data/LJSpeech-1.1/wavs/LJ028-0259.TextGrid')

In [None]:
#| export
def load_tiers(file_path: str, # Path to a `.TextGrid` file
               tier_name: str = "phones"): # Name of the tier to return
    "Read a TextGrid file with `tgt` and return its tier named `tier_name`."
    # Despite the plural function name, a single tier is returned.
    textgrid = tgt.io.read_textgrid(file_path)
    return textgrid.get_tier_by_name(tier_name)

In [None]:
# Read the "phones" tier of the example TextGrid and show its first interval.
tiers = load_tiers(tgt_path)
tiers[0]

Interval(0.0, 0.03, "DH")

In [None]:
#| export
def get_phones_and_durations(tgt_path: str, # Path to a `.TextGrid` file
                             sr: int, # Waveform sampling rate
                             hl: int): # Spectrogram hop length
    "Return two parallel tuples: the phone labels and their durations in spectrogram frames."
    intervals = load_tiers(tgt_path)
    # Interval duration * sr / hl converts to a (fractional, unrounded) number of
    # hop-length frames. The operation order is kept as-is so float results are unchanged.
    pairs = [(interval.text, interval.duration()*sr / hl) for interval in intervals]
    # `zip(*[])` yields nothing, so unpacking into two names would raise ValueError
    # for a tier without intervals; return two empty tuples instead.
    if not pairs:
        return (), ()
    phones, durations = zip(*pairs)
    return phones, durations

In [None]:
# Compute phones and frame durations for sr=22050 and hop length 256; show the first three of each.
phones, durations = get_phones_and_durations(tgt_path, 22050, 256)
phones[:3], durations[:3]

(('DH', 'AE1', 'T'), (2.583984375, 11.197265625, 7.751953125))

In [None]:
#| hide
# Run nbdev's export step so the exported cells are written to the library module.
import nbdev; nbdev.nbdev_export()