In [2]:
import textgrid
import pathlib
import pandas as pd
import numpy as np

from pathlib import Path
from pydub import AudioSegment
from typing import Union, List, Dict



In [2]:
def get_phone_fragments(
        audio_file_path: str,
        annotation_file_path: str,
        arpabet_phone_code: Union[List[str], str] = None
) -> List[bytes]:
    if isinstance(arpabet_phone_code, str):
        arpabet_phone_code = [arpabet_phone_code]

    tg_labels = textgrid.TextGrid.fromFile(annotation_file_path)
    wav_file = AudioSegment.from_wav(audio_file_path)
    wav_fragments = list()

    for interval in tg_labels[1]:
        if arpabet_phone_code is not None and interval.mark not in arpabet_phone_code:
            continue
        start = interval.minTime * 1000
        end = interval.maxTime * 1000
        wav_fragments.append(wav_file[start:end].raw_data)

    return wav_fragments


In [50]:
get_phone_fragments(
    audio_file_path='arctic_a0003.wav',
    annotation_file_path='arctic_a0003.TextGrid',
    arpabet_phone_code='AO1'
)

[b'\x00\xf0\xa8\xf1E\xf3\xcc\xf4\x9b\xf6w\xf8\x89\xfa\xa5\xfc\xc5\xfe\x03\x01\x82\x03\xef\x05\x84\x08\xfc\nD\r\xa8\x0f\xeb\x11\x1f\x14\xf2\x15\xc3\x17b\x19\x08\x1b\xa1\x1c\xf7\x1d\xda\x1e\xbd\x1f\xcc \x9a!!"\x8c"\xc7"\xfa"c#\xb2#\xe6#\xb0#v#\x02#l"o!0 \xd1\x1eo\x1d\xf4\x1bD\x1a\xb4\x18+\x17\xb0\x15R\x14\xc9\x125\x11\xaf\x0f>\x0e\xdf\x0ch\x0b\xc3\t.\x08\xa6\x06!\x05\xaf\x03:\x02\xa9\x00/\xff\xe2\xfd\xb1\xfcS\xfb\t\xfa\n\xf9;\xf8\x98\xf7\xfd\xf6?\xf6\xb8\xf5\x81\xf5&\xf5\xee\xf4k\xf4\xe3\xf3]\xf3\x03\xf3\xb0\xf2\x8c\xf2Q\xf21\xf2|\xf2\x05\xf3\xb3\xf3`\xf4\xf8\xf4\xc5\xf5\x90\xf6F\xf7\xdf\xf7\x99\xf8"\xf9\xc1\xf9q\xfaF\xfb\r\xfc\xfe\xfc\xe7\xfd\xce\xfe\xc9\xff\xc2\x00\xb2\x01\xc7\x02\xb0\x03\xbf\x04\xf9\x05\x12\x07C\x08I\t)\n\xe6\n\x8a\x0b\x12\x0c\xa9\x0c\xf2\x0cT\r\xb6\r\xec\r>\x0e\xad\x0e\n\x0fr\x0f\xc7\x0f\x0f\x10s\x10y\x10\x8d\x10z\x10g\x10E\x10\xe2\x0f_\x0f\xa0\x0e\x14\x0e\xb5\rA\r\xb8\x0c[\x0c\x0f\x0c\xc1\x0bt\x0b\xf9\n\x8c\n\x01\n]\t\xc5\x08\x0c\x089\x07a\x06\x94\x05\xa4\x04\xbc\x0

In [3]:
def get_arctic_description_file(
        speaker_nickname: str,
        speaker_l1: str,
        gender: str,
        annotation_dir_path: str,
        audio_dir_path: str
) -> list:
    table_rows = list()
    annotation_dir_path = Path(annotation_dir_path)
    for annotation_file in annotation_dir_path.iterdir():
        audio_file = Path(audio_dir_path, f'{annotation_file.stem}.wav')
        table_rows.append([
            speaker_nickname,
            speaker_l1,
            gender,
            str(annotation_file),
            str(audio_file)
        ])

    return table_rows

In [74]:
get_arctic_description_file(
    speaker_nickname='ABA',
    speaker_l1='Arabic',
    gender='M',
    annotation_dir_path=r'E:\voice_datasets\arctic\l2arctic_release_v5.0\ABA\ABA\annotation',
    audio_dir_path=r'E:\voice_datasets\arctic\l2arctic_release_v5.0\ABA\ABA\wav'
)

[['ABA',
  'Arabic',
  'E:\\voice_datasets\\arctic\\l2arctic_release_v5.0\\ABA\\ABA\\annotation\\arctic_a0003.TextGrid',
  'E:\\voice_datasets\\arctic\\l2arctic_release_v5.0\\ABA\\ABA\\wav\\arctic_a0003.wav'],
 ['ABA',
  'Arabic',
  'E:\\voice_datasets\\arctic\\l2arctic_release_v5.0\\ABA\\ABA\\annotation\\arctic_a0005.TextGrid',
  'E:\\voice_datasets\\arctic\\l2arctic_release_v5.0\\ABA\\ABA\\wav\\arctic_a0005.wav'],
 ['ABA',
  'Arabic',
  'E:\\voice_datasets\\arctic\\l2arctic_release_v5.0\\ABA\\ABA\\annotation\\arctic_a0006.TextGrid',
  'E:\\voice_datasets\\arctic\\l2arctic_release_v5.0\\ABA\\ABA\\wav\\arctic_a0006.wav'],
 ['ABA',
  'Arabic',
  'E:\\voice_datasets\\arctic\\l2arctic_release_v5.0\\ABA\\ABA\\annotation\\arctic_a0007.TextGrid',
  'E:\\voice_datasets\\arctic\\l2arctic_release_v5.0\\ABA\\ABA\\wav\\arctic_a0007.wav'],
 ['ABA',
  'Arabic',
  'E:\\voice_datasets\\arctic\\l2arctic_release_v5.0\\ABA\\ABA\\annotation\\arctic_a0008.TextGrid',
  'E:\\voice_datasets\\arctic\\l2arctic

In [5]:
speaker_info = {
    'ABA': ['ABA ', 'Arabic', 'M'],
    'SKA': ['SKA', 'Arabic', 'F'],
    'YBAA': ['YBAA', 'Arabic', 'M'],
    'ZHAA': ['ZHAA', 'Arabic', 'F'],
    'BWC': ['BWC', 'Mandarin', 'M'],
    'LXC': ['LXC', 'Mandarin', 'F'],
    'NCC': ['NCC', 'Mandarin', 'F'],
    'TXHC': ['TXHC', 'Mandarin', 'M'],
    'ASI': ['ASI', 'Hindi', 'M'],
    'RRBI': ['RRBI', 'Hindi', 'M'],
    'SVBI': ['SVBI', 'Hindi', 'F'],
    'TNI': ['TNI', 'Hindi', 'F'],
    'HJK': ['HJK', 'Korean', 'F'],
    'HKK': ['HKK', 'Korean', 'M'],
    'YDCK': ['YDCK', 'Korean', 'F'],
    'YKWK': ['YKWK', 'Korean', 'M'],
    'EBVS': ['EBVS', 'Spanish', 'M'],
    'ERMS': ['ERMS', 'Spanish', 'M'],
    'MBMPS': ['MBMPS', 'Spanish', 'F'],
    'NJS': ['NJS', 'Spanish', 'F'],
    'HQTV': ['HQTV', 'Vietnamese', 'M'],
    'PNV': ['PNV', 'Vietnamese', 'F'],
    'THV': ['THV', 'Vietnamese', 'F'],
    'TLV': ['TLV', 'Vietnamese', 'M']
}

In [3]:
#dataset_file_path = r'E:\voice_datasets\arctic\l2arctic_release_v5.0\data'
dataset_file_path = "/media/maxim/Programming/voice_datasets/arctic/l2arctic_release_v5.0/data"
table = list()
for speaker_dir in Path(dataset_file_path).iterdir():
    table.extend(get_arctic_description_file(
        speaker_nickname=speaker_dir.stem,
        speaker_l1=speaker_info[speaker_dir.stem][1],
        gender=speaker_info[speaker_dir.stem][2],
        annotation_dir_path=str(Path(speaker_dir, 'annotation')),
        audio_dir_path=str(Path(speaker_dir, 'wav'))
    ))

NameError: name 'get_arctic_description_file' is not defined

In [7]:
df = pd.DataFrame(data=table, columns=[
    'speaker_nickname', 'speaker_l1', 'speaker_gender', 'annotation_file_path', 'wav_file_path'
])
df.to_csv('')

Unnamed: 0,speaker_nickname,speaker_l1,speaker_gender,annotation_file_path,audio_dir_path
0,ABA,Arabic,M,/media/maxim/Programming/voice_datasets/arctic...,/media/maxim/Programming/voice_datasets/arctic...
1,ABA,Arabic,M,/media/maxim/Programming/voice_datasets/arctic...,/media/maxim/Programming/voice_datasets/arctic...
2,ABA,Arabic,M,/media/maxim/Programming/voice_datasets/arctic...,/media/maxim/Programming/voice_datasets/arctic...
3,ABA,Arabic,M,/media/maxim/Programming/voice_datasets/arctic...,/media/maxim/Programming/voice_datasets/arctic...
4,ABA,Arabic,M,/media/maxim/Programming/voice_datasets/arctic...,/media/maxim/Programming/voice_datasets/arctic...
...,...,...,...,...,...
3594,ZHAA,Arabic,F,/media/maxim/Programming/voice_datasets/arctic...,/media/maxim/Programming/voice_datasets/arctic...
3595,ZHAA,Arabic,F,/media/maxim/Programming/voice_datasets/arctic...,/media/maxim/Programming/voice_datasets/arctic...
3596,ZHAA,Arabic,F,/media/maxim/Programming/voice_datasets/arctic...,/media/maxim/Programming/voice_datasets/arctic...
3597,ZHAA,Arabic,F,/media/maxim/Programming/voice_datasets/arctic...,/media/maxim/Programming/voice_datasets/arctic...


In [103]:
df.nunique()

speaker_nickname          24
speaker_l1                 6
speaker_gender             2
annotation_file_path    3599
audio_dir_path          3599
dtype: int64