# Import libraries

In [1]:
import os
from pathlib import Path
import shutil
from tqdm import tqdm
from typing import Tuple
from concurrent.futures import ThreadPoolExecutor
import threading
import time

import sys
sys.path.append('..')
from utils.audio_util import convert_wav_to_flac, resample_audios, trim_silence_with_vad
from utils.file_util import recursive_copy

from transliterate.phonemizer import ThaiPhonemizer

# Moving files to the new directory

In [2]:
import time

# Build the phonemizer once; process_file_pair() reuses this single instance
# from every worker thread (guarded there by phonemizer_lock).
thai_phonemizer = ThaiPhonemizer()

# Warm-up call on a sample sentence, retried once if the first attempt raises.
# NOTE(review): presumably the first call can fail while the phonemizer loads
# its resources lazily -- confirm against transliterate.phonemizer.
# Only Exception is caught so that Ctrl-C / kernel interrupts are not swallowed.
try:
  thai_phonemizer.phonemize("มอบวัดพระบาทน้ำพุสร้างเตาเผาเอดส์")
except Exception:
  thai_phonemizer.phonemize("มอบวัดพระบาทน้ำพุสร้างเตาเผาเอดส์")

In [3]:
# Re-encode every *.wav under ../data/raw/TSync2/wav with ffmpeg's pcm_mulaw codec.
# Each file is first written to a sibling "<name>.tmp.wav"; only when ffmpeg
# succeeds (&&) is the temporary file moved over the original.
# NOTE(review): this replaces the raw dataset files in place, and re-running the
# cell re-encodes the already converted files -- keep a backup of the raw data.
!find "../data/raw/TSync2/wav" -type f -name "*.wav" -exec sh -c 'ffmpeg -i "$1" -c:a pcm_mulaw "${1%.wav}.tmp.wav" && mv "${1%.wav}.tmp.wav" "$1"' _ {} \;

ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

In [4]:
# Input locations: raw TSync2 recordings and their transcript files
SRC_AUDIO_PATH = "../data/raw/TSync2/wav"
SRC_TEXT_PATH = "../data/raw/TSync2/wrd_ph"

# Output locations: everything produced by this notebook lives under DEST_DIR
DEST_DIR = "../data/converted/TSync2-to-vctk-ph"
DEST_AUDIO_PATH = os.path.join(DEST_DIR, "wav44/TSync2")
DEST_TEXT_PATH = os.path.join(DEST_DIR, "txt/TSync2")

# Always start from an empty output tree so stale files from a previous run
# cannot mix with the new ones
if os.path.exists(DEST_DIR):
    print("Clearing destination folder")
    shutil.rmtree(DEST_DIR)
for out_dir in (DEST_TEXT_PATH, DEST_AUDIO_PATH):
    os.makedirs(out_dir, exist_ok=True)

# State shared between worker threads; each container is paired with the lock
# that must be held while touching it
all_chars, chars_lock = set(), threading.Lock()      # characters seen in the phonemized text
skip_files, skip_lock = [], threading.Lock()         # indices of pairs that failed

# Serializes calls into the shared phonemizer instance
phonemizer_lock = threading.Lock()

def process_file_pair(args: Tuple[int, Path, Path]) -> None:
    """Convert one audio/transcript pair into the destination layout.

    Args:
        args: ``(i, audio_file, text_file)``. ``i`` is the pair index used in
            the output names ``TSync2_{i:03d}_mic1.flac`` and
            ``TSync2_{i:03d}.txt``.

    The audio is converted with ``convert_wav_to_flac`` (a falsy return is
    treated as a failure). The first line of the transcript has its ``|``
    separators removed, is phonemized, and the items returned by the
    phonemizer are joined with single spaces and written to the text file.

    Errors are never raised to the caller: they are printed, the pair index is
    appended to ``skip_files``, and any output already written for this pair
    is removed so the audio and text folders stay in sync. ``all_chars`` is
    only updated once both outputs exist.
    """
    i, audio_file, text_file = args
    dest_audio = os.path.join(DEST_AUDIO_PATH, f"TSync2_{i:03d}_mic1.flac")
    dest_text = os.path.join(DEST_TEXT_PATH, f"TSync2_{i:03d}.txt")
    try:
        # Process audio
        if not convert_wav_to_flac(str(audio_file), dest_audio):
            raise Exception("Failed to convert audio")

        # Read the transcript and close the file before waiting on the lock
        with text_file.open('r', encoding='utf-8') as f:
            first_line = f.readline()
        raw_text = "".join(first_line.strip().split("|"))

        # The phonemizer instance is shared, so calls into it are serialized
        with phonemizer_lock:
            clean_text = " ".join(thai_phonemizer.phonemize(raw_text))

        with open(dest_text, 'w', encoding='utf-8') as f:
            f.write(clean_text)

        # Record characters only for pairs that were fully written
        with chars_lock:
            all_chars.update(clean_text)

    except Exception as e:
        print(f"Error processing pair {i}: {e}")
        with skip_lock:
            skip_files.append(i)
        # Best-effort cleanup: a skipped pair must not leave an orphan
        # audio or text file behind for the later processing steps
        for leftover in (dest_audio, dest_text):
            try:
                os.remove(leftover)
            except FileNotFoundError:
                pass
            except OSError as cleanup_error:
                print(f"Could not remove {leftover}: {cleanup_error}")

# Get sorted lists of files; audio and text are paired purely by sorted position
# NOTE(review): this assumes both folders sort into the same utterance order --
# the file names themselves are not compared here.
audio_files = sorted(Path(SRC_AUDIO_PATH).glob("*.wav"))
text_files = sorted(Path(SRC_TEXT_PATH).glob("*.txt"))

# zip() stops at the shorter list, so report a count mismatch instead of
# silently dropping the unmatched files
if len(audio_files) != len(text_files):
    print(
        f"Warning: found {len(audio_files)} audio files but {len(text_files)} text files; "
        "unmatched trailing files are ignored"
    )

# Create processing arguments (pair indices start at 1)
process_args = [
    (i, audio_file, text_file)
    for i, (audio_file, text_file)
    in enumerate(zip(audio_files, text_files), 1)
]

# os.cpu_count() may return None; fall back to one worker so the pool size
# and the progress-bar label are always a real number
max_workers = os.cpu_count() or 1

# Process files in parallel with progress bar; list() drains the lazy map
# so the bar advances as results complete
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    list(tqdm(
        executor.map(process_file_pair, process_args),
        total=len(process_args),
        desc=f"Processing files (using {max_workers} workers)"
    ))

# Print results; counts are based on the pairs actually submitted
print(f"Processed {len(process_args) - len(skip_files)} file pairs")
print(f"Skipped {len(skip_files)} pairs")
print(f"Unique characters found: {''.join(sorted(all_chars))}")

Clearing destination folder


Processing files (using 6 workers): 100%|██████████| 2710/2710 [07:20<00:00,  6.15it/s]

Processed 2710 file pairs
Skipped 0 pairs
Unique characters found:  2345_abcdefhijklmnoprstuwŋɛɤɯʔʰːᴐ





# Resample, trim, and normalize audio

In [5]:
# Seed the working folder with a copy of the converted wav44 tree; the
# following cells resample and trim the files inside wav16_silence_trimmed
src_dir = "../data/converted/TSync2-to-vctk-ph/wav44"
dst_dir = "../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed"

# Make sure the destination exists before copying into it
os.makedirs(dst_dir, exist_ok=True)
recursive_copy(src_dir, dst_dir)

In [6]:
# Target sample rate (Hz) and number of parallel jobs for the resampling step
SAMPLE_RATE = 16000
NUM_RESAMPLE_THREADS = 4

# Resample every .flac file found under wav16_silence_trimmed
# NOTE(review): presumably the files are rewritten in place -- confirm in utils.audio_util
resample_audios(
  input_folders="../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed",
  sample_rate=SAMPLE_RATE,
  file_ext="flac",
  n_jobs=NUM_RESAMPLE_THREADS,
)

Resampling the audio files...
Found 2710 files...


100%|██████████| 2710/2710 [00:19<00:00, 139.77it/s]


Done !


In [7]:
# Strip leading and trailing silence from every .flac file in the working folder.
# In the recorded run this step also reported files with no detected speech and
# saved their list to no_speech_files.txt under the converted dataset folder.
trim_silence_with_vad(
  file_extension="flac",
  input_folder="../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed",
)

Downloading: "https://github.com/snakers4/silero-vad/zipball/master" to /home/titor/.cache/torch/hub/master.zip


Found 2710 .flac files to process


Processing files:   1%|▏         | 36/2710 [00:06<05:14,  8.50it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2252_mic1.flac probably does not have speech please check it !!


Processing files:   2%|▏         | 65/2710 [00:10<05:06,  8.62it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2291_mic1.flac probably does not have speech please check it !!


Processing files:   4%|▍         | 109/2710 [00:16<06:10,  7.02it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2372_mic1.flac probably does not have speech please check it !!


Processing files:   6%|▌         | 157/2710 [00:22<05:32,  7.68it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2622_mic1.flac probably does not have speech please check it !!


Processing files:   9%|▉         | 239/2710 [00:31<04:38,  8.88it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2271_mic1.flac probably does not have speech please check it !!


Processing files:   9%|▉         | 242/2710 [00:32<04:31,  9.08it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2391_mic1.flac probably does not have speech please check it !!


Processing files:   9%|▉         | 249/2710 [00:32<03:58, 10.33it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2635_mic1.flac probably does not have speech please check it !!


Processing files:  10%|█         | 271/2710 [00:35<03:34, 11.38it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2379_mic1.flac probably does not have speech please check it !!


Processing files:  11%|█▏        | 310/2710 [00:41<03:57, 10.11it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2423_mic1.flac probably does not have speech please check it !!


Processing files:  13%|█▎        | 342/2710 [00:45<03:59,  9.89it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2438_mic1.flac probably does not have speech please check it !!


Processing files:  15%|█▍        | 405/2710 [00:52<04:36,  8.32it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2397_mic1.flac probably does not have speech please check it !!


Processing files:  17%|█▋        | 452/2710 [00:58<04:05,  9.20it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2289_mic1.flac probably does not have speech please check it !!


Processing files:  17%|█▋        | 460/2710 [00:59<03:59,  9.40it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2468_mic1.flac probably does not have speech please check it !!


Processing files:  19%|█▉        | 527/2710 [01:08<03:29, 10.40it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2633_mic1.flac probably does not have speech please check it !!


Processing files:  20%|██        | 554/2710 [01:11<03:15, 11.05it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2382_mic1.flac probably does not have speech please check it !!


Processing files:  22%|██▏       | 596/2710 [01:16<06:25,  5.48it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2543_mic1.flac probably does not have speech please check it !!


Processing files:  24%|██▎       | 643/2710 [01:22<04:08,  8.30it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2196_mic1.flac probably does not have speech please check it !!


Processing files:  24%|██▍       | 654/2710 [01:23<04:32,  7.55it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2563_mic1.flac probably does not have speech please check it !!


Processing files:  26%|██▌       | 705/2710 [01:29<03:53,  8.60it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2168_mic1.flac probably does not have speech please check it !!


Processing files:  27%|██▋       | 719/2710 [01:31<03:08, 10.55it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2674_mic1.flac probably does not have speech please check it !!


Processing files:  29%|██▊       | 773/2710 [01:39<03:21,  9.62it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2643_mic1.flac probably does not have speech please check it !!


Processing files:  29%|██▉       | 787/2710 [01:40<02:36, 12.26it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2410_mic1.flac probably does not have speech please check it !!


Processing files:  30%|██▉       | 806/2710 [01:43<03:52,  8.18it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2594_mic1.flac probably does not have speech please check it !!


Processing files:  35%|███▌      | 958/2710 [02:05<03:05,  9.44it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2366_mic1.flac probably does not have speech please check it !!


Processing files:  36%|███▌      | 972/2710 [02:07<03:21,  8.63it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2593_mic1.flac probably does not have speech please check it !!


Processing files:  37%|███▋      | 1005/2710 [02:11<02:33, 11.12it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2164_mic1.flac probably does not have speech please check it !!


Processing files:  39%|███▊      | 1044/2710 [02:16<03:17,  8.45it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2220_mic1.flac probably does not have speech please check it !!


Processing files:  41%|████      | 1105/2710 [02:24<02:38, 10.13it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2567_mic1.flac probably does not have speech please check it !!


Processing files:  42%|████▏     | 1132/2710 [02:28<02:28, 10.61it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2422_mic1.flac probably does not have speech please check it !!


Processing files:  43%|████▎     | 1153/2710 [02:30<03:05,  8.38it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2606_mic1.flac probably does not have speech please check it !!


Processing files:  47%|████▋     | 1276/2710 [02:47<02:26,  9.76it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2660_mic1.flac probably does not have speech please check it !!


Processing files:  53%|█████▎    | 1429/2710 [03:09<02:13,  9.62it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2569_mic1.flac probably does not have speech please check it !!


Processing files:  53%|█████▎    | 1433/2710 [03:10<02:02, 10.46it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2575_mic1.flac probably does not have speech please check it !!


Processing files:  55%|█████▌    | 1494/2710 [03:18<02:18,  8.80it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2420_mic1.flac probably does not have speech please check it !!


Processing files:  59%|█████▉    | 1603/2710 [03:32<01:58,  9.36it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2161_mic1.flac probably does not have speech please check it !!


Processing files:  60%|█████▉    | 1613/2710 [03:34<02:20,  7.80it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2234_mic1.flac probably does not have speech please check it !!


Processing files:  62%|██████▏   | 1693/2710 [03:44<02:11,  7.75it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2360_mic1.flac probably does not have speech please check it !!


Processing files:  64%|██████▍   | 1747/2710 [03:51<01:07, 14.19it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2530_mic1.flac probably does not have speech please check it !!
> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2305_mic1.flac probably does not have speech please check it !!


Processing files:  66%|██████▌   | 1780/2710 [03:56<01:54,  8.14it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2163_mic1.flac probably does not have speech please check it !!


Processing files:  73%|███████▎  | 1966/2710 [04:22<01:12, 10.23it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2421_mic1.flac probably does not have speech please check it !!


Processing files:  73%|███████▎  | 1977/2710 [04:23<01:11, 10.23it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2171_mic1.flac probably does not have speech please check it !!


Processing files:  74%|███████▍  | 2013/2710 [04:28<01:25,  8.16it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2685_mic1.flac probably does not have speech please check it !!


Processing files:  74%|███████▍  | 2018/2710 [04:28<01:02, 11.10it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2346_mic1.flac probably does not have speech please check it !!


Processing files:  77%|███████▋  | 2099/2710 [04:39<01:02,  9.71it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2287_mic1.flac probably does not have speech please check it !!


Processing files:  78%|███████▊  | 2114/2710 [04:40<00:51, 11.56it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2332_mic1.flac probably does not have speech please check it !!


Processing files:  79%|███████▉  | 2153/2710 [04:46<01:12,  7.72it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2493_mic1.flac probably does not have speech please check it !!


Processing files:  81%|████████  | 2197/2710 [04:53<00:55,  9.26it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2219_mic1.flac probably does not have speech please check it !!


Processing files:  85%|████████▍ | 2301/2710 [05:07<00:45,  8.92it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2617_mic1.flac probably does not have speech please check it !!


Processing files:  92%|█████████▏| 2483/2710 [05:30<00:20, 10.82it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2533_mic1.flac probably does not have speech please check it !!


Processing files:  92%|█████████▏| 2485/2710 [05:30<00:19, 11.34it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2494_mic1.flac probably does not have speech please check it !!


Processing files:  92%|█████████▏| 2496/2710 [05:31<00:20, 10.26it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2350_mic1.flac probably does not have speech please check it !!


Processing files:  92%|█████████▏| 2502/2710 [05:32<00:23,  8.94it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2477_mic1.flac probably does not have speech please check it !!


Processing files:  93%|█████████▎| 2514/2710 [05:33<00:19, 10.10it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2447_mic1.flac probably does not have speech please check it !!


Processing files:  94%|█████████▍| 2546/2710 [05:37<00:16, 10.11it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2259_mic1.flac probably does not have speech please check it !!


Processing files:  97%|█████████▋| 2630/2710 [05:49<00:14,  5.68it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2380_mic1.flac probably does not have speech please check it !!


Processing files:  98%|█████████▊| 2656/2710 [05:52<00:05,  9.08it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2329_mic1.flac probably does not have speech please check it !!


Processing files:  99%|█████████▉| 2677/2710 [05:55<00:03,  8.56it/s]

> The file ../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed/TSync2/TSync2_2526_mic1.flac probably does not have speech please check it !!


Processing files: 100%|██████████| 2710/2710 [05:59<00:00,  7.54it/s]


Processing complete

Found 58 files with no speech. List saved to ../data/converted/TSync2-to-vctk-ph/no_speech_files.txt





In [8]:
# Normalize the volume of all audio files to -27dB
# Runs ffmpeg-normalize once per *.flac under wav16_silence_trimmed with RMS
# normalization (-nt rms), target level -27 (-t=-27), 16 kHz output (-ar 16000)
# and FLAC codec/extension (-c:a flac, -ext flac).
# The output path (-o) is the input path itself and -f is passed, so the files
# are presumably overwritten in place -- confirm against the ffmpeg-normalize docs.
!find "../data/converted/TSync2-to-vctk-ph/wav16_silence_trimmed" -type f -name "*.flac" -exec sh -c 'ffmpeg-normalize "$1" -nt rms -t=-27 -o "$1" -ar 16000 -f -ext flac -c:a flac' _ {} \;

# Create metadata

In [10]:
# Switch DEST_DIR to a Path so output files can be addressed with "/"
DEST_DIR = Path(DEST_DIR)

# Character inventory collected during conversion, in sorted order
sorted_chars = sorted(all_chars)

# Store each character as its Python unicode escape so the file is pure ASCII
escaped_chars = ''.join(
    ch.encode('unicode_escape').decode('ascii') for ch in sorted_chars
)
with open(DEST_DIR / 'all_chars_unicode.txt', 'w') as char_file:
    char_file.write(escaped_chars)

# Plain-text variant of the same list, currently disabled:
# with open(DEST_DIR / 'all_chars.txt', 'w') as f:
#    f.write(''.join(sorted_chars))