# Import libraries

In [1]:
import os
from pathlib import Path
import shutil
from tqdm import tqdm
from typing import Tuple
from concurrent.futures import ThreadPoolExecutor
import threading

import sys
sys.path.append('..')
from utils.audio_util import convert_wav_to_flac, resample_audios, trim_silence_with_vad
from utils.file_util import recursive_copy

# Moving files to the new directory

In [2]:
!find "../data/raw/TSync2/wav" -type f -name "*.wav" -exec sh -c 'ffmpeg -i "$1" -c:a pcm_mulaw "${1%.wav}.tmp.wav" && mv "${1%.wav}.tmp.wav" "$1"' _ {} \;

ffmpeg version 7.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with Apple clang version 16.0.0 (clang-1600.0.26.4)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/7.1_4 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --e

In [3]:
# Define paths
DEST_DIR = "../data/converted/TSync2-to-vctk"
DEST_TEXT_PATH = os.path.join(DEST_DIR, "txt/TSync2")
DEST_AUDIO_PATH = os.path.join(DEST_DIR, "wav44/TSync2")
SRC_AUDIO_PATH = "../data/raw/TSync2/wav"
SRC_TEXT_PATH = "../data/raw/TSync2/wrd_ph"

# Clean and create directories
if os.path.exists(DEST_DIR):
    print("Clearing destination folder")
    shutil.rmtree(DEST_DIR)
os.makedirs(DEST_TEXT_PATH, exist_ok=True)
os.makedirs(DEST_AUDIO_PATH, exist_ok=True)

# Thread-safe set for character collection
all_chars = set()
chars_lock = threading.Lock()

# Thread-safe list for tracking skipped files
skip_files = []
skip_lock = threading.Lock()

def process_file_pair(args: Tuple[int, Path, Path]) -> None:
    """Process a single pair of audio and text files"""
    i, audio_file, text_file = args
    try:
        # Process audio
        src_audio = str(audio_file)
        dest_audio = os.path.join(DEST_AUDIO_PATH, f"TSync2_{i:03d}_mic1.flac")
        
        if not convert_wav_to_flac(src_audio, dest_audio):
            raise Exception("Failed to convert audio")
        
        # Process text
        with text_file.open('r', encoding='utf-8') as f:
            clean_text = "".join(f.readline().strip().split("|"))
            # Thread-safe update of all_chars
            with chars_lock:
                all_chars.update(clean_text)
        
        dest_text = os.path.join(DEST_TEXT_PATH, f"TSync2_{i:03d}.txt")
        with open(dest_text, 'w', encoding='utf-8') as f:
            f.write(clean_text)
            
    except Exception as e:
        print(f"Error processing pair {i}: {e}")
        with skip_lock:
            skip_files.append(i)

# Get sorted lists of files
audio_files = sorted(Path(SRC_AUDIO_PATH).glob("*.wav"))
text_files = sorted(Path(SRC_TEXT_PATH).glob("*.txt"))

# Create processing arguments
process_args = [
    (i, audio_file, text_file) 
    for i, (audio_file, text_file) 
    in enumerate(zip(audio_files, text_files), 1)
]

max_workers = os.cpu_count()

# Process files in parallel with progress bar
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    list(tqdm(
        executor.map(process_file_pair, process_args),
        total=len(process_args),
        desc=f"Processing files (using {max_workers} workers)"
    ))

# Print results
print(f"Processed {len(audio_files) - len(skip_files)} file pairs")
print(f"Skipped {len(skip_files)} pairs")
print(f"Unique characters found: {''.join(sorted(all_chars))}")

Clearing destination folder


Processing files (using 8 workers): 100%|██████████| 2710/2710 [01:37<00:00, 27.93it/s]

Processed 2710 file pairs
Skipped 0 pairs
Unique characters found: .กขคฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลวศษสหฬอฮะัาำิีึืุูเแโใไๆ็่้๊๋์ํ





# Resample, trim, and normalize audio

In [4]:
# Create destination directory if it doesn't exist
os.makedirs("../data/converted/TSync2-to-vctk/wav16_silence_trimmed", exist_ok=True)

# Copy all files from wav32 to wav16_silence_trimmed
src_dir = "../data/converted/TSync2-to-vctk/wav44"
dst_dir = "../data/converted/TSync2-to-vctk/wav16_silence_trimmed"

recursive_copy(src_dir, dst_dir)

In [5]:
# Resample all files in wav16_silence_trimmed to 16kHz
SAMPLE_RATE = 16000
NUM_RESAMPLE_THREADS = 4

resample_audios(
  input_folders="../data/converted/TSync2-to-vctk/wav16_silence_trimmed",
  file_ext="flac",
  sample_rate=SAMPLE_RATE,
  n_jobs=NUM_RESAMPLE_THREADS
)

Resampling the audio files...
Found 2710 files...


100%|██████████| 2710/2710 [00:08<00:00, 315.22it/s]

Done !





In [6]:
# Trim silence at the beginning and end of each audio file
trim_silence_with_vad(
  input_folder="../data/converted/TSync2-to-vctk/wav16_silence_trimmed",
  file_extension="flac",
)

Downloading: "https://github.com/snakers4/silero-vad/zipball/master" to /Users/titor/.cache/torch/hub/master.zip


Found 2710 .flac files to process


Processing files:   4%|▎         | 100/2710 [00:04<01:21, 32.14it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2271_mic1.flac probably does not have speech please check it !!


Processing files:  13%|█▎        | 352/2710 [00:13<01:08, 34.54it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2382_mic1.flac probably does not have speech please check it !!


Processing files:  15%|█▌        | 412/2710 [00:15<01:16, 30.00it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2219_mic1.flac probably does not have speech please check it !!


Processing files:  18%|█▊        | 481/2710 [00:18<01:03, 34.85it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2530_mic1.flac probably does not have speech please check it !!


Processing files:  19%|█▉        | 528/2710 [00:20<01:26, 25.20it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2346_mic1.flac probably does not have speech please check it !!
> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2493_mic1.flac probably does not have speech please check it !!


Processing files:  20%|██        | 549/2710 [00:20<01:01, 35.14it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2420_mic1.flac probably does not have speech please check it !!


Processing files:  21%|██        | 568/2710 [00:21<01:13, 29.16it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2164_mic1.flac probably does not have speech please check it !!


Processing files:  27%|██▋       | 740/2710 [00:27<01:07, 29.15it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2379_mic1.flac probably does not have speech please check it !!


Processing files:  29%|██▉       | 783/2710 [00:29<01:05, 29.44it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2391_mic1.flac probably does not have speech please check it !!


Processing files:  33%|███▎      | 893/2710 [00:33<00:58, 31.11it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2360_mic1.flac probably does not have speech please check it !!


Processing files:  33%|███▎      | 901/2710 [00:33<01:00, 29.91it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2468_mic1.flac probably does not have speech please check it !!


Processing files:  35%|███▍      | 942/2710 [00:35<01:01, 28.80it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2163_mic1.flac probably does not have speech please check it !!


Processing files:  36%|███▌      | 963/2710 [00:36<00:57, 30.57it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2575_mic1.flac probably does not have speech please check it !!


Processing files:  38%|███▊      | 1020/2710 [00:38<00:55, 30.46it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2494_mic1.flac probably does not have speech please check it !!


Processing files:  38%|███▊      | 1040/2710 [00:38<00:56, 29.65it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2438_mic1.flac probably does not have speech please check it !!


Processing files:  39%|███▊      | 1050/2710 [00:39<00:50, 33.04it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2674_mic1.flac probably does not have speech please check it !!
> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2397_mic1.flac probably does not have speech please check it !!
> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2168_mic1.flac probably does not have speech please check it !!


Processing files:  40%|███▉      | 1075/2710 [00:39<00:48, 33.90it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2660_mic1.flac probably does not have speech please check it !!


Processing files:  41%|████      | 1117/2710 [00:41<00:49, 32.15it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2477_mic1.flac probably does not have speech please check it !!


Processing files:  43%|████▎     | 1169/2710 [00:43<00:44, 34.29it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2305_mic1.flac probably does not have speech please check it !!


Processing files:  44%|████▍     | 1193/2710 [00:44<00:49, 30.94it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2329_mic1.flac probably does not have speech please check it !!


Processing files:  46%|████▌     | 1245/2710 [00:45<00:44, 32.62it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2171_mic1.flac probably does not have speech please check it !!


Processing files:  47%|████▋     | 1266/2710 [00:46<00:38, 37.28it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2567_mic1.flac probably does not have speech please check it !!
> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2617_mic1.flac probably does not have speech please check it !!
> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2421_mic1.flac probably does not have speech please check it !!


Processing files:  47%|████▋     | 1287/2710 [00:47<00:44, 31.71it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2366_mic1.flac probably does not have speech please check it !!
> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2685_mic1.flac probably does not have speech please check it !!
> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2220_mic1.flac probably does not have speech please check it !!


Processing files:  49%|████▊     | 1318/2710 [00:48<00:41, 33.80it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2622_mic1.flac probably does not have speech please check it !!
> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2287_mic1.flac probably does not have speech please check it !!


Processing files:  49%|████▉     | 1339/2710 [00:48<00:47, 29.04it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2234_mic1.flac probably does not have speech please check it !!


Processing files:  50%|████▉     | 1343/2710 [00:49<00:46, 29.11it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2372_mic1.flac probably does not have speech please check it !!


Processing files:  52%|█████▏    | 1407/2710 [00:51<00:45, 28.75it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2447_mic1.flac probably does not have speech please check it !!


Processing files:  54%|█████▍    | 1464/2710 [00:53<00:39, 31.48it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2593_mic1.flac probably does not have speech please check it !!


Processing files:  55%|█████▌    | 1499/2710 [00:54<00:39, 30.34it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2252_mic1.flac probably does not have speech please check it !!


Processing files:  59%|█████▉    | 1603/2710 [00:58<00:33, 33.06it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2606_mic1.flac probably does not have speech please check it !!


Processing files:  61%|██████    | 1647/2710 [01:00<00:50, 20.92it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2543_mic1.flac probably does not have speech please check it !!


Processing files:  61%|██████▏   | 1660/2710 [01:00<00:46, 22.44it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2633_mic1.flac probably does not have speech please check it !!


Processing files:  63%|██████▎   | 1706/2710 [01:02<00:37, 26.44it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2289_mic1.flac probably does not have speech please check it !!


Processing files:  63%|██████▎   | 1714/2710 [01:02<00:36, 27.24it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2526_mic1.flac probably does not have speech please check it !!


Processing files:  66%|██████▌   | 1794/2710 [01:05<00:30, 30.32it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2380_mic1.flac probably does not have speech please check it !!


Processing files:  67%|██████▋   | 1816/2710 [01:06<00:25, 35.06it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2569_mic1.flac probably does not have speech please check it !!


Processing files:  71%|███████▏  | 1936/2710 [01:10<00:24, 32.00it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2635_mic1.flac probably does not have speech please check it !!


Processing files:  72%|███████▏  | 1940/2710 [01:10<00:24, 31.66it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2259_mic1.flac probably does not have speech please check it !!


Processing files:  73%|███████▎  | 1981/2710 [01:12<00:22, 32.25it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2422_mic1.flac probably does not have speech please check it !!


Processing files:  75%|███████▍  | 2028/2710 [01:13<00:20, 33.98it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2350_mic1.flac probably does not have speech please check it !!


Processing files:  82%|████████▏ | 2216/2710 [01:20<00:15, 32.36it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2161_mic1.flac probably does not have speech please check it !!


Processing files:  83%|████████▎ | 2251/2710 [01:22<00:14, 30.86it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2563_mic1.flac probably does not have speech please check it !!


Processing files:  87%|████████▋ | 2369/2710 [01:26<00:10, 31.14it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2410_mic1.flac probably does not have speech please check it !!


Processing files:  88%|████████▊ | 2381/2710 [01:27<00:13, 24.71it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2533_mic1.flac probably does not have speech please check it !!


Processing files:  88%|████████▊ | 2389/2710 [01:27<00:11, 28.60it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2643_mic1.flac probably does not have speech please check it !!
> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2196_mic1.flac probably does not have speech please check it !!


Processing files:  89%|████████▉ | 2420/2710 [01:28<00:07, 36.56it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2594_mic1.flac probably does not have speech please check it !!


Processing files:  93%|█████████▎| 2531/2710 [01:32<00:06, 28.36it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2332_mic1.flac probably does not have speech please check it !!


Processing files:  95%|█████████▌| 2576/2710 [01:34<00:05, 25.76it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2291_mic1.flac probably does not have speech please check it !!


Processing files: 100%|█████████▉| 2704/2710 [01:38<00:00, 27.83it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2423_mic1.flac probably does not have speech please check it !!


Processing files: 100%|██████████| 2710/2710 [01:39<00:00, 27.33it/s]


Processing complete

Found 58 files with no speech. List saved to ../data/converted/TSync2-to-vctk/no_speech_files.txt





In [7]:
# Normalize the volume of all audio files to -27dB
!find "../data/converted/TSync2-to-vctk/wav16_silence_trimmed" -type f -name "*.flac" -exec sh -c 'ffmpeg-normalize "$1" -nt rms -t=-27 -o "$1" -ar 16000 -f -ext flac -c:a flac' _ {} \;

# Create metadata

In [8]:
DEST_DIR = Path(DEST_DIR)

# Write character files
sorted_chars = sorted(all_chars)
with open(DEST_DIR / 'all_chars_unicode.txt', 'w') as f:
   f.write(''.join(c.encode('unicode_escape').decode('ascii') for c in sorted_chars))
   
with open(DEST_DIR / 'all_chars.txt', 'w') as f:
   f.write(''.join(sorted_chars))