# Import libraries

In [1]:
import os
from pathlib import Path
import shutil
from tqdm import tqdm
from typing import Tuple
from concurrent.futures import ThreadPoolExecutor
import threading

import json

import sys
sys.path.append('..')
from utils.audio_util import convert_wav_to_flac, resample_audios, trim_silence_with_vad
from utils.file_util import recursive_copy

# Moving files to the new directory

In [2]:
# Re-encode every *.wav under ../data/raw/TSync2/wav with ffmpeg's `pcm_mulaw` audio codec.
# Each file is first written to a sibling "<name>.tmp.wav"; only if ffmpeg exits
# successfully (`&&`) is the temp file moved over the original, so the raw files
# are overwritten in place.
!find "../data/raw/TSync2/wav" -type f -name "*.wav" -exec sh -c 'ffmpeg -i "$1" -c:a pcm_mulaw "${1%.wav}.tmp.wav" && mv "${1%.wav}.tmp.wav" "$1"' _ {} \;

ffmpeg version 6.1.1 Copyright (c) 2000-2023 the FFmpeg developers
  built with Apple clang version 15.0.0 (clang-1500.1.0.2.5)
  configuration: --prefix=/usr/local/Cellar/ffmpeg/6.1.1_3 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopenvin

In [3]:
# Input locations: raw TSync2 audio and its transcript files
SRC_AUDIO_PATH = "../data/raw/TSync2/wav"
SRC_TEXT_PATH = "../data/raw/TSync2/wrd_ph"

# Output tree: transcripts under txt/TSync2, audio under wav44/TSync2
DEST_DIR = "../data/converted/TSync2-to-vctk"
DEST_AUDIO_PATH = os.path.join(DEST_DIR, "wav44/TSync2")
DEST_TEXT_PATH = os.path.join(DEST_DIR, "txt/TSync2")

# Every run starts from an empty destination tree
if os.path.exists(DEST_DIR):
    print("Clearing destination folder")
    shutil.rmtree(DEST_DIR)
os.makedirs(DEST_TEXT_PATH, exist_ok=True)
os.makedirs(DEST_AUDIO_PATH, exist_ok=True)

# State shared between worker threads; each collection is guarded by its own lock:
#   all_chars  - every distinct character seen in the cleaned transcripts
#   skip_files - indices of the pairs that failed to process
all_chars, chars_lock = set(), threading.Lock()
skip_files, skip_lock = [], threading.Lock()

def process_file_pair(args: Tuple[int, Path, Path]) -> None:
    """Convert one audio/transcript pair into the destination tree.

    Args:
        args: ``(i, audio_file, text_file)``. ``i`` is the index used in the
            output file names, ``audio_file`` is the source ``.wav`` and
            ``text_file`` is the source transcript.

    On success this writes ``TSync2_{i:03d}_mic1.flac`` under
    ``DEST_AUDIO_PATH`` and ``TSync2_{i:03d}.txt`` under ``DEST_TEXT_PATH``
    (the first transcript line, stripped, with every ``|`` removed), and adds
    the transcript's characters to ``all_chars``.

    On any failure the error is printed, partially written outputs for this
    pair are removed so that no audio is left without a transcript (or vice
    versa), and ``i`` is appended to ``skip_files``. Nothing is raised to the
    caller.
    """
    i, audio_file, text_file = args
    dest_audio = os.path.join(DEST_AUDIO_PATH, f"TSync2_{i:03d}_mic1.flac")
    dest_text = os.path.join(DEST_TEXT_PATH, f"TSync2_{i:03d}.txt")

    try:
        # Read the transcript first: it is cheap, and an unreadable transcript
        # then fails before any audio has been written for this pair.
        with text_file.open('r', encoding='utf-8') as f:
            clean_text = f.readline().strip().replace("|", "")

        # Process audio
        if not convert_wav_to_flac(str(audio_file), dest_audio):
            raise RuntimeError("Failed to convert audio")

        # Write the cleaned transcript
        with open(dest_text, 'w', encoding='utf-8') as f:
            f.write(clean_text)

        # Only count characters from pairs that were fully written
        with chars_lock:
            all_chars.update(clean_text)

    except Exception as e:
        print(f"Error processing pair {i}: {e}")
        # Best-effort cleanup so a skipped pair leaves nothing behind
        for partial_output in (dest_audio, dest_text):
            try:
                os.remove(partial_output)
            except OSError:
                # Usually the file was never created; either way the pair
                # is still recorded as skipped below.
                pass
        with skip_lock:
            skip_files.append(i)

# Get sorted lists of files
audio_files = sorted(Path(SRC_AUDIO_PATH).glob("*.wav"))
text_files = sorted(Path(SRC_TEXT_PATH).glob("*.txt"))

# Pairing below is purely positional (i-th audio with i-th transcript) and zip()
# stops at the shorter list, so a count mismatch means files are dropped and
# the remaining pairs may be misaligned. Make that visible instead of silent.
if len(audio_files) != len(text_files):
    print(
        f"Warning: found {len(audio_files)} audio files but {len(text_files)} text files; "
        "unpaired files are ignored and pairs may be misaligned"
    )

# Create processing arguments: (1-based index, audio path, transcript path)
process_args = [
    (i, audio_file, text_file)
    for i, (audio_file, text_file)
    in enumerate(zip(audio_files, text_files), 1)
]

# os.cpu_count() may return None when the count cannot be determined
max_workers = os.cpu_count() or 1

# Process files in parallel with progress bar; list() drains the lazy map so
# every pair has finished before the summary is printed
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    list(tqdm(
        executor.map(process_file_pair, process_args),
        total=len(process_args),
        desc=f"Processing files (using {max_workers} workers)"
    ))

# Print results (counted against the pairs actually submitted, not the raw audio listing)
print(f"Processed {len(process_args) - len(skip_files)} file pairs")
print(f"Skipped {len(skip_files)} pairs")
print(f"Unique characters found: {''.join(sorted(all_chars))}")

Processing files (using 8 workers): 100%|██████████| 2710/2710 [05:00<00:00,  9.01it/s]

Processed 2710 file pairs
Skipped 0 pairs
Unique characters found: .กขคฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลวศษสหฬอฮะัาำิีึืุูเแโใไๆ็่้๊๋์ํ





# Resample and trim audio

In [4]:
# Working copy: duplicate the converted wav44 tree into wav16_silence_trimmed,
# the folder the following resample and trim cells are pointed at.
src_dir = "../data/converted/TSync2-to-vctk/wav44"
dst_dir = "../data/converted/TSync2-to-vctk/wav16_silence_trimmed"

# Make sure the destination exists before copying into it
os.makedirs(dst_dir, exist_ok=True)

recursive_copy(src_dir, dst_dir)

In [5]:
# Resampling settings: target rate passed as `sample_rate`, job count passed as `n_jobs`
SAMPLE_RATE = 16000
NUM_RESAMPLE_THREADS = 4

# Resample the .flac files under wav16_silence_trimmed.
# NOTE(review): no output path is passed, so this presumably rewrites the files in place - confirm.
resample_audios(
    input_folders="../data/converted/TSync2-to-vctk/wav16_silence_trimmed",
    sample_rate=SAMPLE_RATE,
    file_ext="flac",
    n_jobs=NUM_RESAMPLE_THREADS,
)

Resampling the audio files...
Found 2710 files...


100%|██████████| 2710/2710 [00:37<00:00, 73.16it/s] 

Done !





In [6]:
# Run the VAD-based silence trimming helper over the .flac files in wav16_silence_trimmed
trim_silence_with_vad(
    file_extension="flac",
    input_folder="../data/converted/TSync2-to-vctk/wav16_silence_trimmed",
)

Downloading: "https://github.com/snakers4/silero-vad/zipball/master" to /Users/titor/.cache/torch/hub/master.zip


Found 2710 .flac files to process


Processing files:   4%|▎         | 99/2710 [00:09<02:23, 18.16it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2271_mic1.flac probably does not have speech please check it !!


Processing files:  13%|█▎        | 349/2710 [00:29<03:07, 12.61it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2382_mic1.flac probably does not have speech please check it !!


Processing files:  15%|█▌        | 412/2710 [00:36<02:50, 13.48it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2219_mic1.flac probably does not have speech please check it !!


Processing files:  18%|█▊        | 478/2710 [00:42<01:56, 19.21it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2530_mic1.flac probably does not have speech please check it !!


Processing files:  19%|█▉        | 526/2710 [00:48<02:57, 12.33it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2346_mic1.flac probably does not have speech please check it !!


Processing files:  20%|█▉        | 532/2710 [00:49<03:25, 10.62it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2493_mic1.flac probably does not have speech please check it !!


Processing files:  20%|██        | 545/2710 [00:49<02:10, 16.53it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2420_mic1.flac probably does not have speech please check it !!


Processing files:  21%|██        | 572/2710 [00:51<01:54, 18.68it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2164_mic1.flac probably does not have speech please check it !!


Processing files:  27%|██▋       | 739/2710 [01:04<02:58, 11.04it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2379_mic1.flac probably does not have speech please check it !!


Processing files:  29%|██▉       | 782/2710 [01:08<02:23, 13.47it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2391_mic1.flac probably does not have speech please check it !!


Processing files:  33%|███▎      | 889/2710 [01:17<02:11, 13.81it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2360_mic1.flac probably does not have speech please check it !!


Processing files:  33%|███▎      | 899/2710 [01:18<02:47, 10.81it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2468_mic1.flac probably does not have speech please check it !!


Processing files:  35%|███▍      | 940/2710 [01:21<01:39, 17.76it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2163_mic1.flac probably does not have speech please check it !!


Processing files:  35%|███▌      | 960/2710 [01:22<01:38, 17.82it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2575_mic1.flac probably does not have speech please check it !!


Processing files:  38%|███▊      | 1020/2710 [01:28<01:42, 16.48it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2494_mic1.flac probably does not have speech please check it !!


Processing files:  38%|███▊      | 1039/2710 [01:29<01:38, 16.90it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2438_mic1.flac probably does not have speech please check it !!


Processing files:  39%|███▊      | 1050/2710 [01:30<01:25, 19.48it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2674_mic1.flac probably does not have speech please check it !!
> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2397_mic1.flac probably does not have speech please check it !!
> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2168_mic1.flac probably does not have speech please check it !!


Processing files:  40%|███▉      | 1072/2710 [01:31<01:24, 19.48it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2660_mic1.flac probably does not have speech please check it !!


Processing files:  41%|████      | 1114/2710 [01:35<01:36, 16.54it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2477_mic1.flac probably does not have speech please check it !!


Processing files:  43%|████▎     | 1166/2710 [01:40<01:56, 13.29it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2305_mic1.flac probably does not have speech please check it !!


Processing files:  44%|████▍     | 1190/2710 [01:43<02:06, 12.00it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2329_mic1.flac probably does not have speech please check it !!


Processing files:  46%|████▌     | 1244/2710 [01:47<01:50, 13.32it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2171_mic1.flac probably does not have speech please check it !!


Processing files:  47%|████▋     | 1262/2710 [01:48<01:18, 18.43it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2567_mic1.flac probably does not have speech please check it !!
> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2617_mic1.flac probably does not have speech please check it !!


Processing files:  47%|████▋     | 1267/2710 [01:48<01:13, 19.51it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2421_mic1.flac probably does not have speech please check it !!


Processing files:  47%|████▋     | 1283/2710 [01:49<02:01, 11.73it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2366_mic1.flac probably does not have speech please check it !!
> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2685_mic1.flac probably does not have speech please check it !!


Processing files:  47%|████▋     | 1287/2710 [01:50<02:19, 10.21it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2220_mic1.flac probably does not have speech please check it !!


Processing files:  48%|████▊     | 1311/2710 [01:52<01:30, 15.49it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2622_mic1.flac probably does not have speech please check it !!


Processing files:  49%|████▊     | 1319/2710 [01:53<02:04, 11.16it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2287_mic1.flac probably does not have speech please check it !!


Processing files:  49%|████▉     | 1334/2710 [01:54<01:59, 11.56it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2234_mic1.flac probably does not have speech please check it !!


Processing files:  49%|████▉     | 1341/2710 [01:54<01:25, 15.96it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2372_mic1.flac probably does not have speech please check it !!


Processing files:  52%|█████▏    | 1404/2710 [01:59<01:18, 16.63it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2447_mic1.flac probably does not have speech please check it !!


Processing files:  54%|█████▍    | 1460/2710 [02:05<02:50,  7.32it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2593_mic1.flac probably does not have speech please check it !!


Processing files:  55%|█████▌    | 1498/2710 [02:08<01:50, 10.97it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2252_mic1.flac probably does not have speech please check it !!


Processing files:  59%|█████▉    | 1599/2710 [02:16<01:07, 16.45it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2606_mic1.flac probably does not have speech please check it !!


Processing files:  61%|██████    | 1649/2710 [02:23<01:57,  9.05it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2543_mic1.flac probably does not have speech please check it !!


Processing files:  61%|██████    | 1657/2710 [02:23<01:19, 13.22it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2633_mic1.flac probably does not have speech please check it !!


Processing files:  63%|██████▎   | 1702/2710 [02:27<01:28, 11.40it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2289_mic1.flac probably does not have speech please check it !!


Processing files:  63%|██████▎   | 1714/2710 [02:28<01:18, 12.65it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2526_mic1.flac probably does not have speech please check it !!


Processing files:  66%|██████▌   | 1793/2710 [02:32<00:28, 31.86it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2380_mic1.flac probably does not have speech please check it !!


Processing files:  67%|██████▋   | 1815/2710 [02:32<00:24, 37.02it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2569_mic1.flac probably does not have speech please check it !!


Processing files:  71%|███████▏  | 1936/2710 [02:36<00:23, 33.62it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2635_mic1.flac probably does not have speech please check it !!


Processing files:  72%|███████▏  | 1940/2710 [02:36<00:22, 33.73it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2259_mic1.flac probably does not have speech please check it !!


Processing files:  73%|███████▎  | 1978/2710 [02:38<00:22, 32.81it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2422_mic1.flac probably does not have speech please check it !!


Processing files:  75%|███████▍  | 2029/2710 [02:39<00:19, 34.31it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2350_mic1.flac probably does not have speech please check it !!


Processing files:  82%|████████▏ | 2219/2710 [02:46<00:13, 36.80it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2161_mic1.flac probably does not have speech please check it !!


Processing files:  83%|████████▎ | 2252/2710 [02:47<00:13, 33.38it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2563_mic1.flac probably does not have speech please check it !!


Processing files:  87%|████████▋ | 2367/2710 [02:51<00:11, 28.66it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2410_mic1.flac probably does not have speech please check it !!


Processing files:  88%|████████▊ | 2380/2710 [02:52<00:12, 26.50it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2533_mic1.flac probably does not have speech please check it !!


Processing files:  88%|████████▊ | 2388/2710 [02:52<00:11, 29.25it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2643_mic1.flac probably does not have speech please check it !!
> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2196_mic1.flac probably does not have speech please check it !!


Processing files:  89%|████████▉ | 2423/2710 [02:53<00:07, 37.59it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2594_mic1.flac probably does not have speech please check it !!


Processing files:  93%|█████████▎| 2529/2710 [02:57<00:06, 26.38it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2332_mic1.flac probably does not have speech please check it !!


Processing files:  95%|█████████▍| 2573/2710 [03:01<00:15,  8.77it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2291_mic1.flac probably does not have speech please check it !!


Processing files: 100%|█████████▉| 2703/2710 [03:13<00:00, 15.42it/s]

> The file ../data/converted/TSync2-to-vctk/wav16_silence_trimmed/TSync2/TSync2_2423_mic1.flac probably does not have speech please check it !!


Processing files: 100%|██████████| 2710/2710 [03:13<00:00, 14.00it/s]


Processing complete

Found 58 files with no speech. List saved to ../data/converted/TSync2-to-vctk/no_speech_files.txt





# Create metadata

In [7]:
# The joins below use the `/` operator, so make sure DEST_DIR is a Path
# (Path() of an existing Path is a no-op, so re-running this cell is safe)
DEST_DIR = Path(DEST_DIR)

# Write character files
sorted_chars = sorted(all_chars)

# ASCII-only variant: each character written as its Python unicode escape
with open(DEST_DIR / 'all_chars_unicode.txt', 'w', encoding='utf-8') as f:
    f.write(''.join(c.encode('unicode_escape').decode('ascii') for c in sorted_chars))

# Literal variant: the characters are non-ASCII, so the encoding must be explicit
# rather than the platform default, which is not UTF-8 everywhere
with open(DEST_DIR / 'all_chars.txt', 'w', encoding='utf-8') as f:
    f.write(''.join(sorted_chars))