# Import libraries

In [1]:
import os
import shutil
from pathlib import Path

import torch
from pydub import AudioSegment
from tqdm import tqdm

from TTS.bin.resample import resample_files
from TTS.utils.vad import get_vad_model_and_utils, remove_silence

# Moving files to the new directory

In [None]:
# Input locations: raw TSync2 audio and the matching transcript files.
SRC_AUDIO_PATH = "../data/raw/TSync2/wav"
SRC_TEXT_PATH = "../data/raw/TSync2/wrd_ph"

# Output locations: everything is written beneath DEST_DIR, with audio and
# text kept in separate sub-trees that each end in a "Tsync2" folder
# (presumably the speaker directory of a VCTK-style layout -- confirm).
DEST_DIR = "../data/converted/TSync2-to-vctk"
DEST_AUDIO_PATH = os.path.join(DEST_DIR, "wav32/Tsync2")
DEST_TEXT_PATH = os.path.join(DEST_DIR, "txt/Tsync2")

def convert_wav_to_flac(
    src_path: str,
    dst_path: str,
    *,
    sample_rate: int = 32000,
    channels: int = 1,
    compression_level: int = 8,
) -> bool:
    """Convert a WAV file to FLAC using pydub.

    The conversion is best-effort: any error is printed and reported through
    the return value instead of being raised, so a batch loop can skip the
    offending file and carry on.

    Args:
        src_path: Path of the WAV file to read.
        dst_path: Path of the FLAC file to write.
        sample_rate: Output sample rate, passed to the encoder as ``-ar``.
        channels: Output channel count, passed to the encoder as ``-ac``.
        compression_level: FLAC compression level, passed to the encoder as
            ``-compression_level``.

    Returns:
        True if the file was exported, False if loading or exporting failed.
    """
    try:
        audio = AudioSegment.from_wav(src_path)

        # `parameters` holds extra command-line options for the encoder call;
        # every value has to be a string.
        audio.export(
            dst_path,
            format="flac",
            parameters=[
                "-ac", str(channels),
                "-ar", str(sample_rate),
                "-compression_level", str(compression_level),
            ],
        )
    except Exception as e:
        # Deliberately broad: one unreadable file must not abort the batch.
        print(f"Error converting {src_path}: {e}")
        return False
    return True

# Clean and create directories
# Start from an empty destination so output of a previous run cannot mix in.
if os.path.exists(DEST_DIR):
    print("Clearing destination folder")
    shutil.rmtree(DEST_DIR)
os.makedirs(DEST_TEXT_PATH, exist_ok=True)
os.makedirs(DEST_AUDIO_PATH, exist_ok=True)

all_chars = set()  # every character seen in successfully written transcripts
skip_files = []  # 1-based indices of pairs that could not be processed

# Get sorted lists of files
audio_files = sorted(Path(SRC_AUDIO_PATH).glob("*.wav"))
text_files = sorted(Path(SRC_TEXT_PATH).glob("*.txt"))

# Audio and text are paired purely by sorted position. If the counts differ,
# zip() silently drops the surplus and the remaining pairs may be misaligned,
# so make that visible instead of producing a quietly wrong dataset.
if len(audio_files) != len(text_files):
    print(
        f"Warning: found {len(audio_files)} audio files but {len(text_files)} text files; "
        "files are paired by sorted position and may be misaligned"
    )
file_pairs = list(zip(audio_files, text_files))

# Process files with progress bar
for i, (audio_file, text_file) in enumerate(tqdm(file_pairs, desc="Processing files"), 1):
    dest_audio = os.path.join(DEST_AUDIO_PATH, f"Tsync2_{i:03d}_mic1.flac")
    dest_text = os.path.join(DEST_TEXT_PATH, f"Tsync2_{i:03d}.txt")
    try:
        # Read the transcript first: it is cheap, and a failure here avoids
        # encoding audio that would end up without a transcript.
        # Only the first line is used; "|" separators are removed.
        with text_file.open('r', encoding='utf-8') as f:
            clean_text = f.readline().strip().replace("|", "")

        # Process audio
        if not convert_wav_to_flac(str(audio_file), dest_audio):
            raise RuntimeError("Failed to convert audio")

        with open(dest_text, 'w', encoding='utf-8') as f:
            f.write(clean_text)
    except Exception as e:
        print(f"Error processing pair {i}: {e}")
        skip_files.append(i)
        # Remove partial output so the destination only holds complete pairs.
        for leftover in (dest_audio, dest_text):
            if os.path.exists(leftover):
                os.remove(leftover)
    else:
        # Count characters only for pairs that were actually written.
        all_chars.update(clean_text)

# Report against the pairs actually attempted, not the number of audio files.
print(f"Processed {len(file_pairs) - len(skip_files)} file pairs")
print(f"Skipped {len(skip_files)} pairs")
print(f"Unique characters found: {''.join(sorted(all_chars))}")

# Resample and trim audio

In [None]:
# Mirror the wav32 tree into wav16_silence_trimmed so the later steps can work
# on a copy of the converted audio.
# NOTE(review): these paths point at "commonvoice-to-vctk", while the earlier
# cell writes its output under "TSync2-to-vctk" -- confirm this is intended.
src_dir = "../data/converted/commonvoice-to-vctk/wav32"
dst_dir = "../data/converted/commonvoice-to-vctk/wav16_silence_trimmed"

# Make sure the destination root exists even when the source tree is empty.
os.makedirs(dst_dir, exist_ok=True)

# os.walk is top-down by default, so each directory is recreated on the
# destination side before the walk reaches the files inside it.
for current_root, subdirs, filenames in os.walk(src_dir):
    for subdir in subdirs:
        source_subdir = os.path.join(current_root, subdir)
        mirrored_subdir = os.path.join(dst_dir, os.path.relpath(source_subdir, src_dir))
        os.makedirs(mirrored_subdir, exist_ok=True)

    for filename in filenames:
        source_file = os.path.join(current_root, filename)
        target_file = os.path.join(dst_dir, os.path.relpath(source_file, src_dir))
        # copy2 copies the file content together with its metadata.
        shutil.copy2(source_file, target_file)

In [None]:
# Target sample rate and number of parallel jobs for the resampling step.
SAMPLE_RATE = 16000
NUM_RESAMPLE_THREADS = 4

# No output directory is passed, so the .flac files in this folder are
# presumably resampled in place -- confirm against resample_files.
resample_files(
    "../data/converted/commonvoice-to-vctk/wav16_silence_trimmed",
    SAMPLE_RATE,
    file_ext="flac",
    n_jobs=NUM_RESAMPLE_THREADS,
)

In [None]:
# Trim silence from every .flac file under input_folder, overwriting each file
# in place, and log the files in which no speech was detected.
# NOTE(review): this path points at "commonvoice-to-vctk", while the first
# cell writes its output under "TSync2-to-vctk" -- confirm this is intended.
input_folder = Path("../data/converted/commonvoice-to-vctk/wav16_silence_trimmed")

# Query CUDA availability once so the model and every per-file call agree,
# instead of re-evaluating it for each file.
use_cuda = torch.cuda.is_available()

# Get VAD model once
model_and_utils = get_vad_model_and_utils(use_cuda=use_cuda, use_onnx=False)

# Get all .flac files
flac_files = list(input_folder.glob('**/*.flac'))
total_files = len(flac_files)
print(f"Found {total_files} .flac files to process")

# Track files with no speech detected, and files that raised an error
no_speech_files = []
failed_files = []

for input_path in tqdm(flac_files, desc="Processing files"):
    relative_path = input_path.relative_to(input_folder)

    try:
        # The output path is the input path: the trimmed audio replaces the
        # original file, so no extra directories need to be created.
        trimmed_path, is_speech = remove_silence(
            model_and_utils,
            str(input_path),
            str(input_path),
            trim_just_beginning_and_end=True,
            use_cuda=use_cuda,
        )
        # If no speech detected, add to list
        if not is_speech:
            no_speech_files.append(str(trimmed_path))
    except Exception as e:
        # Best-effort: report the failure and continue with the next file.
        print(f"Error processing {relative_path}: {str(e)}")
        failed_files.append(str(relative_path))

print("\nProcessing complete")
if failed_files:
    print(f"{len(failed_files)} of {total_files} files could not be processed")

# Write list of files with no speech detected
if no_speech_files:
    log_path = input_folder.parent / "no_speech_files.txt"
    with open(log_path, "w", encoding="utf-8") as f:
        f.writelines(f"{file}\n" for file in no_speech_files)
    print(f"\nFound {len(no_speech_files)} files with no speech. List saved to {log_path}")