In [None]:
import os
from pathlib import Path
import concurrent.futures
from functools import partial
from tqdm import tqdm
import librosa
import soundfile as sf
from glob import glob

In [None]:
# Source clips (Common Voice `ka` split, MP3). glob() yields paths in arbitrary,
# filesystem-dependent order, so sort them: any later slice of this list then
# selects the same files on every run.
audio_pathes = sorted(glob('/home/penguin/Data/cv-corpus-22.0-2025-06-20-ka/cv-corpus-22.0-2025-06-20/ka/clips/*.mp3'))
# Destination directory for the converted clips.
new_root_path = '/home/penguin/Data/cv-corpus-22.0-2025-06-20-ka/cv-corpus-22.0-2025-06-20/ka/clips_16k/'

In [None]:
def _resample_write(src_path: str, dst_dir: str) -> tuple[str, None | Exception]:
    try:
        sig, sr = librosa.load(src_path, sr=16000)
        dst = Path(dst_dir) / Path(src_path).name.replace('.mp3', '.flac')
        sf.write(str(dst), sig, 16000, format='FLAC')
        return (src_path, None)
    except Exception as e:
        return (src_path, e)

In [None]:
# Size of the process pool: one worker per CPU, capped at 8 (tune as needed).
# os.cpu_count() may return None, in which case a single worker is used.
max_workers = os.cpu_count() or 1
if max_workers > 8:
    max_workers = 8

In [None]:
# Number of clips to convert. Defaults to the whole list; set a smaller value
# here for a quick trial run.
max_files = len(audio_pathes)
# Work list for the conversion step: the first `max_files` paths.
selected = list(audio_pathes[:max_files])

In [None]:
# Convert every selected clip in parallel and collect per-file failures as
# (source_path, exception) pairs.
errors = []
with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as ex:
    # Map each future back to its source path so a failure can be attributed
    # even when the worker never returned a result.
    futures = {ex.submit(_resample_write, p, new_root_path): p for p in selected}
    for fut in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
        try:
            src, err = fut.result()
        except Exception as exc:
            # result() re-raises when the worker process died
            # (BrokenProcessPool) or its return value could not be pickled.
            # Record it like any other failure so the loop finishes and the
            # summary below is still printed.
            src, err = futures[fut], exc
        if err is not None:
            errors.append((src, err))

if errors:
    print(f"Completed with {len(errors)} errors. Example: {errors[:3]}")
else:
    print(f"Successfully processed {len(selected)} files.")