In [1]:
!pip install imageio-ffmpeg torch torchvision



In [2]:
import os
import sys
import logging
import tempfile
import shutil
import subprocess
import traceback
import gc
from typing import List, Set, Dict, Optional, Any
from dataclasses import dataclass, field
from urllib.parse import urlparse
from concurrent.futures import ThreadPoolExecutor, as_completed
from contextlib import contextmanager
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor
import multiprocessing as mp
import threading
from dataclasses import asdict
from sagemaker.pytorch import PyTorchProcessor
from sagemaker import get_execution_role
from sagemaker.processing import ProcessingOutput

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [3]:
mp.set_start_method("spawn", force=True) 

_PIPELINE = None
_PIPELINE_DEVICE = None

In [4]:
# Import torch first to avoid registration conflicts
import torch
import torchvision  # Import this explicitly before transformers

# Env setup
import os
import imageio_ffmpeg, os
os.environ["PATH"] = os.path.dirname(imageio_ffmpeg.get_ffmpeg_exe()) + os.pathsep + os.environ.get("PATH","")
os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/transformers_cache")
os.environ.setdefault("HF_HOME", "/tmp/hf_home")
os.environ.setdefault("TORCH_HOME", "/tmp/torch_home")

import boto3
from botocore.exceptions import ClientError
import pandas as pd
from tqdm import tqdm
import imageio_ffmpeg
from transformers import pipeline
import argparse, os, logging

2025-10-07 03:43:19.711098: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-07 03:43:19.729589: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759808599.750165   30653 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759808599.757035   30653 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-10-07 03:43:19.779251: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [5]:
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)],
    force = True,
)
logger = logging.getLogger("asr_pipeline")

In [6]:
# =============================================== Config ===============================================
@dataclass
class Config:
    """Configuration for the transcription pipeline"""
    # S3 settings
    s3_input: str = "s3://asrelder-data/common_voice/23/cv-corpus-23.0-2025-09-05/en/clips/"
    output_local_csv: str = "./transcripts_from_prefix.csv"
    write_back_to_s3: bool = False
    output_s3_uri: Optional[str] = None
    #output_s3_uri: str = "s3://asrelder-data/outputs/transcripts_from_prefix.csv" uncomment if we want it back on s3
    validation_csv_path: str = "common_voices_23_train_with_validated_votes.csv"
    validation_csv_column: str = "path"

    # Processing settings
    max_files: Optional[int] = None
    download_workers: int = 24
    append_every_n: int = 200
    resume_from_csv: bool = True

    # Model settings
    model_id: str = "openai/whisper-base"
    language: Optional[str] = "en"
    task: str = "transcribe"
    chunk_length_s: int = 30
    stride_length_s: tuple = (5, 5)

    # File settings
    audio_extensions: List[str] = field(default_factory=lambda: [".mp3", ".wav", ".flac", ".m4a", ".ogg"])

In [7]:
def _build_worker_pipeline(cfg_dict, device_id):
    use_cuda = torch.cuda.is_available() and device_id >= 0
    dtype = torch.float16 if use_cuda else torch.float32
    generate_kwargs = {}
    if cfg_dict.get("language"): generate_kwargs["language"] = cfg_dict["language"]
    if cfg_dict.get("task"):     generate_kwargs["task"] = cfg_dict["task"]
    return pipeline(
        "automatic-speech-recognition",
        model=cfg_dict["model_id"],
        device=(device_id if use_cuda else -1),
        torch_dtype=dtype,
        return_timestamps=True,
        chunk_length_s=cfg_dict["chunk_length_s"],
        stride_length_s=tuple(cfg_dict["stride_length_s"]),
        generate_kwargs=(generate_kwargs or None),
    )

def _transcribe_worker(args):
    # args: (audio_path, cfg_dict, device_id)
    audio_path, cfg_dict, device_id = args
    global _PIPELINE, _PIPELINE_DEVICE
    if (_PIPELINE is None) or (_PIPELINE_DEVICE != device_id):
        _PIPELINE = _build_worker_pipeline(cfg_dict, device_id)
        _PIPELINE_DEVICE = device_id
    try:
        out = _PIPELINE(audio_path)
        text = out.get("text", "") if isinstance(out, dict) else str(out)
        return {"path": audio_path, "text": text, "error": ""}
    except Exception as e:
        return {"path": audio_path, "text": "", "error": f"{type(e).__name__}: {e}"}

In [8]:
# =============================================== Core Components ===============================================
class FFmpegSetup:
    """Manages FFmpeg availability"""

    @staticmethod
    def ensure_available() -> Optional[str]:
        """Check if ffmpeg is available on PATH"""
        ff = None
        try:
            ff = imageio_ffmpeg.get_ffmpeg_exe()
        except Exception as e:
            logger.warning(f"imageio-ffmpeg error: {e}")

        if ff and os.path.exists(ff):
            ff_dir = os.path.dirname(ff)
            os.environ["PATH"] = ff_dir + os.pathsep + os.environ.get("PATH", "")

        resolved = shutil.which("ffmpeg")
        if resolved:
            try:
                out = subprocess.run(
                    [resolved, "-version"],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    check=True,
                    timeout=5
                )
                logger.info(f"ffmpeg: {resolved} | {out.stdout.decode('utf-8').splitlines()[0]}")
            except Exception:
                logger.info(f"ffmpeg: {resolved} (version check failed)")
        else:
            # Create shim if needed
            if ff and os.path.exists(ff):
                bin_dir = os.path.expanduser("~/.local/bin")
                os.makedirs(bin_dir, exist_ok=True)
                shim = os.path.join(bin_dir, "ffmpeg")
                with open(shim, "w") as f:
                    f.write(f"#!/usr/bin/env bash\n\"{ff}\" \"$@\"\n")
                os.chmod(shim, 0o755)
                os.environ["PATH"] = bin_dir + os.pathsep + os.environ.get("PATH", "")
                resolved = shutil.which("ffmpeg")
                if resolved:
                    logger.info(f"ffmpeg shim created: {resolved}")

        if not resolved:
            logger.warning("FFmpeg not found; use torchaudio fallback")

        return resolved

class S3Manager:
    """Handles S3 operations"""

    def __init__(self, config: Config):
        self.config = config
        self.client = boto3.client("s3")

    def parse_uri(self, uri: str) -> tuple[str, str]:
        """Parse S3 URI into bucket and key"""
        if not uri.startswith("s3://"):
            raise ValueError(f"Invalid S3 URI: {uri}")
        p = urlparse(uri)
        return p.netloc, p.path.lstrip("/")

    def is_audio_file(self, key: str) -> bool:
        """Check if key is an audio file"""
        return any(key.lower().endswith(ext) for ext in self.config.audio_extensions)

    def list_audio_keys(self, bucket: str, prefix: str, allowed_filenames: Optional[Set[str]] = None) -> List[str]:
        """List all audio keys under prefix, optionally filtered by allowed filenames"""
        if self.is_audio_file(prefix):
            return [prefix]

        keys = []
        paginator = self.client.get_paginator("list_objects_v2")

        for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
            for obj in page.get("Contents", []):
                key = obj["Key"]
                if not key.endswith("/") and self.is_audio_file(key):
                    if allowed_filenames:
                        filename = os.path.basename(key)
                        if filename not in allowed_filenames:
                            continue
                            
                    keys.append(key)
                    if self.config.max_files and len(keys) >= self.config.max_files:
                        return keys
        return keys

    def download_to_temp(self, bucket: str, key: str) -> str:
        """Download S3 object to temporary file"""
        _, ext = os.path.splitext(key)
        if not ext:
            ext = ".mp3"

        fd, tmp_path = tempfile.mkstemp(suffix=ext)
        os.close(fd)

        with open(tmp_path, "wb") as f:
            self.client.download_fileobj(bucket, key, f)

        return tmp_path

    def upload_file(self, local_path: str, s3_uri: str):
        """Upload file to S3"""
        bucket, key = self.parse_uri(s3_uri)
        self.client.upload_file(local_path, bucket, key)
        logger.info(f"Uploaded to {s3_uri}")

class TranscriptionManager:
    """Manages multiple ASR pipelines for parallel processing"""

    def __init__(self, config: Config):
        self.config = config
        self.pipelines = []
        self.pipeline_locks = []

        num_workers = getattr(config, 'asr_workers', 16)

        # Create multiple pipeline instances
        for i in range(num_workers):
            if torch.cuda.is_available():
                device_id = i % torch.cuda.device_count()
            else:
                device_id = -1

            logger.info(f"Creating ASR worker {i+1}/{num_workers} on device {device_id}")
            pipe = self._build_pipeline(device_id=device_id)
            self.pipelines.append(pipe)
            self.pipeline_locks.append(threading.Lock())

    def _build_pipeline(self, device_id=-1):
        """Build the ASR pipeline"""
        use_cuda = torch.cuda.is_available() and device_id >= 0
        dtype = torch.float16 if use_cuda else torch.float32

        generate_kwargs = {}
        if self.config.language:
            generate_kwargs["language"] = self.config.language
        if self.config.task:
            generate_kwargs["task"] = self.config.task

        logger.info(f"Loading ASR: {self.config.model_id} (device={device_id}, dtype={dtype})")

        return pipeline(
            "automatic-speech-recognition",
            model=self.config.model_id,
            device=device_id,
            torch_dtype=dtype,
            return_timestamps=True,
            chunk_length_s=self.config.chunk_length_s,
            stride_length_s=self.config.stride_length_s,
            generate_kwargs=generate_kwargs or None,
        )

    def transcribe(self, audio_path: str) -> Dict[str, Any]:
        """Single file transcription - uses first pipeline"""
        return self._transcribe_with_pipeline(audio_path, 0)

    def transcribe_batch_parallel(self, paths_and_keys: List[tuple]) -> List[Dict[str, Any]]:
        """Transcribe multiple files in parallel using ThreadPoolExecutor"""
        num_workers = len(self.pipelines)

        with ThreadPoolExecutor(max_workers=num_workers) as executor:
            futures = []
            for i, (path, key) in enumerate(paths_and_keys):
                pipeline_idx = i % num_workers
                future = executor.submit(
                    self._transcribe_with_pipeline_and_key,
                    path,
                    key,
                    pipeline_idx
                )
                futures.append(future)

            # Collect results
            results = []
            for future in as_completed(futures):
                try:
                    result = future.result()
                    results.append(result)
                except Exception as e:
                    logger.error(f"Transcription failed: {e}")

            return results

    def _transcribe_with_pipeline(self, audio_path: str, pipeline_idx: int) -> Dict[str, Any]:
        """Transcribe using a specific pipeline instance"""
        with self.pipeline_locks[pipeline_idx]:
            try:
                result = self.pipelines[pipeline_idx](audio_path)
                text = result.get("text", "") if isinstance(result, dict) else str(result)
                return {"text": text, "error": None}
            except Exception as e:
                if "ffmpeg" in str(e).lower():
                    return self._fallback_transcribe(audio_path, pipeline_idx)
                return {"text": "", "error": f"{type(e).__name__}: {e}"}

    def _transcribe_with_pipeline_and_key(self, audio_path: str, s3_key: str, pipeline_idx: int) -> Dict[str, Any]:
        """Transcribe with key tracking for batch processing"""
        result = self._transcribe_with_pipeline(audio_path, pipeline_idx)

        if result["text"]:
            preview = result["text"][:100] + "..." if len(result["text"]) > 100 else result["text"]
            logger.info(f"✓ Worker {pipeline_idx}: {os.path.basename(s3_key)}: {preview}")
        else:
            logger.warning(f"✗ Worker {pipeline_idx}: {os.path.basename(s3_key)}: {result['error']}")

        return {
            "s3_key": s3_key,
            "filename": os.path.basename(s3_key),
            "transcribed_text": result["text"],
            "error": result["error"] or ""
        }

    def _fallback_transcribe(self, audio_path: str, pipeline_idx: int) -> Dict[str, Any]:
        """Fallback using torchaudio"""
        try:
            import torchaudio
            waveform, sr = torchaudio.load(audio_path)
            if waveform.ndim == 2:
                waveform = waveform.mean(dim=0, keepdim=True)

            with self.pipeline_locks[pipeline_idx]:
                result = self.pipelines[pipeline_idx](waveform.squeeze(0).numpy(), sampling_rate=sr)
                text = result.get("text", "") if isinstance(result, dict) else str(result)
                return {"text": text, "error": None}
        except Exception as e:
            return {"text": "", "error": f"Fallback failed: {e}"}

    def cleanup_gpu_memory(self):
        """Clean up GPU memory"""
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()


class CSVManager:
    """Handles CSV operations """

    def __init__(self, csv_path: str):
        self.csv_path = csv_path

    def read_processed_keys(self) -> Set[str]:
        """Read already processed S3 keys from CSV"""
        if not os.path.exists(self.csv_path):
            return set()
        try:
            df = pd.read_csv(self.csv_path, usecols=["s3_key"])
            return set(df["s3_key"].astype(str).tolist())
        except Exception as e:
            logger.warning(f"Could not read existing CSV: {e}")
            return set()

    def append_results(self, results: List[Dict[str, Any]]):
        """Append results to CSV"""
        if not results:
            return

        df = pd.DataFrame(results)
        mode = "a" if os.path.exists(self.csv_path) else "w"
        header = not os.path.exists(self.csv_path)

        df.to_csv(self.csv_path, index=False, mode=mode, header=header)
        logger.info(f"Appended {len(results)} rows to {self.csv_path}")

In [9]:
class ValidationCSVManager:
    """Handles the CSV with validated files"""

    def __init__(self, csv_path: str, column_name: str):
        self.csv_path = csv_path
        self.column_name = column_name

    def read_allowed_filenames(self) -> Set[str]:
        """Read the list of allowed filenames from the validation CSV"""
        if not os.path.exists(self.csv_path):
            raise FileNotFoundError(f"Validation CSV not found: {self.csv_path}")

        try:
            df = pd.read_csv(self.csv_path, usecols=[self.column_name])
            filenames = df[self.column_name].astype(str).tolist()
            # Remove any NaN values and strip whitespace
            filenames = [f.strip() for f in filenames if pd.notna(f) and f.strip()]
            logger.info(f"Loaded {len(filenames)} allowed filenames from {self.csv_path}")
            return set(filenames)
        except Exception as e:
            logger.error(f"Failed to read validation CSV: {e}")
            raise

In [10]:
class ProductionPipeline:
    """Main for the transcription pipeline"""

    def __init__(self, config: Config):
        self.config = config
        self.s3_manager = S3Manager(config)
        self.transcription_manager = TranscriptionManager(config)
        self.csv_manager = CSVManager(config.output_local_csv)
        self.validation_csv_manager = ValidationCSVManager(
            config.validation_csv_path,
            config.validation_csv_column
        )
        self.results_buffer: List[Dict[str, Any]] = []

    def run(self):
        """Execute the transcription pipeline"""
        # Setup
        FFmpegSetup.ensure_available()

        allowed_filenames = self.validation_csv_manager.read_allowed_filenames()
        logger.info(f"Will only process files from validation CSV: {len(allowed_filenames)} files")


        bucket, prefix = self.s3_manager.parse_uri(self.config.s3_input)
        prefix_norm = prefix.rstrip("/") + "/"

        # Normalize allow-list to just basenames so it matches S3 keys
        def _norm(name: str) -> str:
            name = (name or "").strip().lstrip("/")
            return os.path.basename(name)

        allowed_filenames = { _norm(x) for x in self.validation_csv_manager.read_allowed_filenames() }
        logger.info(f"Will only process files from validation CSV (normalized): {len(allowed_filenames)}")


        existing = []
        paginator = self.s3_manager.client.get_paginator("list_objects_v2")
        for page in paginator.paginate(Bucket=bucket, Prefix=prefix_norm):
            for obj in page.get("Contents", []):
                key = obj["Key"]
                if key.endswith("/"):
                    continue
                if not self.s3_manager.is_audio_file(key):
                    continue
                if os.path.basename(key) in allowed_filenames:
                    existing.append(key)

        all_keys = existing
        logger.info(f"Using {len(all_keys)} existing keys from CSV list (after intersection).")



        logger.info(f"Found {len(all_keys)} matching files in S3")

        processed_keys = set()
        if self.config.resume_from_csv:
            processed_keys = self.csv_manager.read_processed_keys()

        keys_to_process = [k for k in all_keys if k not in processed_keys]

        logger.info(
            f"Processing {len(keys_to_process)} files "
            f"(skipped {len(processed_keys)} already done)"
        )

        if not keys_to_process:
            logger.info("No files to process")
            return

        # Process with concurrent downloads
        self._process_with_concurrency(bucket, keys_to_process)

        # Final flush
        self._flush_results()

        # Upload to S3 if configured
        if self.config.write_back_to_s3 and self.config.output_s3_uri:
            self.s3_manager.upload_file(
                self.config.output_local_csv,
                self.config.output_s3_uri
            )

        logger.info("Pipeline complete!")


    def _process_with_concurrency(self, bucket: str, keys: List[str]):
        """Process files with concurrent downloads and parallel transcription"""
        batch_size = max(1, self.config.download_workers * 2)

        with ThreadPoolExecutor(max_workers=self.config.download_workers) as pool:
            for i in range(0, len(keys), batch_size):
                batch = keys[i:i + batch_size]

                # Download batch
                downloaded = []
                futures = {
                    pool.submit(self.s3_manager.download_to_temp, bucket, k): k
                    for k in batch
                }

                progress = tqdm(
                    as_completed(futures),
                    total=len(futures),
                    desc=f"Downloading batch {i//batch_size + 1}"
                )

                for future in progress:
                    key = futures[future]
                    try:
                        local_path = future.result()
                        downloaded.append((local_path, key))
                    except Exception as e:
                        logger.error(f"Download failed for {key}: {e}")
                        self.results_buffer.append({
                            "s3_key": key,
                            "filename": os.path.basename(key),
                            "transcribed_text": "",
                            "error": f"Download failed: {e}"
                        })

                # Transcribe downloaded files in parallel
                if downloaded:
                    logger.info(f"Transcribing {len(downloaded)} files in parallel...")
                    results = self.transcription_manager.transcribe_batch_parallel(downloaded)

                    for result in results:
                        self.results_buffer.append(result)

                    # Clean up temp files
                    for local_path, _ in downloaded:
                        try:
                            os.remove(local_path)
                        except:
                            pass

                # Periodic flush
                if len(self.results_buffer) >= self.config.append_every_n:
                    self._flush_results()

                # Memory cleanup
                self.transcription_manager.cleanup_gpu_memory()

    def _process_single_file(self, future, key: str):
        """Process a single downloaded file"""
        local_path = None
        try:
            # Get downloaded file
            local_path = future.result()

            # Transcribe
            result = self.transcription_manager.transcribe(local_path)

            # Store result
            self.results_buffer.append({
                "s3_key": key,
                "filename": os.path.basename(key),
                "transcribed_text": result["text"],
                "error": result["error"] or ""
            })

            if result["text"]:
                preview = result["text"][:100] + "..." if len(result["text"]) > 100 else result["text"]
                logger.info(f"{os.path.basename(key)}: {preview}")
            else:
                logger.warning(f"{os.path.basename(key)}: {result['error']}")

            # Periodic flush
            if len(self.results_buffer) >= self.config.append_every_n:
                self._flush_results()

            # Memory cleanup
            self.transcription_manager.cleanup_gpu_memory()

        except Exception as e:
            tb = traceback.format_exc(limit=2)
            self.results_buffer.append({
                "s3_key": key,
                "filename": os.path.basename(key),
                "transcribed_text": "",
                "error": f"{type(e).__name__}: {e} | {tb}"
            })
            logger.error(f"Failed to process {key}: {e}")

        finally:
            # Clean up temp file
            if local_path and os.path.exists(local_path):
                try:
                    os.remove(local_path)
                except Exception:
                    pass

    def _flush_results(self):
        """Flush results buffer to CSV"""
        if self.results_buffer:
            self.csv_manager.append_results(self.results_buffer)
            self.results_buffer.clear()

In [11]:
# ======================================== Run main loop ========================================
def main():
    try:
        config = Config()
        pipeline = ProductionPipeline(config)
        pipeline.run()
        return 0
    except KeyboardInterrupt:
        logger.info("Interrupted by user")
        return 130
    except Exception as e:
        logger.error(f"Pipeline failed: {e}")
        logger.error(traceback.format_exc())
        return 1


if __name__ == "__main__":
    result = main()
    if result == 0:
        print("Pipeline completed successfully!")
    else:
        print(f"Pipeline failed with code: {result}")

2025-10-07 03:43:25,997 INFO - Creating ASR worker 1/16 on device 0
2025-10-07 03:43:25,998 INFO - Loading ASR: openai/whisper-base (device=0, dtype=torch.float16)


`torch_dtype` is deprecated! Use `dtype` instead!
Device set to use cuda:0


2025-10-07 03:43:27,384 INFO - Creating ASR worker 2/16 on device 0
2025-10-07 03:43:27,384 INFO - Loading ASR: openai/whisper-base (device=0, dtype=torch.float16)


Device set to use cuda:0


2025-10-07 03:43:27,905 INFO - Creating ASR worker 3/16 on device 0
2025-10-07 03:43:27,905 INFO - Loading ASR: openai/whisper-base (device=0, dtype=torch.float16)


Device set to use cuda:0


2025-10-07 03:43:28,415 INFO - Creating ASR worker 4/16 on device 0
2025-10-07 03:43:28,416 INFO - Loading ASR: openai/whisper-base (device=0, dtype=torch.float16)


Device set to use cuda:0


2025-10-07 03:43:28,917 INFO - Creating ASR worker 5/16 on device 0
2025-10-07 03:43:28,918 INFO - Loading ASR: openai/whisper-base (device=0, dtype=torch.float16)


Device set to use cuda:0


2025-10-07 03:43:29,430 INFO - Creating ASR worker 6/16 on device 0
2025-10-07 03:43:29,430 INFO - Loading ASR: openai/whisper-base (device=0, dtype=torch.float16)


Device set to use cuda:0


2025-10-07 03:43:29,972 INFO - Creating ASR worker 7/16 on device 0
2025-10-07 03:43:29,972 INFO - Loading ASR: openai/whisper-base (device=0, dtype=torch.float16)


Device set to use cuda:0


2025-10-07 03:43:30,474 INFO - Creating ASR worker 8/16 on device 0
2025-10-07 03:43:30,474 INFO - Loading ASR: openai/whisper-base (device=0, dtype=torch.float16)


Device set to use cuda:0


2025-10-07 03:43:30,985 INFO - Creating ASR worker 9/16 on device 0
2025-10-07 03:43:30,985 INFO - Loading ASR: openai/whisper-base (device=0, dtype=torch.float16)


Device set to use cuda:0


2025-10-07 03:43:31,496 INFO - Creating ASR worker 10/16 on device 0
2025-10-07 03:43:31,497 INFO - Loading ASR: openai/whisper-base (device=0, dtype=torch.float16)


Device set to use cuda:0


2025-10-07 03:43:32,035 INFO - Creating ASR worker 11/16 on device 0
2025-10-07 03:43:32,036 INFO - Loading ASR: openai/whisper-base (device=0, dtype=torch.float16)


Device set to use cuda:0


2025-10-07 03:43:32,555 INFO - Creating ASR worker 12/16 on device 0
2025-10-07 03:43:32,556 INFO - Loading ASR: openai/whisper-base (device=0, dtype=torch.float16)


Device set to use cuda:0


2025-10-07 03:43:33,057 INFO - Creating ASR worker 13/16 on device 0
2025-10-07 03:43:33,058 INFO - Loading ASR: openai/whisper-base (device=0, dtype=torch.float16)


Device set to use cuda:0


2025-10-07 03:43:33,591 INFO - Creating ASR worker 14/16 on device 0
2025-10-07 03:43:33,591 INFO - Loading ASR: openai/whisper-base (device=0, dtype=torch.float16)


Device set to use cuda:0


2025-10-07 03:43:34,108 INFO - Creating ASR worker 15/16 on device 0
2025-10-07 03:43:34,108 INFO - Loading ASR: openai/whisper-base (device=0, dtype=torch.float16)


Device set to use cuda:0


2025-10-07 03:43:34,634 INFO - Creating ASR worker 16/16 on device 0
2025-10-07 03:43:34,634 INFO - Loading ASR: openai/whisper-base (device=0, dtype=torch.float16)


Device set to use cuda:0


2025-10-07 03:43:35,137 INFO - ffmpeg shim created: /home/sagemaker-user/.local/bin/ffmpeg
2025-10-07 03:43:37,460 INFO - Loaded 699129 allowed filenames from common_voices_23_train_with_validated_votes.csv
2025-10-07 03:43:37,598 INFO - Will only process files from validation CSV: 699129 files
2025-10-07 03:43:39,904 INFO - Loaded 699129 allowed filenames from common_voices_23_train_with_validated_votes.csv
2025-10-07 03:43:40,570 INFO - Will only process files from validation CSV (normalized): 699129
2025-10-07 03:52:10,409 INFO - Using 699129 existing keys from CSV list (after intersection).
2025-10-07 03:52:10,410 INFO - Found 699129 matching files in S3
2025-10-07 03:52:10,466 INFO - Processing 699129 files (skipped 0 already done)


Downloading batch 1:  50%|█████     | 24/48 [00:00<00:00, 224.94it/s]



Downloading batch 1: 100%|██████████| 48/48 [00:00<00:00, 206.50it/s]

2025-10-07 03:52:10,909 INFO - Transcribing 48 files in parallel...





2025-10-07 03:52:14,760 INFO - ✓ Worker 8: common_voice_en_102385.mp3:  in my book of memory.
2025-10-07 03:52:14,774 INFO - ✓ Worker 11: common_voice_en_110094.mp3:  Let me talk to him.
2025-10-07 03:52:14,971 INFO - ✓ Worker 6: common_voice_en_101627.mp3:  Two children running in the field.
2025-10-07 03:52:15,411 INFO - ✓ Worker 15: common_voice_en_101622.mp3:  Three men are painting a metal wall white.
2025-10-07 03:52:15,562 INFO - ✓ Worker 14: common_voice_en_10324161.mp3:  Choose between the high road and the low.
2025-10-07 03:52:15,608 INFO - ✓ Worker 0: common_voice_en_10110.mp3:  I really liked the film we saw last week.
2025-10-07 03:52:15,994 INFO - ✓ Worker 2: common_voice_en_100704.mp3:  Please look up the song, Twist of Shadows.
2025-10-07 03:52:16,186 INFO - ✓ Worker 3: common_voice_en_101616.mp3:  Men in Orange Wests are at work on the construction site.
2025-10-07 03:52:16,217 INFO - ✓ Worker 10: common_voice_en_10187.mp3:  Though this be madness, yet there is method

Downloading batch 2:   2%|▏         | 1/48 [00:00<00:07,  6.62it/s]



Downloading batch 2:  42%|████▏     | 20/48 [00:00<00:00, 95.30it/s]



Downloading batch 2:  73%|███████▎  | 35/48 [00:00<00:00, 115.15it/s]



Downloading batch 2: 100%|██████████| 48/48 [00:00<00:00, 114.74it/s]

2025-10-07 03:52:27,909 INFO - Transcribing 48 files in parallel...





2025-10-07 03:52:29,923 INFO - ✓ Worker 11: common_voice_en_113625.mp3:  Full Circle
2025-10-07 03:52:30,890 INFO - ✓ Worker 9: common_voice_en_116623.mp3:  Young boys play sports together.
2025-10-07 03:52:31,073 INFO - ✓ Worker 8: common_voice_en_115744.mp3:  We all hope I am right.
2025-10-07 03:52:31,278 INFO - ✓ Worker 4: common_voice_en_11333.mp3:  A tractor is driving the field.
2025-10-07 03:52:31,284 INFO - ✓ Worker 0: common_voice_en_112554.mp3:  This child is getting a pedicure.
2025-10-07 03:52:31,662 INFO - ✓ Worker 10: common_voice_en_115769.mp3:  Everyone knows best where the shoe pinches him.
2025-10-07 03:52:31,742 INFO - ✓ Worker 1: common_voice_en_112553.mp3:  A man walking in front of a cleaning man.
2025-10-07 03:52:31,759 INFO - ✓ Worker 15: common_voice_en_116619.mp3:  The elderly gather for a Christmas vigil.
2025-10-07 03:52:31,794 INFO - ✓ Worker 6: common_voice_en_11335.mp3:  Add the tune to the rage radio playlist.
2025-10-07 03:52:31,937 INFO - ✓ Worker 13:

Downloading batch 3:   2%|▏         | 1/48 [00:00<00:07,  6.40it/s]



Downloading batch 3:  42%|████▏     | 20/48 [00:00<00:00, 92.55it/s]



Downloading batch 3:  90%|████████▉ | 43/48 [00:00<00:00, 146.45it/s]



Downloading batch 3: 100%|██████████| 48/48 [00:00<00:00, 124.59it/s]

2025-10-07 03:52:42,845 INFO - Transcribing 48 files in parallel...





2025-10-07 03:52:46,426 INFO - ✓ Worker 11: common_voice_en_116661.mp3:  Two brown dogs fight over some cloth.
2025-10-07 03:52:46,436 INFO - ✓ Worker 13: common_voice_en_116662.mp3:  A group of people gather for dinner.
2025-10-07 03:52:46,583 INFO - ✓ Worker 5: common_voice_en_116666.mp3:  Kids watch silently from behind a concert barrier.
2025-10-07 03:52:46,711 INFO - ✓ Worker 8: common_voice_en_116663.mp3:  Two elderly women are talking at a gathering.
2025-10-07 03:52:46,717 INFO - ✓ Worker 2: common_voice_en_116653.mp3:  Several smiling children are going down a slide.
2025-10-07 03:52:46,806 INFO - ✓ Worker 4: common_voice_en_116664.mp3:  A child stands upside down in a pool.
2025-10-07 03:52:46,957 INFO - ✓ Worker 1: common_voice_en_116656.mp3:  A man and a woman running in the street.
2025-10-07 03:52:47,301 INFO - ✓ Worker 15: common_voice_en_116668.mp3:  A man is rock climbing up the side of a cliff.
2025-10-07 03:52:47,703 INFO - ✓ Worker 12: common_voice_en_116675.mp3:  T

Downloading batch 4:   2%|▏         | 1/48 [00:00<00:07,  6.23it/s]



Downloading batch 4:  38%|███▊      | 18/48 [00:00<00:00, 81.84it/s]



Downloading batch 4:  83%|████████▎ | 40/48 [00:00<00:00, 133.95it/s]



Downloading batch 4: 100%|██████████| 48/48 [00:00<00:00, 113.78it/s]

2025-10-07 03:52:58,662 INFO - Transcribing 48 files in parallel...





2025-10-07 03:53:02,263 INFO - ✓ Worker 12: common_voice_en_116723.mp3:  The people are gathered at a bar.
2025-10-07 03:53:02,387 INFO - ✓ Worker 1: common_voice_en_116718.mp3:  For construction workers working on a rail road.


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


2025-10-07 03:53:02,446 INFO - ✓ Worker 3: common_voice_en_116724.mp3:  This photo shows Asian art on display.
2025-10-07 03:53:02,751 INFO - ✓ Worker 2: common_voice_en_116719.mp3:  An Asian man in a brown shirt creating portraits.
2025-10-07 03:53:03,037 INFO - ✓ Worker 15: common_voice_en_116736.mp3:  A man with construction hat on attached to harness.
2025-10-07 03:53:03,094 INFO - ✓ Worker 5: common_voice_en_116730.mp3:  A woman with a yellow shirt stretching on a bridge.
2025-10-07 03:53:03,152 INFO - ✓ Worker 14: common_voice_en_116733.mp3:  Man following through on a bowling shot made in an alley.
2025-10-07 03:53:03,297 INFO - ✓ Worker 11: common_voice_en_116740.mp3:  A man in Myers and expensive looking yellow sports car.
2025-10-07 03:53:03,881 INFO - ✓ Worker 7: common_voice_en_116729.mp3:  A cyclist in a yellow helmet is biking through the woods and fall.
2025-10-07 03:53:03,891 INFO - ✓ Worker 6: common_voice_en_116716.mp3:  A woman is standing next to a car with her hand

Downloading batch 5:  44%|████▍     | 21/48 [00:00<00:00, 95.02it/s]



Downloading batch 5:  96%|█████████▌| 46/48 [00:00<00:00, 156.33it/s]



Downloading batch 5: 100%|██████████| 48/48 [00:00<00:00, 118.75it/s]

2025-10-07 03:53:14,199 INFO - Transcribing 48 files in parallel...





2025-10-07 03:53:17,384 INFO - ✓ Worker 8: common_voice_en_116891.mp3:  A man plays a musical instrument.
2025-10-07 03:53:17,422 INFO - ✓ Worker 5: common_voice_en_116781.mp3:  A tourist is reading a map.
2025-10-07 03:53:17,680 INFO - ✓ Worker 7: common_voice_en_116887.mp3:  A young boy is running a race.
2025-10-07 03:53:17,803 INFO - ✓ Worker 0: common_voice_en_116775.mp3:  Two people walked toward a potato chip truck.
2025-10-07 03:53:17,862 INFO - ✓ Worker 9: common_voice_en_116791.mp3:  A young boy's operates a pretend shop.
2025-10-07 03:53:18,332 INFO - ✓ Worker 11: common_voice_en_116784.mp3:  An adult and a child walking around outdoors in autumn.
2025-10-07 03:53:18,339 INFO - ✓ Worker 15: common_voice_en_116782.mp3:  These two women are busy with writing and computer use.
2025-10-07 03:53:18,480 INFO - ✓ Worker 3: common_voice_en_116779.mp3:  Two large grey dogs running through a grassy field.
2025-10-07 03:53:18,692 INFO - ✓ Worker 13: common_voice_en_116777.mp3:  A group

Downloading batch 6:   2%|▏         | 1/48 [00:00<00:07,  6.01it/s]



Downloading batch 6:  46%|████▌     | 22/48 [00:00<00:00, 96.32it/s]



Downloading batch 6:  75%|███████▌  | 36/48 [00:00<00:00, 112.95it/s]



Downloading batch 6: 100%|██████████| 48/48 [00:00<00:00, 101.60it/s]

2025-10-07 03:53:29,822 INFO - Transcribing 48 files in parallel...





2025-10-07 03:53:32,484 INFO - ✓ Worker 2: common_voice_en_119479.mp3:  a tourist taking pictures.
2025-10-07 03:53:32,925 INFO - ✓ Worker 11: common_voice_en_117400.mp3:  Play the Track Como.
2025-10-07 03:53:32,958 INFO - ✓ Worker 14: common_voice_en_116923.mp3:  Three people are performing music.
2025-10-07 03:53:33,132 INFO - ✓ Worker 9: common_voice_en_117674.mp3:  Find a picture of musical.
2025-10-07 03:53:33,219 INFO - ✓ Worker 1: common_voice_en_116925.mp3:  The boy is jumping in the air.
2025-10-07 03:53:33,505 INFO - ✓ Worker 6: common_voice_en_119488.mp3:  A band playing a small gig in Baltimore.
2025-10-07 03:53:33,673 INFO - ✓ Worker 5: common_voice_en_119491.mp3:  Several people gather around the laptop at a table.
2025-10-07 03:53:33,991 INFO - ✓ Worker 0: common_voice_en_117405.mp3:  I add tonic it to the fresh electronic playlist.
2025-10-07 03:53:34,255 INFO - ✓ Worker 8: common_voice_en_119486.mp3:  Chefs prepare food for patterns of a sushi restaurant.
2025-10-07 0

Downloading batch 7:  46%|████▌     | 22/48 [00:00<00:00, 95.57it/s]



Downloading batch 7:  94%|█████████▍| 45/48 [00:00<00:00, 143.14it/s]



Downloading batch 7: 100%|██████████| 48/48 [00:00<00:00, 113.61it/s]

2025-10-07 03:53:44,933 INFO - Transcribing 48 files in parallel...





2025-10-07 03:53:48,734 INFO - ✓ Worker 1: common_voice_en_119543.mp3:  A young girl is sliding down on a sled.
2025-10-07 03:53:48,862 INFO - ✓ Worker 7: common_voice_en_119544.mp3:  2 goals sitting by a tree while playing
2025-10-07 03:53:48,864 INFO - ✓ Worker 2: common_voice_en_121104.mp3:  A female artist sketching a portrait.
2025-10-07 03:53:49,284 INFO - ✓ Worker 8: common_voice_en_119539.mp3:  A girl is playing in the fountain fully clothed.
2025-10-07 03:53:49,397 INFO - ✓ Worker 9: common_voice_en_121231.mp3:  A woman is wearing a bright orange and brown shirt.
2025-10-07 03:53:49,507 INFO - ✓ Worker 13: common_voice_en_121234.mp3:  A dog catching a flexible frisbee in its mouth.
2025-10-07 03:53:49,665 INFO - ✓ Worker 14: common_voice_en_121239.mp3:  With a small brown dog, runs on the snow.
2025-10-07 03:53:49,858 INFO - ✓ Worker 0: common_voice_en_119538.mp3:  People relax in the grass at night time during a carnival.
2025-10-07 03:53:50,001 INFO - ✓ Worker 5: common_voic

Downloading batch 8:   2%|▏         | 1/48 [00:00<00:08,  5.77it/s]



Downloading batch 8:  48%|████▊     | 23/48 [00:00<00:00, 93.63it/s]



Downloading batch 8:  98%|█████████▊| 47/48 [00:00<00:00, 139.42it/s]



Downloading batch 8: 100%|██████████| 48/48 [00:00<00:00, 84.11it/s] 

2025-10-07 03:54:01,135 INFO - Transcribing 48 files in parallel...





2025-10-07 03:54:04,147 INFO - ✓ Worker 4: common_voice_en_122640.mp3:  Office workers having fun.
2025-10-07 03:54:04,873 INFO - ✓ Worker 7: common_voice_en_122616.mp3:  A woman in stockings and short shorts.
2025-10-07 03:54:05,032 INFO - ✓ Worker 9: common_voice_en_122678.mp3:  A boy is posing next to his scooter.
2025-10-07 03:54:05,276 INFO - ✓ Worker 12: common_voice_en_122643.mp3:  Two people on the subway are sleeping in the corner.
2025-10-07 03:54:05,287 INFO - ✓ Worker 14: common_voice_en_122647.mp3:  A lounge grill with pedestrians casually walking by.
2025-10-07 03:54:05,353 INFO - ✓ Worker 6: common_voice_en_122656.mp3:  A man stands outside his home by some yellow boxes.
2025-10-07 03:54:05,369 INFO - ✓ Worker 2: common_voice_en_122655.mp3:  A group of teenage boys hang out on a loft.
2025-10-07 03:54:05,385 INFO - ✓ Worker 5: common_voice_en_122626.mp3:  Two wrestlers jump in a ring wall in official watches.
2025-10-07 03:54:05,452 INFO - ✓ Worker 10: common_voice_en_12

Downloading batch 9:  42%|████▏     | 20/48 [00:00<00:00, 95.76it/s]



Downloading batch 9:  88%|████████▊ | 42/48 [00:00<00:00, 145.63it/s]



Downloading batch 9: 100%|██████████| 48/48 [00:00<00:00, 125.46it/s]

2025-10-07 03:54:16,739 INFO - Transcribing 48 files in parallel...





2025-10-07 03:54:19,812 INFO - ✓ Worker 13: common_voice_en_122781.mp3:  Three performers at a show.
2025-10-07 03:54:20,248 INFO - ✓ Worker 1: common_voice_en_122759.mp3:  A boy plays with a train set.
2025-10-07 03:54:20,366 INFO - ✓ Worker 11: common_voice_en_122761.mp3:  Two men fight over a waffle.
2025-10-07 03:54:20,461 INFO - ✓ Worker 10: common_voice_en_122793.mp3:  A young puppy cycles from his mother.
2025-10-07 03:54:20,620 INFO - ✓ Worker 5: common_voice_en_122749.mp3:  People outside of an outdoor world shop.
2025-10-07 03:54:20,730 INFO - ✓ Worker 7: common_voice_en_122783.mp3:  An inline skater grinding on a rail.
2025-10-07 03:54:21,153 INFO - ✓ Worker 14: common_voice_en_122789.mp3:  A group of unlockers are looking at a tree.
2025-10-07 03:54:21,265 INFO - ✓ Worker 12: common_voice_en_122774.mp3:  A man is standing on a rooftop looking at his camera.
2025-10-07 03:54:21,287 INFO - ✓ Worker 9: common_voice_en_122773.mp3:  A man standing on a table filming with people 

Downloading batch 10:   2%|▏         | 1/48 [00:00<00:08,  5.86it/s]



Downloading batch 10:  40%|███▉      | 19/48 [00:00<00:00, 84.31it/s]



Downloading batch 10:  75%|███████▌  | 36/48 [00:00<00:00, 115.44it/s]



Downloading batch 10: 100%|██████████| 48/48 [00:00<00:00, 118.75it/s]

2025-10-07 03:54:31,788 INFO - Transcribing 48 files in parallel...





2025-10-07 03:54:34,836 INFO - ✓ Worker 11: common_voice_en_122896.mp3:  Doug Hurden cows.
2025-10-07 03:54:34,970 INFO - ✓ Worker 1: common_voice_en_122868.mp3:  Two men wearing sunglasses.
2025-10-07 03:54:35,024 INFO - ✓ Worker 5: common_voice_en_122882.mp3:  A man surfing a crashing wave.
2025-10-07 03:54:35,075 INFO - ✓ Worker 12: common_voice_en_122904.mp3:  A girl underwater in a pool.
2025-10-07 03:54:35,754 INFO - ✓ Worker 2: common_voice_en_122855.mp3:  A man is swinging on a rope over water.
2025-10-07 03:54:35,890 INFO - ✓ Worker 4: common_voice_en_122870.mp3:  Man in blue shirt wearing a harness while cleaning windows.
2025-10-07 03:54:35,965 INFO - ✓ Worker 10: common_voice_en_122877.mp3:  A child making a sandcastle on the beach.
2025-10-07 03:54:36,265 INFO - ✓ Worker 15: common_voice_en_122891.mp3:  A woman performing with a guitar on a crowded street.
2025-10-07 03:54:36,372 INFO - ✓ Worker 14: common_voice_en_122881.mp3:  A woman shades herself from the sun with her 

Downloading batch 11:   2%|▏         | 1/48 [00:00<00:07,  6.19it/s]



Downloading batch 11:  50%|█████     | 24/48 [00:00<00:00, 110.66it/s]



Downloading batch 11:  94%|█████████▍| 45/48 [00:00<00:00, 146.87it/s]



Downloading batch 11: 100%|██████████| 48/48 [00:00<00:00, 117.06it/s]

2025-10-07 03:54:48,041 INFO - Transcribing 48 files in parallel...





2025-10-07 03:54:51,094 INFO - ✓ Worker 4: common_voice_en_125066.mp3:  Man watching child in hallway.
2025-10-07 03:54:51,218 INFO - ✓ Worker 10: common_voice_en_12492.mp3:  Give this novel four stars.
2025-10-07 03:54:51,321 INFO - ✓ Worker 1: common_voice_en_124800.mp3:  Escape board jumps down a bridge.
2025-10-07 03:54:51,442 INFO - ✓ Worker 2: common_voice_en_12487.mp3:  What animated movies are showing nearby?
2025-10-07 03:54:51,778 INFO - ✓ Worker 6: common_voice_en_125064.mp3:  I water my plants every day after sunset.
2025-10-07 03:54:51,817 INFO - ✓ Worker 14: common_voice_en_124798.mp3:  A woman sitting in an art museum writing.
2025-10-07 03:54:51,911 INFO - ✓ Worker 7: common_voice_en_125065.mp3:  A child force syrup on a spinning device.
2025-10-07 03:54:52,224 INFO - ✓ Worker 12: common_voice_en_125149.mp3:  Working from home has both drawbacks and advantages.
2025-10-07 03:54:52,347 INFO - ✓ Worker 15: common_voice_en_124801.mp3:  A group of three dogs are walking thr

Downloading batch 12:   2%|▏         | 1/48 [00:00<00:07,  6.20it/s]



Downloading batch 12:  40%|███▉      | 19/48 [00:00<00:00, 85.38it/s]



Downloading batch 12:  65%|██████▍   | 31/48 [00:00<00:00, 97.65it/s]



Downloading batch 12: 100%|██████████| 48/48 [00:00<00:00, 112.97it/s]

2025-10-07 03:55:02,586 INFO - Transcribing 48 files in parallel...





2025-10-07 03:55:05,690 INFO - ✓ Worker 0: common_voice_en_127894.mp3:  Five people working on something.
2025-10-07 03:55:05,861 INFO - ✓ Worker 3: common_voice_en_127901.mp3:  Two dogs play with a bottle.
2025-10-07 03:55:06,177 INFO - ✓ Worker 4: common_voice_en_127902.mp3:  A man plays guitar while sitting.
2025-10-07 03:55:06,236 INFO - ✓ Worker 1: common_voice_en_127912.mp3:  Three people play soccer in the sand.
2025-10-07 03:55:06,661 INFO - ✓ Worker 11: common_voice_en_127920.mp3:  A man standing at a table, standing things.
2025-10-07 03:55:07,097 INFO - ✓ Worker 7: common_voice_en_127900.mp3:  A man in Red Jersey walking next to the mountain.
2025-10-07 03:55:07,213 INFO - ✓ Worker 8: common_voice_en_127914.mp3:  A man discussing a story with a woman in a robe.
2025-10-07 03:55:07,244 INFO - ✓ Worker 6: common_voice_en_127897.mp3:  Toddler boy and overall is sliding down an orange slide.
2025-10-07 03:55:07,348 INFO - ✓ Worker 15: common_voice_en_127919.mp3:  A young girl we

Downloading batch 13:  38%|███▊      | 18/48 [00:00<00:00, 83.72it/s]



Downloading batch 13:  85%|████████▌ | 41/48 [00:00<00:00, 138.65it/s]



Downloading batch 13: 100%|██████████| 48/48 [00:00<00:00, 120.73it/s]

2025-10-07 03:55:17,949 INFO - Transcribing 48 files in parallel...





2025-10-07 03:55:21,485 INFO - ✓ Worker 8: common_voice_en_127975.mp3:  Men playing a game in the park.
2025-10-07 03:55:21,826 INFO - ✓ Worker 2: common_voice_en_127962.mp3:  A little girl is looking at the flowers.
2025-10-07 03:55:21,927 INFO - ✓ Worker 4: common_voice_en_127978.mp3:  A kid with a yellow shirt on looking.
2025-10-07 03:55:22,048 INFO - ✓ Worker 5: common_voice_en_127961.mp3:  Man in gold pants looking at the camera.
2025-10-07 03:55:22,123 INFO - ✓ Worker 10: common_voice_en_127963.mp3:  Some men crossing the street in a busy city.
2025-10-07 03:55:22,321 INFO - ✓ Worker 6: common_voice_en_127965.mp3:  Three young children feeding a turtle water from a blue container.
2025-10-07 03:55:22,331 INFO - ✓ Worker 0: common_voice_en_127970.mp3:  Three football players during a game making a tackle.
2025-10-07 03:55:22,809 INFO - ✓ Worker 12: common_voice_en_127969.mp3:  A man taking a picture with a camera that has a large lens.
2025-10-07 03:55:22,988 INFO - ✓ Worker 1: c

Downloading batch 14:   2%|▏         | 1/48 [00:00<00:07,  6.20it/s]



Downloading batch 14:  44%|████▍     | 21/48 [00:00<00:00, 94.95it/s]



Downloading batch 14:  85%|████████▌ | 41/48 [00:00<00:00, 134.20it/s]



Downloading batch 14: 100%|██████████| 48/48 [00:00<00:00, 120.40it/s]

2025-10-07 03:55:33,516 INFO - Transcribing 48 files in parallel...





2025-10-07 03:55:36,783 INFO - ✓ Worker 4: common_voice_en_131560.mp3:  The sun shines so brightly.
2025-10-07 03:55:37,418 INFO - ✓ Worker 15: common_voice_en_132099.mp3:  Man finished throwing bowling ball towards bowling pens.
2025-10-07 03:55:37,560 INFO - ✓ Worker 0: common_voice_en_130615.mp3:  Three people are putting condirs on a cake.
2025-10-07 03:55:37,563 INFO - ✓ Worker 10: common_voice_en_132054.mp3:  Man surfing on a wave in the ocean.
2025-10-07 03:55:37,598 INFO - ✓ Worker 2: common_voice_en_130611.mp3:  A man wearing a black jacket looking down.
2025-10-07 03:55:38,062 INFO - ✓ Worker 5: common_voice_en_130612.mp3:  Our man lying next to a horse made of sand.
2025-10-07 03:55:38,353 INFO - ✓ Worker 1: common_voice_en_130610.mp3:  A black and white dog is running and splashing in water.
2025-10-07 03:55:38,621 INFO - ✓ Worker 12: common_voice_en_130613.mp3:  Three men are playing golf while one is holding their golf bag.
2025-10-07 03:55:38,649 INFO - ✓ Worker 11: comm

Downloading batch 15:  81%|████████▏ | 39/48 [00:00<00:00, 129.81it/s]



Downloading batch 15: 100%|██████████| 48/48 [00:00<00:00, 119.88it/s]

2025-10-07 03:55:49,109 INFO - Transcribing 48 files in parallel...





2025-10-07 03:55:51,385 INFO - ✓ Worker 13: common_voice_en_137150.mp3:  Best thanks.
2025-10-07 03:55:52,527 INFO - ✓ Worker 2: common_voice_en_136449.mp3:  Let's listen to my few ship.
2025-10-07 03:55:52,755 INFO - ✓ Worker 0: common_voice_en_136401.mp3:  Two women on a movie set reviewing film.
2025-10-07 03:55:52,789 INFO - ✓ Worker 12: common_voice_en_1373590.mp3:  It takes heat to bring out the oil.
2025-10-07 03:55:52,999 INFO - ✓ Worker 14: common_voice_en_136418.mp3:  Racecar driver driving his car on the track.
2025-10-07 03:55:53,027 INFO - ✓ Worker 7: common_voice_en_136415.mp3:  I crew aboard a sailboat at sea.
2025-10-07 03:55:53,125 INFO - ✓ Worker 10: common_voice_en_136404.mp3:  A woman begins to climb onto her bike.
2025-10-07 03:55:53,243 INFO - ✓ Worker 5: common_voice_en_136419.mp3:  This little boy is sleeping on some type of furniture.
2025-10-07 03:55:53,503 INFO - ✓ Worker 3: common_voice_en_136410.mp3:  A man in a green scarf is bearing his teeth.
2025-10-07 

Downloading batch 16:  35%|███▌      | 17/48 [00:00<00:00, 77.28it/s]



Downloading batch 16:  88%|████████▊ | 42/48 [00:00<00:00, 145.89it/s]



Downloading batch 16: 100%|██████████| 48/48 [00:00<00:00, 117.23it/s]

2025-10-07 03:56:03,900 INFO - Transcribing 48 files in parallel...





2025-10-07 03:56:07,148 INFO - ✓ Worker 4: common_voice_en_143070.mp3:  Someone is making a sand sculpture.
2025-10-07 03:56:07,504 INFO - ✓ Worker 9: common_voice_en_143072.mp3:  Nightclub dancers showing her moves.
2025-10-07 03:56:07,678 INFO - ✓ Worker 6: common_voice_en_143091.mp3:  Four older people are sitting on a bench.
2025-10-07 03:56:07,692 INFO - ✓ Worker 8: common_voice_en_141414.mp3:  Many birds are sitting on a concrete wall.
2025-10-07 03:56:07,862 INFO - ✓ Worker 11: common_voice_en_143113.mp3:  a dog putting his legs into a pond.
2025-10-07 03:56:07,896 INFO - ✓ Worker 15: common_voice_en_148025.mp3:  This is the point. There's the rub.
2025-10-07 03:56:07,923 INFO - ✓ Worker 7: common_voice_en_143097.mp3:  The brown dog is standing on the sandy beach.
2025-10-07 03:56:08,149 INFO - ✓ Worker 1: common_voice_en_141413.mp3:  A man and woman walking down the street.
2025-10-07 03:56:08,165 INFO - ✓ Worker 10: common_voice_en_143060.mp3:  The lake was nestled in the ring

Downloading batch 17:   2%|▏         | 1/48 [00:00<00:08,  5.82it/s]



Downloading batch 17:  42%|████▏     | 20/48 [00:00<00:00, 89.31it/s]



Downloading batch 17:  85%|████████▌ | 41/48 [00:00<00:00, 135.78it/s]



Downloading batch 17: 100%|██████████| 48/48 [00:00<00:00, 114.14it/s]

2025-10-07 03:56:19,239 INFO - Transcribing 48 files in parallel...





2025-10-07 03:56:22,446 INFO - ✓ Worker 2: common_voice_en_155779.mp3:  Rate the hundred brothers to stars.
2025-10-07 03:56:22,545 INFO - ✓ Worker 12: common_voice_en_156039.mp3:  A small boy eating chocolate cake.
2025-10-07 03:56:22,545 INFO - ✓ Worker 6: common_voice_en_151638.mp3:  I thought you were asleep.
2025-10-07 03:56:22,909 INFO - ✓ Worker 0: common_voice_en_1524209.mp3:  He sent the boy on a short errand.
2025-10-07 03:56:23,263 INFO - ✓ Worker 15: common_voice_en_153038.mp3:  A bald man putting on a red shirt.
2025-10-07 03:56:23,320 INFO - ✓ Worker 3: common_voice_en_15268.mp3:  A sky jumper falls toward the sea and the earth.
2025-10-07 03:56:23,357 INFO - ✓ Worker 11: common_voice_en_15273.mp3:  A man tests model airplanes in an open field.
2025-10-07 03:56:23,906 INFO - ✓ Worker 8: common_voice_en_156034.mp3:  Two musicians are playing a tune together at a construction site.
2025-10-07 03:56:24,062 INFO - ✓ Worker 10: common_voice_en_156035.mp3:  A woman and child ar

Downloading batch 18:  33%|███▎      | 16/48 [00:00<00:00, 68.07it/s]



Downloading batch 18:  92%|█████████▏| 44/48 [00:00<00:00, 146.20it/s]



Downloading batch 18: 100%|██████████| 48/48 [00:00<00:00, 119.40it/s]

2025-10-07 03:56:34,115 INFO - Transcribing 48 files in parallel...





2025-10-07 03:56:37,390 INFO - ✓ Worker 8: common_voice_en_156092.mp3:  There are three kids outside sitting.
2025-10-07 03:56:37,589 INFO - ✓ Worker 11: common_voice_en_156085.mp3:  The medical team is performing a surgery.
2025-10-07 03:56:37,597 INFO - ✓ Worker 0: common_voice_en_156076.mp3:  Three women are walking on the sidewalk.
2025-10-07 03:56:38,204 INFO - ✓ Worker 12: common_voice_en_156102.mp3:  Three boys leaping into a swimming pool.
2025-10-07 03:56:38,318 INFO - ✓ Worker 2: common_voice_en_156078.mp3:  These people are gathered in a room reading something.
2025-10-07 03:56:38,613 INFO - ✓ Worker 7: common_voice_en_156099.mp3:  The group of girls are smiling while posing for a picture.
2025-10-07 03:56:38,629 INFO - ✓ Worker 15: common_voice_en_156094.mp3:  Biker fixing a flat on the side of a trail.
2025-10-07 03:56:38,706 INFO - ✓ Worker 9: common_voice_en_156091.mp3:  A boy on a deck with a red bucket on his head.
2025-10-07 03:56:38,722 INFO - ✓ Worker 13: common_voi

Downloading batch 19:   2%|▏         | 1/48 [00:00<00:08,  5.61it/s]



Downloading batch 19:  46%|████▌     | 22/48 [00:00<00:00, 91.54it/s]



Downloading batch 19: 100%|██████████| 48/48 [00:00<00:00, 115.34it/s]

2025-10-07 03:56:49,449 INFO - Transcribing 48 files in parallel...





2025-10-07 03:56:52,651 INFO - ✓ Worker 7: common_voice_en_156168.mp3:  A dog plays with the toy.
2025-10-07 03:56:53,125 INFO - ✓ Worker 14: common_voice_en_156157.mp3:  Three people are laying on a beach.
2025-10-07 03:56:53,558 INFO - ✓ Worker 3: common_voice_en_156151.mp3:  A man attempting to climb a fake rock wall.
2025-10-07 03:56:53,797 INFO - ✓ Worker 12: common_voice_en_156170.mp3:  Several young men are playing volleyball, one is jumping.
2025-10-07 03:56:53,819 INFO - ✓ Worker 6: common_voice_en_156150.mp3:  A boy with a golden crown and a white robe.
2025-10-07 03:56:53,837 INFO - ✓ Worker 1: common_voice_en_156154.mp3:  Eman is holding a sign directing people to a restaurant.
2025-10-07 03:56:54,035 INFO - ✓ Worker 8: common_voice_en_156156.mp3:  a man stands in the midst of a bike wheels.
2025-10-07 03:56:54,614 INFO - ✓ Worker 2: common_voice_en_156149.mp3:  ML Performer is singing on stage with his band while playing a guitar.
2025-10-07 03:56:54,685 INFO - ✓ Worker 11

Downloading batch 20:   2%|▏         | 1/48 [00:00<00:08,  5.78it/s]



Downloading batch 20:  44%|████▍     | 21/48 [00:00<00:00, 91.25it/s]



Downloading batch 20:  94%|█████████▍| 45/48 [00:00<00:00, 142.51it/s]



Downloading batch 20: 100%|██████████| 48/48 [00:00<00:00, 121.71it/s]

2025-10-07 03:57:05,139 INFO - Transcribing 48 files in parallel...





2025-10-07 03:57:08,204 INFO - ✓ Worker 7: common_voice_en_156231.mp3:  2 dogs in the snow.
2025-10-07 03:57:08,622 INFO - ✓ Worker 13: common_voice_en_156235.mp3:  Two men working construction are sitting down.
2025-10-07 03:57:08,646 INFO - ✓ Worker 1: common_voice_en_156215.mp3:  Two men with beards are walking.
2025-10-07 03:57:08,876 INFO - ✓ Worker 4: common_voice_en_156219.mp3:  A dog running up a sandy hill.
2025-10-07 03:57:09,209 INFO - ✓ Worker 3: common_voice_en_156208.mp3:  A little girl sitting on a string on the playground.
2025-10-07 03:57:09,321 INFO - ✓ Worker 2: common_voice_en_156206.mp3:  A singer is wearing a clown costume in the makeup.
2025-10-07 03:57:09,549 INFO - ✓ Worker 12: common_voice_en_156217.mp3:  The group of women is wearing an all-white costume.
2025-10-07 03:57:09,570 INFO - ✓ Worker 9: common_voice_en_156229.mp3:  Women play volleyball while 2 referees manage the game.
2025-10-07 03:57:09,619 INFO - ✓ Worker 5: common_voice_en_156225.mp3:  Numbers

Downloading batch 21:   2%|▏         | 1/48 [00:00<00:07,  6.30it/s]



Downloading batch 21:  46%|████▌     | 22/48 [00:00<00:00, 100.12it/s]



Downloading batch 21:  90%|████████▉ | 43/48 [00:00<00:00, 133.33it/s]



Downloading batch 21: 100%|██████████| 48/48 [00:00<00:00, 111.39it/s]

2025-10-07 03:57:20,838 INFO - Transcribing 48 files in parallel...





2025-10-07 03:57:24,138 INFO - ✓ Worker 0: common_voice_en_156274.mp3:  A band is performing on stage.
2025-10-07 03:57:24,406 INFO - ✓ Worker 7: common_voice_en_156286.mp3:  A group of men play rugby.
2025-10-07 03:57:24,768 INFO - ✓ Worker 13: common_voice_en_156303.mp3:  A woman blowing on a dandelion.
2025-10-07 03:57:25,035 INFO - ✓ Worker 12: common_voice_en_156280.mp3:  A man is standing and discussing with his colleagues.
2025-10-07 03:57:25,074 INFO - ✓ Worker 2: common_voice_en_156273.mp3:  A group of teenagers standing outside of a convenience store.
2025-10-07 03:57:25,110 INFO - ✓ Worker 14: common_voice_en_156275.mp3:  A man in a kilt playing the bug pipes.
2025-10-07 03:57:25,329 INFO - ✓ Worker 3: common_voice_en_156278.mp3:  The group of Asian teenagers are holding flags on the street.
2025-10-07 03:57:25,535 INFO - ✓ Worker 1: common_voice_en_156269.mp3:  A shirtless male is riding a skateboard, performing a trick.
2025-10-07 03:57:25,543 INFO - ✓ Worker 8: common_voi

Downloading batch 22:   2%|▏         | 1/48 [00:00<00:07,  6.47it/s]



Downloading batch 22:  44%|████▍     | 21/48 [00:00<00:00, 98.45it/s]



Downloading batch 22:  90%|████████▉ | 43/48 [00:00<00:00, 146.47it/s]



Downloading batch 22: 100%|██████████| 48/48 [00:00<00:00, 116.27it/s]

2025-10-07 03:57:36,585 INFO - Transcribing 48 files in parallel...





2025-10-07 03:57:40,161 INFO - ✓ Worker 12: common_voice_en_156350.mp3:  Two beige dogs playing the grass.
2025-10-07 03:57:40,868 INFO - ✓ Worker 2: common_voice_en_156344.mp3:  People are gathering under colored lights in a large tent.
2025-10-07 03:57:41,267 INFO - ✓ Worker 8: common_voice_en_156364.mp3:  Picture of a little boy on a sidewalk looking at the pigeon.
2025-10-07 03:57:41,289 INFO - ✓ Worker 11: common_voice_en_156377.mp3:  Some men are looking at the computer screen in an office.
2025-10-07 03:57:41,401 INFO - ✓ Worker 4: common_voice_en_156356.mp3:  A man and a human are kissing and taking a picture of themselves.
2025-10-07 03:57:41,431 INFO - ✓ Worker 6: common_voice_en_156369.mp3:  Young child playing with a soccer ball in a grassy area.
2025-10-07 03:57:41,585 INFO - ✓ Worker 7: common_voice_en_156357.mp3:  A boy laying on a rock in the middle of a calm stream.
2025-10-07 03:57:41,661 INFO - ✓ Worker 14: common_voice_en_156367.mp3:  A skier is jumping off a steep 

Downloading batch 23:  42%|████▏     | 20/48 [00:00<00:00, 91.17it/s]



Downloading batch 23:  90%|████████▉ | 43/48 [00:00<00:00, 145.96it/s]



Downloading batch 23: 100%|██████████| 48/48 [00:00<00:00, 118.09it/s]

2025-10-07 03:57:53,244 INFO - Transcribing 48 files in parallel...





2025-10-07 03:57:56,754 INFO - ✓ Worker 5: common_voice_en_156431.mp3:  A little kid running towards the camera.
2025-10-07 03:57:57,160 INFO - ✓ Worker 6: common_voice_en_156416.mp3:  and all straight with some people window shopping.
2025-10-07 03:57:57,168 INFO - ✓ Worker 0: common_voice_en_156409.mp3:  A group of men are running on a beach.
2025-10-07 03:57:57,215 INFO - ✓ Worker 15: common_voice_en_156420.mp3:  A boy in yellow walks down the street.
2025-10-07 03:57:57,733 INFO - ✓ Worker 2: common_voice_en_156411.mp3:  A small girl sits in the snow with a pink nose.
2025-10-07 03:57:57,746 INFO - ✓ Worker 10: common_voice_en_156422.mp3:  Men in colorful shirts show camaraderie after bicycle race.
2025-10-07 03:57:57,953 INFO - ✓ Worker 7: common_voice_en_156407.mp3:  Two men ride bikes down the street on a sunny day.
2025-10-07 03:57:58,042 INFO - ✓ Worker 3: common_voice_en_156424.mp3:  Three workmen working together. One is drilling the wood.
2025-10-07 03:57:58,066 INFO - ✓ Wo

Downloading batch 24:  31%|███▏      | 15/48 [00:00<00:00, 67.94it/s]



Downloading batch 24:  73%|███████▎  | 35/48 [00:00<00:00, 118.81it/s]



Downloading batch 24: 100%|██████████| 48/48 [00:00<00:00, 103.56it/s]

2025-10-07 03:58:09,058 INFO - Transcribing 48 files in parallel...





2025-10-07 03:58:11,610 INFO - ✓ Worker 12: common_voice_en_15735475.mp3:  Between their teeth.
2025-10-07 03:58:11,873 INFO - ✓ Worker 7: common_voice_en_158507.mp3:  That'll close my account.
2025-10-07 03:58:12,528 INFO - ✓ Worker 15: common_voice_en_156480.mp3:  A girl in black pants doing exercises.
2025-10-07 03:58:12,561 INFO - ✓ Worker 3: common_voice_en_158532.mp3:  Tokyo is so humid this time of year.
2025-10-07 03:58:12,866 INFO - ✓ Worker 0: common_voice_en_156477.mp3:  to Indian women trading to each other.
2025-10-07 03:58:12,957 INFO - ✓ Worker 1: common_voice_en_156483.mp3:  People playing soccer on a soccer field during sunset.
2025-10-07 03:58:13,253 INFO - ✓ Worker 2: common_voice_en_15798427.mp3:  She is absent from working in the drugstore today.
2025-10-07 03:58:13,394 INFO - ✓ Worker 14: common_voice_en_156485.mp3:  A man on a motorcycle is racing at the truck.
2025-10-07 03:58:13,483 INFO - ✓ Worker 10: common_voice_en_15734171.mp3:  A quart of link is modeled f

Downloading batch 25:  44%|████▍     | 21/48 [00:00<00:00, 93.40it/s]



Downloading batch 25:  96%|█████████▌| 46/48 [00:00<00:00, 154.65it/s]



Downloading batch 25: 100%|██████████| 48/48 [00:00<00:00, 124.25it/s]

2025-10-07 03:58:24,543 INFO - Transcribing 48 files in parallel...





2025-10-07 03:58:27,799 INFO - ✓ Worker 0: common_voice_en_160346.mp3:  Two girls in a swimming pool.
2025-10-07 03:58:28,086 INFO - ✓ Worker 1: common_voice_en_160345.mp3:  Two dogs in a grassy field.
2025-10-07 03:58:28,111 INFO - ✓ Worker 6: common_voice_en_160311.mp3:  Children are being splashed with water.
2025-10-07 03:58:28,246 INFO - ✓ Worker 2: common_voice_en_160359.mp3:  Man places order in coffee shop.
2025-10-07 03:58:28,404 INFO - ✓ Worker 9: common_voice_en_160348.mp3:  A woman kayaking with her small dog.
2025-10-07 03:58:28,634 INFO - ✓ Worker 11: common_voice_en_160347.mp3:  A little girl riding a toy horse during Christmas.
2025-10-07 03:58:28,739 INFO - ✓ Worker 10: common_voice_en_160360.mp3:  Three men examining clothing in a clothing shop.
2025-10-07 03:58:29,057 INFO - ✓ Worker 12: common_voice_en_160368.mp3:  A young Asian child is smiling from around the tree.
2025-10-07 03:58:29,094 INFO - ✓ Worker 3: common_voice_en_160358.mp3:  Everyone is taking in the sc

Downloading batch 26:  52%|█████▏    | 25/48 [00:00<00:00, 108.78it/s]



Downloading batch 26:  88%|████████▊ | 42/48 [00:00<00:00, 132.31it/s]



Downloading batch 26: 100%|██████████| 48/48 [00:00<00:00, 112.86it/s]

2025-10-07 03:58:40,388 INFO - Transcribing 48 files in parallel...





2025-10-07 03:58:43,869 INFO - ✓ Worker 6: common_voice_en_16086.mp3:  The smiling girl has climbed a tree.
2025-10-07 03:58:44,106 INFO - ✓ Worker 9: common_voice_en_16098.mp3:  A couple watching a show at a restaurant.
2025-10-07 03:58:44,161 INFO - ✓ Worker 11: common_voice_en_16105.mp3:  African children make funny faces at the camera.
2025-10-07 03:58:44,645 INFO - ✓ Worker 5: common_voice_en_16101.mp3:  A woman sits reading among many phases of flowers.
2025-10-07 03:58:44,651 INFO - ✓ Worker 15: common_voice_en_16106.mp3:  A woman is walking a blue bike across the road.
2025-10-07 03:58:44,698 INFO - ✓ Worker 1: common_voice_en_16085.mp3:  Two dark-haired girls are on a tire swing.
2025-10-07 03:58:44,720 INFO - ✓ Worker 3: common_voice_en_16089.mp3:  Three men excavating what appears to be a mountainside.
2025-10-07 03:58:45,066 INFO - ✓ Worker 4: common_voice_en_16087.mp3:  A dark-haired drummer is playing his set with enthusiasm.
2025-10-07 03:58:45,081 INFO - ✓ Worker 0: com

Downloading batch 27:  40%|███▉      | 19/48 [00:00<00:00, 89.82it/s]



Downloading batch 27:  90%|████████▉ | 43/48 [00:00<00:00, 148.48it/s]



Downloading batch 27: 100%|██████████| 48/48 [00:00<00:00, 124.24it/s]

2025-10-07 03:58:56,393 INFO - Transcribing 48 files in parallel...





2025-10-07 03:59:00,497 INFO - ✓ Worker 9: common_voice_en_162540.mp3:  Two young white males are outside near many bushes.
2025-10-07 03:59:00,582 INFO - ✓ Worker 7: common_voice_en_162514.mp3:  A little boy sits on top of a big rock.
2025-10-07 03:59:01,056 INFO - ✓ Worker 12: common_voice_en_162534.mp3:  Two boys race on ice skates while others watch.
2025-10-07 03:59:01,267 INFO - ✓ Worker 1: common_voice_en_162516.mp3:  A man takes a picture of a girl with a patriotic outfit.
2025-10-07 03:59:01,421 INFO - ✓ Worker 0: common_voice_en_162513.mp3:  A man is speaking a presentation while others watch a slide show.
2025-10-07 03:59:01,604 INFO - ✓ Worker 8: common_voice_en_162535.mp3:  A woman is sitting in a vegetable market surrounded by many colorful foods.
2025-10-07 03:59:01,723 INFO - ✓ Worker 11: common_voice_en_162515.mp3:  A woman with curly orange hair stands in a classroom with others observing.
2025-10-07 03:59:01,843 INFO - ✓ Worker 5: common_voice_en_162521.mp3:  Bikers 

Downloading batch 28:  38%|███▊      | 18/48 [00:00<00:00, 84.27it/s]



Downloading batch 28:  79%|███████▉  | 38/48 [00:00<00:00, 130.76it/s]



Downloading batch 28: 100%|██████████| 48/48 [00:00<00:00, 109.92it/s]

2025-10-07 03:59:13,072 INFO - Transcribing 48 files in parallel...





2025-10-07 03:59:16,334 INFO - ✓ Worker 6: common_voice_en_162619.mp3:  Multiple people in this women pool.
2025-10-07 03:59:16,383 INFO - ✓ Worker 15: common_voice_en_162632.mp3:  People line up in the ocean.
2025-10-07 03:59:16,581 INFO - ✓ Worker 14: common_voice_en_162631.mp3:  Two men are holding and playing guitars.
2025-10-07 03:59:16,645 INFO - ✓ Worker 3: common_voice_en_162596.mp3:  A couple ladies having a party.
2025-10-07 03:59:16,840 INFO - ✓ Worker 4: common_voice_en_162612.mp3:  A man reading the paper at a cafe.
2025-10-07 03:59:17,087 INFO - ✓ Worker 11: common_voice_en_162605.mp3:  a priest standing against the wooden wall and thinking.
2025-10-07 03:59:17,106 INFO - ✓ Worker 12: common_voice_en_162628.mp3:  The people are standing in front of a building.
2025-10-07 03:59:17,314 INFO - ✓ Worker 5: common_voice_en_162602.mp3:  Children playing soccer on a field while parents watch.
2025-10-07 03:59:17,620 INFO - ✓ Worker 7: common_voice_en_162624.mp3:  A white dog wit

Downloading batch 29:  35%|███▌      | 17/48 [00:00<00:00, 78.84it/s]



Downloading batch 29:  83%|████████▎ | 40/48 [00:00<00:00, 137.68it/s]



Downloading batch 29: 100%|██████████| 48/48 [00:00<00:00, 124.71it/s]

2025-10-07 03:59:27,945 INFO - Transcribing 48 files in parallel...





2025-10-07 03:59:31,313 INFO - ✓ Worker 14: common_voice_en_162684.mp3:  Two men during a football game.
2025-10-07 03:59:31,598 INFO - ✓ Worker 1: common_voice_en_162673.mp3:  A woman in black climbs the rock wall.
2025-10-07 03:59:31,837 INFO - ✓ Worker 4: common_voice_en_162671.mp3:  Man sprays sand image of a man.
2025-10-07 03:59:31,955 INFO - ✓ Worker 5: common_voice_en_162675.mp3:  A woman is walking her dog across the street.
2025-10-07 03:59:31,962 INFO - ✓ Worker 0: common_voice_en_162677.mp3:  A lady with blonde hair helping another person.
2025-10-07 03:59:32,252 INFO - ✓ Worker 10: common_voice_en_162676.mp3:  A woman lying on a rock at a campground.
2025-10-07 03:59:32,746 INFO - ✓ Worker 11: common_voice_en_162680.mp3:  A man crosses the street next to a light rail station.
2025-10-07 03:59:32,963 INFO - ✓ Worker 8: common_voice_en_162691.mp3:  A woman with brown hair sitting on a bench outside of KFA.
2025-10-07 03:59:33,291 INFO - ✓ Worker 9: common_voice_en_162689.mp3

Downloading batch 30:  48%|████▊     | 23/48 [00:00<00:00, 100.19it/s]



Downloading batch 30:  96%|█████████▌| 46/48 [00:00<00:00, 149.27it/s]



Downloading batch 30: 100%|██████████| 48/48 [00:00<00:00, 119.50it/s]

2025-10-07 03:59:45,066 INFO - Transcribing 48 files in parallel...





2025-10-07 03:59:48,413 INFO - ✓ Worker 11: common_voice_en_162790.mp3:  The people are riding on horses.
2025-10-07 03:59:48,822 INFO - ✓ Worker 0: common_voice_en_162767.mp3:  The blue ball rolls steadily towards the pins.
2025-10-07 03:59:49,084 INFO - ✓ Worker 2: common_voice_en_162780.mp3:  A group of men is drinking at a table.
2025-10-07 03:59:49,324 INFO - ✓ Worker 6: common_voice_en_162762.mp3:  A group of people are conversing with one another.
2025-10-07 03:59:49,503 INFO - ✓ Worker 12: common_voice_en_162783.mp3:  A large group of children are walking in a line.
2025-10-07 03:59:49,766 INFO - ✓ Worker 1: common_voice_en_162757.mp3:  Two ballerinas dancing with beautiful pillars and building and background.
2025-10-07 03:59:49,771 INFO - ✓ Worker 4: common_voice_en_162758.mp3:  Several men in White Chefs Jackets work in a kitchen.
2025-10-07 03:59:49,821 INFO - ✓ Worker 15: common_voice_en_162772.mp3:  A waterplay table in which a game can be played.
2025-10-07 03:59:50,037 

Downloading batch 31:  44%|████▍     | 21/48 [00:00<00:00, 95.56it/s]



Downloading batch 31:  81%|████████▏ | 39/48 [00:00<00:00, 126.18it/s]



Downloading batch 31: 100%|██████████| 48/48 [00:00<00:00, 112.50it/s]

2025-10-07 04:00:00,862 INFO - Transcribing 48 files in parallel...





2025-10-07 04:00:04,223 INFO - ✓ Worker 12: common_voice_en_165940.mp3:  Do people walking outside a castle?
2025-10-07 04:00:04,327 INFO - ✓ Worker 3: common_voice_en_165942.mp3:  Three men are sitting at a table.
2025-10-07 04:00:04,375 INFO - ✓ Worker 5: common_voice_en_166807.mp3:  What's the weather in Macedonia?
2025-10-07 04:00:04,424 INFO - ✓ Worker 2: common_voice_en_167183.mp3:  A group of friends go campaign.
2025-10-07 04:00:04,593 INFO - ✓ Worker 11: common_voice_en_167184.mp3:  Two people on a hockey field fighting.
2025-10-07 04:00:04,655 INFO - ✓ Worker 0: common_voice_en_165617.mp3:  Good wine needs no herald, bush.
2025-10-07 04:00:04,839 INFO - ✓ Worker 13: common_voice_en_167188.mp3:  Protesters are holding signs in Spanish.
2025-10-07 04:00:04,875 INFO - ✓ Worker 1: common_voice_en_165934.mp3:  A man with a shirt works on a roof.
2025-10-07 04:00:04,915 INFO - ✓ Worker 7: common_voice_en_165615.mp3:  Go to Nick Fleetwood on my video.
2025-10-07 04:00:05,037 INFO - 

Downloading batch 32:  46%|████▌     | 22/48 [00:00<00:00, 100.73it/s]



Downloading batch 32:  83%|████████▎ | 40/48 [00:00<00:00, 132.09it/s]



Downloading batch 32: 100%|██████████| 48/48 [00:00<00:00, 111.94it/s]

2025-10-07 04:00:15,693 INFO - Transcribing 48 files in parallel...





2025-10-07 04:00:19,067 INFO - ✓ Worker 14: common_voice_en_170146.mp3:  Read the current textbook a zero.
2025-10-07 04:00:19,226 INFO - ✓ Worker 10: common_voice_en_171014.mp3:  An older woman sitting on a bench.
2025-10-07 04:00:19,414 INFO - ✓ Worker 3: common_voice_en_171020.mp3:  A group of little boys playing indoor soccer.
2025-10-07 04:00:19,536 INFO - ✓ Worker 2: common_voice_en_170517.mp3:  I'm working on a dirt road.
2025-10-07 04:00:19,554 INFO - ✓ Worker 5: common_voice_en_170519.mp3:  A woman balances a small child on her hip.
2025-10-07 04:00:19,896 INFO - ✓ Worker 9: common_voice_en_170516.mp3:  These six people are walking outside a building with columns.
2025-10-07 04:00:19,997 INFO - ✓ Worker 8: common_voice_en_171021.mp3:  A brown dog walks towards another animal hiding in the grass.
2025-10-07 04:00:20,124 INFO - ✓ Worker 4: common_voice_en_171031.mp3:  Two female volleyball players look up to find the ball.
2025-10-07 04:00:20,282 INFO - ✓ Worker 6: common_voice_

Downloading batch 33:  46%|████▌     | 22/48 [00:00<00:00, 92.25it/s]



Downloading batch 33: 100%|██████████| 48/48 [00:00<00:00, 117.17it/s]

2025-10-07 04:00:31,974 INFO - Transcribing 48 files in parallel...





2025-10-07 04:00:35,299 INFO - ✓ Worker 12: common_voice_en_171101.mp3:  A woman is watching the tide.
2025-10-07 04:00:35,729 INFO - ✓ Worker 14: common_voice_en_171096.mp3:  The people look around at the glacier.
2025-10-07 04:00:36,198 INFO - ✓ Worker 4: common_voice_en_171093.mp3:  Trives men with a handmade basket full of grass.
2025-10-07 04:00:36,389 INFO - ✓ Worker 3: common_voice_en_171088.mp3:  The gondola is pulling his boat in the canal
2025-10-07 04:00:36,447 INFO - ✓ Worker 13: common_voice_en_171084.mp3:  A man is balancing on a rope in public with silver boots.
2025-10-07 04:00:36,448 INFO - ✓ Worker 6: common_voice_en_171071.mp3:  A woman is walking in winter with a card of groceries.
2025-10-07 04:00:36,572 INFO - ✓ Worker 0: common_voice_en_171074.mp3:  A man wearing a biohazard gas mask.
2025-10-07 04:00:36,643 INFO - ✓ Worker 9: common_voice_en_171073.mp3:  Two young women pose next to a fence in a street.
2025-10-07 04:00:36,951 INFO - ✓ Worker 8: common_voice_en_

Downloading batch 34:  40%|███▉      | 19/48 [00:00<00:00, 85.39it/s]



Downloading batch 34:  88%|████████▊ | 42/48 [00:00<00:00, 135.85it/s]



Downloading batch 34: 100%|██████████| 48/48 [00:00<00:00, 120.94it/s]

2025-10-07 04:00:48,394 INFO - Transcribing 48 files in parallel...





2025-10-07 04:00:51,261 INFO - ✓ Worker 4: common_voice_en_171144.mp3:  Girls in school uniforms.
2025-10-07 04:00:51,586 INFO - ✓ Worker 2: common_voice_en_171145.mp3:  Beautiful place at night time.
2025-10-07 04:00:52,086 INFO - ✓ Worker 6: common_voice_en_171166.mp3:  These friends are posing near the store window.
2025-10-07 04:00:52,366 INFO - ✓ Worker 11: common_voice_en_171167.mp3:  A man surrounded by others in a subway train.
2025-10-07 04:00:52,387 INFO - ✓ Worker 14: common_voice_en_171158.mp3:  A man enjoys a treat on a crowded street.
2025-10-07 04:00:52,516 INFO - ✓ Worker 12: common_voice_en_171148.mp3:  Three young males sitting on a basketball court at night.
2025-10-07 04:00:52,557 INFO - ✓ Worker 13: common_voice_en_171153.mp3:  A female tennis player is casually swinging her racket.
2025-10-07 04:00:52,575 INFO - ✓ Worker 5: common_voice_en_171156.mp3:  A man and a woman are kissing in a city.
2025-10-07 04:00:52,787 INFO - ✓ Worker 8: common_voice_en_171164.mp3:  

Downloading batch 35:  42%|████▏     | 20/48 [00:00<00:00, 84.39it/s]



Downloading batch 35:  98%|█████████▊| 47/48 [00:00<00:00, 154.54it/s]



Downloading batch 35: 100%|██████████| 48/48 [00:00<00:00, 121.90it/s]

2025-10-07 04:01:04,082 INFO - Transcribing 48 files in parallel...





2025-10-07 04:01:07,462 INFO - ✓ Worker 5: common_voice_en_171204.mp3:  A boy in blue carries a bucket.
2025-10-07 04:01:07,691 INFO - ✓ Worker 9: common_voice_en_171226.mp3:  The brown dog is swimming in the water.
2025-10-07 04:01:07,872 INFO - ✓ Worker 10: common_voice_en_171218.mp3:  A woman works in a flower garden.
2025-10-07 04:01:07,921 INFO - ✓ Worker 13: common_voice_en_171225.mp3:  Three kids play, one is blindfolded.
2025-10-07 04:01:07,940 INFO - ✓ Worker 12: common_voice_en_171222.mp3:  A man and a woman are laughing together.
2025-10-07 04:01:08,042 INFO - ✓ Worker 15: common_voice_en_171203.mp3:  Two girls are racing another girl in the air.
2025-10-07 04:01:08,091 INFO - ✓ Worker 6: common_voice_en_171208.mp3:  A little girl riding a toy horse during Christmas.
2025-10-07 04:01:08,200 INFO - ✓ Worker 11: common_voice_en_171213.mp3:  A young child is posing with goofy.
2025-10-07 04:01:08,559 INFO - ✓ Worker 14: common_voice_en_171214.mp3:  A group of people are sitting

Downloading batch 36:  40%|███▉      | 19/48 [00:00<00:00, 86.11it/s]



Downloading batch 36:  75%|███████▌  | 36/48 [00:00<00:00, 118.97it/s]



Downloading batch 36: 100%|██████████| 48/48 [00:00<00:00, 108.80it/s]

2025-10-07 04:01:19,895 INFO - Transcribing 48 files in parallel...





2025-10-07 04:01:22,932 INFO - ✓ Worker 3: common_voice_en_171291.mp3:  A woman scratching her face.
2025-10-07 04:01:22,979 INFO - ✓ Worker 5: common_voice_en_171272.mp3:  Two people in winter clothing.
2025-10-07 04:01:23,617 INFO - ✓ Worker 7: common_voice_en_171268.mp3:  Lady in short dress walking on sidewalk.
2025-10-07 04:01:23,784 INFO - ✓ Worker 2: common_voice_en_171284.mp3:  People walking along the beach on a sunny day.
2025-10-07 04:01:24,213 INFO - ✓ Worker 13: common_voice_en_171298.mp3:  Some children are sitting on rafts and I like
2025-10-07 04:01:24,350 INFO - ✓ Worker 15: common_voice_en_171288.mp3:  Three men sit down and a building illuminated with green light.
2025-10-07 04:01:24,369 INFO - ✓ Worker 14: common_voice_en_171279.mp3:  A man in a blue t-shirt holds a camera.
2025-10-07 04:01:24,446 INFO - ✓ Worker 8: common_voice_en_171269.mp3:  A group of Asian people stand and sit in ceremonial dress.
2025-10-07 04:01:24,453 INFO - ✓ Worker 12: common_voice_en_1712

Downloading batch 37:   2%|▏         | 1/48 [00:00<00:07,  6.69it/s]



Downloading batch 37:  23%|██▎       | 11/48 [00:00<00:00, 51.23it/s]



Downloading batch 37:  62%|██████▎   | 30/48 [00:00<00:00, 106.39it/s]



Downloading batch 37: 100%|██████████| 48/48 [00:00<00:00, 82.44it/s] 

2025-10-07 04:01:35,535 INFO - Transcribing 48 files in parallel...





2025-10-07 04:01:38,693 INFO - ✓ Worker 6: common_voice_en_17282320.mp3:  You guys should try starting small.
2025-10-07 04:01:38,694 INFO - ✓ Worker 13: common_voice_en_17281459.mp3:  Let me worry about that.
2025-10-07 04:01:39,057 INFO - ✓ Worker 1: common_voice_en_17281504.mp3:  Roads are paved with sticky tar.
2025-10-07 04:01:39,065 INFO - ✓ Worker 11: common_voice_en_17277788.mp3:  We have an excess of squirrels.
2025-10-07 04:01:39,075 INFO - ✓ Worker 14: common_voice_en_17280382.mp3:  That was someone else's handy work.
2025-10-07 04:01:39,348 INFO - ✓ Worker 3: common_voice_en_17281834.mp3:  You weren't kidding about the bait part.
2025-10-07 04:01:39,576 INFO - ✓ Worker 10: common_voice_en_17284001.mp3:  Grasp no more than die hand will hold.
2025-10-07 04:01:39,660 INFO - ✓ Worker 2: common_voice_en_17275788.mp3:  I'm just using the standard repair style.
2025-10-07 04:01:39,818 INFO - ✓ Worker 8: common_voice_en_17276609.mp3:  Voyage your one, it's left our solar system.
2

Downloading batch 38:   2%|▏         | 1/48 [00:00<00:07,  6.67it/s]



Downloading batch 38:  38%|███▊      | 18/48 [00:00<00:00, 82.52it/s]



Downloading batch 38:  90%|████████▉ | 43/48 [00:00<00:00, 147.35it/s]



Downloading batch 38: 100%|██████████| 48/48 [00:00<00:00, 116.14it/s]

2025-10-07 04:01:50,576 INFO - Transcribing 48 files in parallel...





2025-10-07 04:01:53,574 INFO - ✓ Worker 7: common_voice_en_17284734.mp3:  Let's talk about childhood.
2025-10-07 04:01:53,594 INFO - ✓ Worker 10: common_voice_en_17284733.mp3:  The refrigerator is almost empty.
2025-10-07 04:01:53,611 INFO - ✓ Worker 3: common_voice_en_17284732.mp3:  Martha has been injured.
2025-10-07 04:01:53,754 INFO - ✓ Worker 9: common_voice_en_17284717.mp3:  That would not have been easy.
2025-10-07 04:01:54,146 INFO - ✓ Worker 11: common_voice_en_17284730.mp3:  He is a real Protein Junkie.
2025-10-07 04:01:54,434 INFO - ✓ Worker 6: common_voice_en_17284719.mp3:  The laptop from Erichert hits the problem.
2025-10-07 04:01:54,528 INFO - ✓ Worker 14: common_voice_en_17284725.mp3:  I don't have to get used to it.
2025-10-07 04:01:54,771 INFO - ✓ Worker 15: common_voice_en_17284736.mp3:  Nadine is sometimes too good for this world.
2025-10-07 04:01:54,933 INFO - ✓ Worker 13: common_voice_en_17284737.mp3:  Have a nice day, the car looks good.
2025-10-07 04:01:55,318 I

Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


2025-10-07 04:02:09,335 INFO - ✓ Worker 10: common_voice_en_17284747.mp3:  Don't forget to like and subscribe to the channel and don't forget to subscribe to the channel and ...
2025-10-07 04:02:09,562 INFO - ✓ Worker 10: common_voice_en_17284760.mp3:  The ice is so cold that it's damped.


Downloading batch 39:  83%|████████▎ | 40/48 [00:00<00:00, 137.96it/s]



Downloading batch 39: 100%|██████████| 48/48 [00:00<00:00, 112.10it/s]

2025-10-07 04:02:10,560 INFO - Transcribing 48 files in parallel...





2025-10-07 04:02:13,272 INFO - ✓ Worker 7: common_voice_en_17284772.mp3:  a lot of things.
2025-10-07 04:02:13,398 INFO - ✓ Worker 1: common_voice_en_17284785.mp3:  Best of the it.
2025-10-07 04:02:13,744 INFO - ✓ Worker 2: common_voice_en_17284777.mp3:  I work for sector 7.
2025-10-07 04:02:13,788 INFO - ✓ Worker 12: common_voice_en_17284792.mp3:  We did a good job.
2025-10-07 04:02:13,830 INFO - ✓ Worker 11: common_voice_en_17284769.mp3:  You should not disappear here.
2025-10-07 04:02:14,024 INFO - ✓ Worker 14: common_voice_en_17284784.mp3:  It's so moistened milk.
2025-10-07 04:02:14,145 INFO - ✓ Worker 0: common_voice_en_17284770.mp3:  May I ask for your attention please?
2025-10-07 04:02:14,225 INFO - ✓ Worker 5: common_voice_en_17284786.mp3:  Maybe he had his information on Instagram.
2025-10-07 04:02:14,355 INFO - ✓ Worker 15: common_voice_en_17284776.mp3:  Alex has started a lot of research.
2025-10-07 04:02:14,751 INFO - ✓ Worker 3: common_voice_en_17284766.mp3:  We are not h

Downloading batch 40:   2%|▏         | 1/48 [00:00<00:07,  6.23it/s]



Downloading batch 40:  38%|███▊      | 18/48 [00:00<00:00, 78.16it/s]



Downloading batch 40:  88%|████████▊ | 42/48 [00:00<00:00, 139.28it/s]



Downloading batch 40: 100%|██████████| 48/48 [00:00<00:00, 116.13it/s]

2025-10-07 04:02:25,560 INFO - Transcribing 48 files in parallel...





2025-10-07 04:02:28,884 INFO - ✓ Worker 11: common_voice_en_17284839.mp3:  Can I answer that as well?
2025-10-07 04:02:28,928 INFO - ✓ Worker 1: common_voice_en_17284832.mp3:  Thank you very much for your attention.
2025-10-07 04:02:29,223 INFO - ✓ Worker 15: common_voice_en_17284844.mp3:  The link is in the description.
2025-10-07 04:02:29,485 INFO - ✓ Worker 8: common_voice_en_17284823.mp3:  Vessen, Biergruppe, ist das.
2025-10-07 04:02:29,832 INFO - ✓ Worker 2: common_voice_en_17284829.mp3:  Pierre wants to build a new graphic card.
2025-10-07 04:02:29,866 INFO - ✓ Worker 5: common_voice_en_17284845.mp3:  In the opposite direction, the rifle must be controlled.
2025-10-07 04:02:30,042 INFO - ✓ Worker 9: common_voice_en_17284840.mp3:  I've seen something like this for a long time.
2025-10-07 04:02:30,070 INFO - ✓ Worker 13: common_voice_en_17284824.mp3:  All the democratic restrictions are just ahead of the ground.
2025-10-07 04:02:30,138 INFO - ✓ Worker 10: common_voice_en_17284846.

Downloading batch 41:  38%|███▊      | 18/48 [00:00<00:00, 83.61it/s]



Downloading batch 41:  92%|█████████▏| 44/48 [00:00<00:00, 153.27it/s]



Downloading batch 41: 100%|██████████| 48/48 [00:00<00:00, 86.13it/s] 

2025-10-07 04:02:41,062 INFO - Transcribing 48 files in parallel...





2025-10-07 04:02:44,548 INFO - ✓ Worker 1: common_voice_en_17284893.mp3:  Can I offer you something to drink?
2025-10-07 04:02:44,753 INFO - ✓ Worker 3: common_voice_en_17284901.mp3:  Trust is good, control is better.
2025-10-07 04:02:44,771 INFO - ✓ Worker 5: common_voice_en_17284900.mp3:  We have a problem with the hydraulic.
2025-10-07 04:02:44,801 INFO - ✓ Worker 9: common_voice_en_17284896.mp3:  And now it's going to be complicated.
2025-10-07 04:02:44,921 INFO - ✓ Worker 4: common_voice_en_17284889.mp3:  Why do you buy still water in bottles?
2025-10-07 04:02:45,027 INFO - ✓ Worker 12: common_voice_en_17284926.mp3:  Here they are, if I'm not mistaken.
2025-10-07 04:02:45,298 INFO - ✓ Worker 11: common_voice_en_17284924.mp3:  Who is responsible for the coordination of the government?
2025-10-07 04:02:45,585 INFO - ✓ Worker 13: common_voice_en_17284907.mp3:  Orang strikt nur die Müllabfuhr.
2025-10-07 04:02:45,879 INFO - ✓ Worker 15: common_voice_en_17284910.mp3:  Erna, that's pret

Downloading batch 42:   2%|▏         | 1/48 [00:00<00:07,  6.46it/s]



Downloading batch 42:  42%|████▏     | 20/48 [00:00<00:00, 93.67it/s]



Downloading batch 42:  88%|████████▊ | 42/48 [00:00<00:00, 143.19it/s]



Downloading batch 42: 100%|██████████| 48/48 [00:00<00:00, 125.86it/s]

2025-10-07 04:02:55,998 INFO - Transcribing 48 files in parallel...





2025-10-07 04:02:59,124 INFO - ✓ Worker 0: common_voice_en_17284970.mp3:  Who has this drawing done?
2025-10-07 04:02:59,801 INFO - ✓ Worker 12: common_voice_en_17285127.mp3:  More tangos on 6 a.m.
2025-10-07 04:02:59,811 INFO - ✓ Worker 2: common_voice_en_17284969.mp3:  There is no 146 stockwork.
2025-10-07 04:03:00,052 INFO - ✓ Worker 6: common_voice_en_17284979.mp3:  This is grammaticalish, not correct?
2025-10-07 04:03:00,189 INFO - ✓ Worker 4: common_voice_en_17285027.mp3:  On the next day, the new staff will begin.
2025-10-07 04:03:00,481 INFO - ✓ Worker 3: common_voice_en_17284976.mp3:  What can you do against history?
2025-10-07 04:03:00,668 INFO - ✓ Worker 11: common_voice_en_17285031.mp3:  The opposite hypothesis can't be re-earned again.
2025-10-07 04:03:00,792 INFO - ✓ Worker 9: common_voice_en_17284968.mp3:  The new information building was named after Conrad Zuse.
2025-10-07 04:03:00,903 INFO - ✓ Worker 15: common_voice_en_17285032.mp3:  For audio speakers, only a loss of

Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


2025-10-07 04:03:15,640 INFO - ✓ Worker 2: common_voice_en_17285020.mp3:  The boss holds his deck-tick-tick-tick-tick-tick-tick-tick-tick-tick-tick-tick-tick-tick-tick-tick-...
2025-10-07 04:03:15,832 INFO - ✓ Worker 2: common_voice_en_17285162.mp3:  On the other hand, they make a romantic evening.


Downloading batch 43:  40%|███▉      | 19/48 [00:00<00:00, 80.15it/s]



Downloading batch 43:  83%|████████▎ | 40/48 [00:00<00:00, 128.80it/s]



Downloading batch 43: 100%|██████████| 48/48 [00:00<00:00, 113.13it/s]

2025-10-07 04:03:16,806 INFO - Transcribing 48 files in parallel...





2025-10-07 04:03:19,711 INFO - ✓ Worker 1: common_voice_en_17285171.mp3:  How can you protect yourself?
2025-10-07 04:03:20,002 INFO - ✓ Worker 0: common_voice_en_173150.mp3:  There was no justification for it.
2025-10-07 04:03:20,130 INFO - ✓ Worker 9: common_voice_en_17285176.mp3:  Can you please do this session?
2025-10-07 04:03:20,542 INFO - ✓ Worker 2: common_voice_en_17288401.mp3:  Find our useful information is on the website.
2025-10-07 04:03:20,886 INFO - ✓ Worker 12: common_voice_en_17296828.mp3:  Never took a flight from South Africa to Japan.
2025-10-07 04:03:20,925 INFO - ✓ Worker 8: common_voice_en_173173.mp3:  Children in canoes floating down their river.
2025-10-07 04:03:21,079 INFO - ✓ Worker 15: common_voice_en_173175.mp3:  A bicyclist becomes airborne among dirt hills at night.
2025-10-07 04:03:21,151 INFO - ✓ Worker 6: common_voice_en_173170.mp3:  Person jumping bicycle off cliff beside building on hill.
2025-10-07 04:03:21,166 INFO - ✓ Worker 10: common_voice_en_17

Downloading batch 44:   2%|▏         | 1/48 [00:00<00:06,  6.84it/s]



Downloading batch 44:  33%|███▎      | 16/48 [00:00<00:00, 77.39it/s]



Downloading batch 44:  77%|███████▋  | 37/48 [00:00<00:00, 129.12it/s]



Downloading batch 44: 100%|██████████| 48/48 [00:00<00:00, 115.74it/s]

2025-10-07 04:03:32,024 INFO - Transcribing 48 files in parallel...





2025-10-07 04:03:34,082 INFO - ✓ Worker 1: common_voice_en_17366229.mp3:  Not very quick.
2025-10-07 04:03:35,609 INFO - ✓ Worker 10: common_voice_en_174599.mp3:  A boy asleep in his dinner bowl.
2025-10-07 04:03:35,682 INFO - ✓ Worker 5: common_voice_en_17380945.mp3:  Verify if the configuration file is valid.
2025-10-07 04:03:35,761 INFO - ✓ Worker 9: common_voice_en_173244.mp3:  Can you get me the Border Patrol song?
2025-10-07 04:03:35,907 INFO - ✓ Worker 6: common_voice_en_174519.mp3:  Several people are hugging each other inside a bar
2025-10-07 04:03:35,956 INFO - ✓ Worker 11: common_voice_en_173212.mp3:  two workers in a boat rake upon it.
2025-10-07 04:03:36,040 INFO - ✓ Worker 15: common_voice_en_174468.mp3:  Boy, I'm playing Ground Equipment.
2025-10-07 04:03:36,259 INFO - ✓ Worker 14: common_voice_en_174542.mp3:  Whether at Navajo National Monument on Friday.
2025-10-07 04:03:36,763 INFO - ✓ Worker 12: common_voice_en_174460.mp3:  Man in a blue sweater and black pants exits

Downloading batch 45:  40%|███▉      | 19/48 [00:00<00:00, 85.94it/s]



Downloading batch 45:  88%|████████▊ | 42/48 [00:00<00:00, 142.27it/s]



Downloading batch 45: 100%|██████████| 48/48 [00:00<00:00, 117.18it/s]

2025-10-07 04:03:47,218 INFO - Transcribing 48 files in parallel...





2025-10-07 04:03:50,929 INFO - ✓ Worker 6: common_voice_en_177009.mp3:  Young man in a classroom creating art.
2025-10-07 04:03:51,004 INFO - ✓ Worker 2: common_voice_en_177020.mp3:  Four young children perform a martial art.
2025-10-07 04:03:51,120 INFO - ✓ Worker 11: common_voice_en_177014.mp3:  The surfer is wiped out by the wave.
2025-10-07 04:03:51,703 INFO - ✓ Worker 3: common_voice_en_177017.mp3:  Add this tune to the Sylvia Plath playlist.
2025-10-07 04:03:51,875 INFO - ✓ Worker 8: common_voice_en_177010.mp3:  A man in a white t-shirt cleaning up debris.
2025-10-07 04:03:51,950 INFO - ✓ Worker 1: common_voice_en_17669826.mp3:  The least you can do is call 911.
2025-10-07 04:03:52,049 INFO - ✓ Worker 14: common_voice_en_177018.mp3:  A large group of people sitting in a room and watching something.
2025-10-07 04:03:52,100 INFO - ✓ Worker 13: common_voice_en_17718578.mp3:  There was no replacement for the armed forces lost in the fight.
2025-10-07 04:03:52,285 INFO - ✓ Worker 4: c

Downloading batch 46:  40%|███▉      | 19/48 [00:00<00:00, 87.47it/s]



Downloading batch 46:  88%|████████▊ | 42/48 [00:00<00:00, 142.55it/s]



Downloading batch 46: 100%|██████████| 48/48 [00:00<00:00, 123.54it/s]

2025-10-07 04:04:02,876 INFO - Transcribing 48 files in parallel...





2025-10-07 04:04:05,662 INFO - ✓ Worker 9: common_voice_en_17787629.mp3:  It has gone now.
2025-10-07 04:04:06,065 INFO - ✓ Worker 1: common_voice_en_17780695.mp3:  Someone accidentally aborted the mission.
2025-10-07 04:04:06,303 INFO - ✓ Worker 6: common_voice_en_177909.mp3:  A couple sit outside of an establishment.
2025-10-07 04:04:06,430 INFO - ✓ Worker 14: common_voice_en_17791284.mp3:  Rules are made to be broken.
2025-10-07 04:04:06,771 INFO - ✓ Worker 13: common_voice_en_17781153.mp3:  Keep the hatch tight and the watch constant.
2025-10-07 04:04:06,853 INFO - ✓ Worker 12: common_voice_en_177895.mp3:  Several people are walking out in the spotlight.
2025-10-07 04:04:06,901 INFO - ✓ Worker 7: common_voice_en_17787551.mp3:  Silence is an excellent remedy against lander.
2025-10-07 04:04:06,927 INFO - ✓ Worker 0: common_voice_en_17780605.mp3:  I propose a dictionary attack instead of brute force.
2025-10-07 04:04:07,103 INFO - ✓ Worker 8: common_voice_en_17780615.mp3:  There are 

Downloading batch 47:  42%|████▏     | 20/48 [00:00<00:00, 86.76it/s]



Downloading batch 47:  94%|█████████▍| 45/48 [00:00<00:00, 137.84it/s]



Downloading batch 47: 100%|██████████| 48/48 [00:00<00:00, 111.58it/s]

2025-10-07 04:04:17,694 INFO - Transcribing 48 files in parallel...





2025-10-07 04:04:20,228 INFO - ✓ Worker 3: common_voice_en_17819194.mp3:  It shall be done.
2025-10-07 04:04:20,235 INFO - ✓ Worker 9: common_voice_en_17801706.mp3:  and hotel.
2025-10-07 04:04:20,315 INFO - ✓ Worker 12: common_voice_en_17810827.mp3:  You look like one.
2025-10-07 04:04:20,484 INFO - ✓ Worker 2: common_voice_en_17804741.mp3:  Of course, video!
2025-10-07 04:04:20,559 INFO - ✓ Worker 0: common_voice_en_17799712.mp3:  You are quite right.
2025-10-07 04:04:20,607 INFO - ✓ Worker 13: common_voice_en_17827705.mp3:  I mean what I said.
2025-10-07 04:04:21,128 INFO - ✓ Worker 8: common_voice_en_17834192.mp3:  One could be on this house another.
2025-10-07 04:04:21,237 INFO - ✓ Worker 11: common_voice_en_17810378.mp3:  The strong arm shall shield your honor.
2025-10-07 04:04:21,336 INFO - ✓ Worker 10: common_voice_en_17801699.mp3:  Index arithmetic is performed to create this.
2025-10-07 04:04:21,364 INFO - ✓ Worker 6: common_voice_en_17818466.mp3:  This is used for host-based

Downloading batch 48:  44%|████▍     | 21/48 [00:00<00:00, 94.54it/s]



Downloading batch 48:  88%|████████▊ | 42/48 [00:00<00:00, 137.95it/s]



Downloading batch 48: 100%|██████████| 48/48 [00:00<00:00, 115.68it/s]

2025-10-07 04:04:31,867 INFO - Transcribing 48 files in parallel...





2025-10-07 04:04:34,273 INFO - ✓ Worker 15: common_voice_en_178618.mp3:  Guy kisses girl.
2025-10-07 04:04:34,994 INFO - ✓ Worker 7: common_voice_en_178603.mp3:  A child running through the water.
2025-10-07 04:04:35,097 INFO - ✓ Worker 3: common_voice_en_178601.mp3:  Man skates along cement wall.
2025-10-07 04:04:35,733 INFO - ✓ Worker 5: common_voice_en_17861487.mp3:  Not to know how to sign one's name.
2025-10-07 04:04:35,840 INFO - ✓ Worker 6: common_voice_en_178622.mp3:  People relaxing in a park, one of them sleeping.
2025-10-07 04:04:36,250 INFO - ✓ Worker 9: common_voice_en_178623.mp3:  An old man in a straw hat sits on a park bench.
2025-10-07 04:04:36,390 INFO - ✓ Worker 2: common_voice_en_178613.mp3:  An older woman in sunglasses sits behind a man in a yellow shirt.
2025-10-07 04:04:36,404 INFO - ✓ Worker 0: common_voice_en_178615.mp3:  Two kids are in a stroller in front of a fire engine.
2025-10-07 04:04:36,752 INFO - ✓ Worker 13: common_voice_en_178624.mp3:  A young boy p

Downloading batch 49:   2%|▏         | 1/48 [00:00<00:09,  5.18it/s]



Downloading batch 49:  46%|████▌     | 22/48 [00:00<00:00, 91.76it/s]



Downloading batch 49:  94%|█████████▍| 45/48 [00:00<00:00, 143.53it/s]



Downloading batch 49: 100%|██████████| 48/48 [00:00<00:00, 115.79it/s]

2025-10-07 04:04:46,914 INFO - Transcribing 48 files in parallel...





2025-10-07 04:04:49,948 INFO - ✓ Worker 4: common_voice_en_17939330.mp3:  Two saints on one day.
2025-10-07 04:04:50,153 INFO - ✓ Worker 0: common_voice_en_1793581.mp3:  The defadils are blooming.
2025-10-07 04:04:50,179 INFO - ✓ Worker 14: common_voice_en_18005695.mp3:  Passing the bridge was free.
2025-10-07 04:04:50,264 INFO - ✓ Worker 1: common_voice_en_179337.mp3:  One kid jumping on the jump.
2025-10-07 04:04:50,531 INFO - ✓ Worker 5: common_voice_en_179338.mp3:  Several people lying down on the beach.
2025-10-07 04:04:50,817 INFO - ✓ Worker 15: common_voice_en_17941425.mp3:  That move means the game is over.
2025-10-07 04:04:51,050 INFO - ✓ Worker 2: common_voice_en_18005693.mp3:  She said you told her where it was.
2025-10-07 04:04:51,274 INFO - ✓ Worker 6: common_voice_en_18058858.mp3:  The cows cover and hauled trees were blue.
2025-10-07 04:04:51,462 INFO - ✓ Worker 11: common_voice_en_17982742.mp3:  Press the pants and sew a button on the vest.
2025-10-07 04:04:51,606 INFO 

Downloading batch 50:  46%|████▌     | 22/48 [00:00<00:00, 95.44it/s]



Downloading batch 50:  90%|████████▉ | 43/48 [00:00<00:00, 139.09it/s]



Downloading batch 50: 100%|██████████| 48/48 [00:00<00:00, 115.28it/s]

2025-10-07 04:05:01,932 INFO - Transcribing 48 files in parallel...





2025-10-07 04:05:04,477 INFO - ✓ Worker 1: common_voice_en_181642.mp3:  Briefed is last.
2025-10-07 04:05:04,730 INFO - ✓ Worker 7: common_voice_en_18250355.mp3:  Show me relevant results.
2025-10-07 04:05:05,008 INFO - ✓ Worker 13: common_voice_en_18250353.mp3:  She had a bad cuff.
2025-10-07 04:05:05,029 INFO - ✓ Worker 9: common_voice_en_18253927.mp3:  The dog ate my homework.
2025-10-07 04:05:05,136 INFO - ✓ Worker 2: common_voice_en_18173688.mp3:  It came about in this way.
2025-10-07 04:05:05,627 INFO - ✓ Worker 0: common_voice_en_181614.mp3:  Need the sawger called the black lamp.
2025-10-07 04:05:05,658 INFO - ✓ Worker 10: common_voice_en_18183180.mp3:  Pray to come us before I fall.
2025-10-07 04:05:05,677 INFO - ✓ Worker 3: common_voice_en_18251171.mp3:  Campyllery blood vessel.
2025-10-07 04:05:05,725 INFO - ✓ Worker 14: common_voice_en_18250345.mp3:  You have many herbs in this garden.
2025-10-07 04:05:05,866 INFO - ✓ Worker 8: common_voice_en_18250350.mp3:  Some plastics c

Downloading batch 51:  35%|███▌      | 17/48 [00:00<00:00, 75.32it/s]



Downloading batch 51:  83%|████████▎ | 40/48 [00:00<00:00, 132.60it/s]



Downloading batch 51: 100%|██████████| 48/48 [00:00<00:00, 98.34it/s] 

2025-10-07 04:05:14,741 INFO - Transcribing 48 files in parallel...





2025-10-07 04:05:17,466 INFO - ✓ Worker 5: common_voice_en_18265925.mp3:  Use button moderately.
2025-10-07 04:05:17,811 INFO - ✓ Worker 3: common_voice_en_18266698.mp3:  There are no tricks here.
2025-10-07 04:05:18,002 INFO - ✓ Worker 6: common_voice_en_18265672.mp3:  What would you have us called?
2025-10-07 04:05:18,033 INFO - ✓ Worker 13: common_voice_en_18266523.mp3:  The car drove smooth and quiet.
2025-10-07 04:05:18,152 INFO - ✓ Worker 12: common_voice_en_18268157.mp3:  Isn't that name a bit authoritarian?
2025-10-07 04:05:18,420 INFO - ✓ Worker 10: common_voice_en_18266737.mp3:  I would like to carve a wooden dolphin.
2025-10-07 04:05:18,425 INFO - ✓ Worker 4: common_voice_en_18262611.mp3:  The table was displayed in a structured hierarchy.
2025-10-07 04:05:18,768 INFO - ✓ Worker 8: common_voice_en_18265920.mp3:  Americans say gasoline and the British say petrol.
2025-10-07 04:05:18,871 INFO - ✓ Worker 1: common_voice_en_18265620.mp3:  The Coast Guard rescued passengers of a 

Downloading batch 52:   2%|▏         | 1/48 [00:00<00:08,  5.68it/s]



Downloading batch 52:  46%|████▌     | 22/48 [00:00<00:00, 93.65it/s]



Downloading batch 52:  92%|█████████▏| 44/48 [00:00<00:00, 136.53it/s]



Downloading batch 52: 100%|██████████| 48/48 [00:00<00:00, 111.35it/s]

2025-10-07 04:05:27,301 INFO - Transcribing 48 files in parallel...





2025-10-07 04:05:30,091 INFO - ✓ Worker 9: common_voice_en_18281510.mp3:  Here comes the cavalry.
2025-10-07 04:05:30,158 INFO - ✓ Worker 11: common_voice_en_18280349.mp3:  This is domestic produce.
2025-10-07 04:05:30,333 INFO - ✓ Worker 15: common_voice_en_18283226.mp3:  Which version do you have?
2025-10-07 04:05:30,412 INFO - ✓ Worker 3: common_voice_en_18277669.mp3:  I need to mold the grass.
2025-10-07 04:05:30,607 INFO - ✓ Worker 0: common_voice_en_18277624.mp3:  Tuna tastes great with bread.
2025-10-07 04:05:30,924 INFO - ✓ Worker 7: common_voice_en_18282800.mp3:  Even the woman knew how to be silent.
2025-10-07 04:05:31,135 INFO - ✓ Worker 8: common_voice_en_18280350.mp3:  They felt insulted that they didn't recognize him.
2025-10-07 04:05:31,140 INFO - ✓ Worker 2: common_voice_en_18277723.mp3:  There is a lot of literature on this subject.
2025-10-07 04:05:31,336 INFO - ✓ Worker 6: common_voice_en_18277665.mp3:  The dramatic drill rattled uncontrollably.
2025-10-07 04:05:31,3

Downloading batch 53:   2%|▏         | 1/48 [00:00<00:07,  6.15it/s]



Downloading batch 53:  46%|████▌     | 22/48 [00:00<00:00, 99.69it/s]



Downloading batch 53:  79%|███████▉  | 38/48 [00:00<00:00, 122.48it/s]



Downloading batch 53: 100%|██████████| 48/48 [00:00<00:00, 118.47it/s]

2025-10-07 04:05:40,409 INFO - Transcribing 48 files in parallel...





2025-10-07 04:05:43,615 INFO - ✓ Worker 5: common_voice_en_18313322.mp3:  I'll go over tomorrow afternoon.
2025-10-07 04:05:43,681 INFO - ✓ Worker 13: common_voice_en_18313378.mp3:  The house was built using concrete.
2025-10-07 04:05:43,700 INFO - ✓ Worker 3: common_voice_en_18302342.mp3:  Who is the oldest person alive?
2025-10-07 04:05:43,760 INFO - ✓ Worker 9: common_voice_en_18313329.mp3:  Banana is full of potassium.
2025-10-07 04:05:43,785 INFO - ✓ Worker 0: common_voice_en_18313316.mp3:  I have been doubly baptized.
2025-10-07 04:05:43,836 INFO - ✓ Worker 1: common_voice_en_18299316.mp3:  The device has a gyro motor.
2025-10-07 04:05:43,862 INFO - ✓ Worker 15: common_voice_en_18302167.mp3:  I'll have a tackle please.
2025-10-07 04:05:43,959 INFO - ✓ Worker 2: common_voice_en_18299261.mp3:  The can was made out of aluminium.
2025-10-07 04:05:44,188 INFO - ✓ Worker 8: common_voice_en_18299154.mp3:  I pray you will get well soon.
2025-10-07 04:05:44,212 INFO - ✓ Worker 14: common_

Downloading batch 54:  31%|███▏      | 15/48 [00:00<00:00, 70.98it/s]



Downloading batch 54:  79%|███████▉  | 38/48 [00:00<00:00, 128.47it/s]



Downloading batch 54: 100%|██████████| 48/48 [00:00<00:00, 111.02it/s]

2025-10-07 04:05:54,926 INFO - Transcribing 48 files in parallel...





2025-10-07 04:05:58,354 INFO - ✓ Worker 8: common_voice_en_18318155.mp3:  Haven't you read the high record?
2025-10-07 04:05:58,796 INFO - ✓ Worker 14: common_voice_en_18319568.mp3:  He confessed that the sketch had started with him.
2025-10-07 04:05:58,837 INFO - ✓ Worker 7: common_voice_en_18317047.mp3:  It is merely the simplest superlative.
2025-10-07 04:05:58,907 INFO - ✓ Worker 6: common_voice_en_18317865.mp3:  It was like the beating of hoofs.
2025-10-07 04:05:59,034 INFO - ✓ Worker 9: common_voice_en_18317053.mp3:  It was Jan singing softly over Beyond the Rocks.
2025-10-07 04:05:59,097 INFO - ✓ Worker 3: common_voice_en_18317056.mp3:  Here the Indian dogs wailing down a Churchill.
2025-10-07 04:05:59,442 INFO - ✓ Worker 13: common_voice_en_18318182.mp3:  I am writing these lines in Honolulu, Hawaii.
2025-10-07 04:05:59,447 INFO - ✓ Worker 10: common_voice_en_18319602.mp3:  I came for information more out of curiosity than anything else.
2025-10-07 04:05:59,449 INFO - ✓ Worker 

Downloading batch 55:   2%|▏         | 1/48 [00:00<00:08,  5.79it/s]



Downloading batch 55:  44%|████▍     | 21/48 [00:00<00:00, 93.13it/s]



Downloading batch 55:  88%|████████▊ | 42/48 [00:00<00:00, 132.71it/s]



Downloading batch 55: 100%|██████████| 48/48 [00:00<00:00, 84.30it/s] 

2025-10-07 04:06:10,666 INFO - Transcribing 48 files in parallel...





2025-10-07 04:06:14,387 INFO - ✓ Worker 13: common_voice_en_18322791.mp3:  Laughter echoed through the circus.
2025-10-07 04:06:14,533 INFO - ✓ Worker 9: common_voice_en_18319981.mp3:  My friend lives at 61 Main Street.
2025-10-07 04:06:14,633 INFO - ✓ Worker 11: common_voice_en_18319991.mp3:  The boy Orion was specially maltreated.
2025-10-07 04:06:14,659 INFO - ✓ Worker 8: common_voice_en_18321185.mp3:  Segs on, on to her, yo.
2025-10-07 04:06:14,846 INFO - ✓ Worker 10: common_voice_en_18319972.mp3:  The Italian Rancho was a bachelor establishment.
2025-10-07 04:06:14,921 INFO - ✓ Worker 7: common_voice_en_18319951.mp3:  All right, so reply jock with great regret.
2025-10-07 04:06:15,038 INFO - ✓ Worker 5: common_voice_en_18319963.mp3:  She added with genuine sympathy in her face and voice.
2025-10-07 04:06:15,102 INFO - ✓ Worker 1: common_voice_en_18319931.mp3:  My uncle was a member of the Territorial Army.
2025-10-07 04:06:15,217 INFO - ✓ Worker 14: common_voice_en_18322797.mp3:  

Downloading batch 56:   2%|▏         | 1/48 [00:00<00:09,  5.07it/s]



Downloading batch 56:  40%|███▉      | 19/48 [00:00<00:00, 76.85it/s]



Downloading batch 56:  98%|█████████▊| 47/48 [00:00<00:00, 150.71it/s]



Downloading batch 56: 100%|██████████| 48/48 [00:00<00:00, 107.41it/s]

2025-10-07 04:06:25,921 INFO - Transcribing 48 files in parallel...





2025-10-07 04:06:29,007 INFO - ✓ Worker 9: common_voice_en_18324946.mp3:  The Falcon washed its prey.
2025-10-07 04:06:29,088 INFO - ✓ Worker 12: common_voice_en_18325965.mp3:  I know they are my oysters.
2025-10-07 04:06:29,233 INFO - ✓ Worker 11: common_voice_en_18324747.mp3:  But it contributed to this mash.
2025-10-07 04:06:29,346 INFO - ✓ Worker 3: common_voice_en_18324984.mp3:  There were 8 people in that role.
2025-10-07 04:06:29,543 INFO - ✓ Worker 13: common_voice_en_18322924.mp3:  Doctors write out many prescriptions for patients.
2025-10-07 04:06:29,647 INFO - ✓ Worker 7: common_voice_en_18324760.mp3:  The cheese omelet was burned badly.
2025-10-07 04:06:29,902 INFO - ✓ Worker 1: common_voice_en_18323083.mp3:  I love the beautiful colors of autumn, particularly September.
2025-10-07 04:06:29,968 INFO - ✓ Worker 0: common_voice_en_18324699.mp3:  Please do not think that I already know it all.
2025-10-07 04:06:29,980 INFO - ✓ Worker 4: common_voice_en_18325326.mp3:  A cafeteri

Downloading batch 57:  42%|████▏     | 20/48 [00:00<00:00, 93.96it/s]



Downloading batch 57:  83%|████████▎ | 40/48 [00:00<00:00, 134.85it/s]



Downloading batch 57: 100%|██████████| 48/48 [00:00<00:00, 118.68it/s]

2025-10-07 04:06:40,977 INFO - Transcribing 48 files in parallel...





2025-10-07 04:06:43,849 INFO - ✓ Worker 4: common_voice_en_18326977.mp3:  18, he added.
2025-10-07 04:06:43,993 INFO - ✓ Worker 14: common_voice_en_18326981.mp3:  He can care for himself.
2025-10-07 04:06:44,099 INFO - ✓ Worker 9: common_voice_en_18329026.mp3:  Then there was the campaign.
2025-10-07 04:06:44,162 INFO - ✓ Worker 10: common_voice_en_18327067.mp3:  Everybody carries a mobile phone nowadays.
2025-10-07 04:06:44,207 INFO - ✓ Worker 15: common_voice_en_18330554.mp3:  Everything seems to happen on Tuesday.
2025-10-07 04:06:44,395 INFO - ✓ Worker 3: common_voice_en_18326997.mp3:  Famine had been my great ally.
2025-10-07 04:06:44,403 INFO - ✓ Worker 8: common_voice_en_18326990.mp3:  Fit a bento over here.
2025-10-07 04:06:44,612 INFO - ✓ Worker 11: common_voice_en_18330546.mp3:  Now it was missing from the wall.
2025-10-07 04:06:44,699 INFO - ✓ Worker 1: common_voice_en_18326971.mp3:  Separate the yolk from the egg.
2025-10-07 04:06:44,843 INFO - ✓ Worker 0: common_voice_en_1

Downloading batch 58:   2%|▏         | 1/48 [00:00<00:07,  6.68it/s]



Downloading batch 58:  38%|███▊      | 18/48 [00:00<00:00, 84.50it/s]



Downloading batch 58:  71%|███████   | 34/48 [00:00<00:00, 111.50it/s]



Downloading batch 58: 100%|██████████| 48/48 [00:00<00:00, 108.41it/s]

2025-10-07 04:06:55,182 INFO - Transcribing 48 files in parallel...





2025-10-07 04:06:58,118 INFO - ✓ Worker 12: common_voice_en_18332143.mp3:  How much was it?
2025-10-07 04:06:58,181 INFO - ✓ Worker 6: common_voice_en_18331927.mp3:  That's a lava idea.
2025-10-07 04:06:58,234 INFO - ✓ Worker 1: common_voice_en_18331770.mp3:  Muhammad is a Muslim.
2025-10-07 04:06:58,265 INFO - ✓ Worker 10: common_voice_en_18332121.mp3:  It had been so easy.
2025-10-07 04:06:58,777 INFO - ✓ Worker 0: common_voice_en_18331798.mp3:  Shall we have a jacket potato for lunch?
2025-10-07 04:06:58,789 INFO - ✓ Worker 5: common_voice_en_18332311.mp3:  And this is what company use only.
2025-10-07 04:06:58,899 INFO - ✓ Worker 13: common_voice_en_18332329.mp3:  He obeyed the pressure of her hand.
2025-10-07 04:06:59,476 INFO - ✓ Worker 11: common_voice_en_18332431.mp3:  A ladybird is sometimes known as a ladybug.
2025-10-07 04:06:59,578 INFO - ✓ Worker 8: common_voice_en_18332018.mp3:  One if he has a lion dog, Charles suggested.
2025-10-07 04:06:59,603 INFO - ✓ Worker 3: common

Downloading batch 59:   2%|▏         | 1/48 [00:00<00:08,  5.70it/s]



Downloading batch 59:  40%|███▉      | 19/48 [00:00<00:00, 82.25it/s]



Downloading batch 59:  79%|███████▉  | 38/48 [00:00<00:00, 123.26it/s]



Downloading batch 59: 100%|██████████| 48/48 [00:00<00:00, 110.09it/s]

2025-10-07 04:07:09,682 INFO - Transcribing 48 files in parallel...





2025-10-07 04:07:13,190 INFO - ✓ Worker 5: common_voice_en_18335292.mp3:  More regulation is needed, not less.
2025-10-07 04:07:13,244 INFO - ✓ Worker 4: common_voice_en_18335540.mp3:  Such men believe when they come together.
2025-10-07 04:07:13,308 INFO - ✓ Worker 3: common_voice_en_18335368.mp3:  You're going in for grab sharing.
2025-10-07 04:07:13,464 INFO - ✓ Worker 13: common_voice_en_18335546.mp3:  Philip trust himself against it and entered.
2025-10-07 04:07:13,485 INFO - ✓ Worker 6: common_voice_en_18335329.mp3:  I could not agree with Arnest.
2025-10-07 04:07:13,705 INFO - ✓ Worker 0: common_voice_en_18335533.mp3:  Let them go out and eat my boys.
2025-10-07 04:07:13,797 INFO - ✓ Worker 9: common_voice_en_18335336.mp3:  There was something pathetic in the girl's attitude now.
2025-10-07 04:07:13,982 INFO - ✓ Worker 7: common_voice_en_18335342.mp3:  We handle two men already, both grub thieves.
2025-10-07 04:07:14,191 INFO - ✓ Worker 2: common_voice_en_18335536.mp3:  But this

Downloading batch 60:   2%|▏         | 1/48 [00:00<00:07,  6.56it/s]



Downloading batch 60:  73%|███████▎  | 35/48 [00:00<00:00, 121.62it/s]



Downloading batch 60: 100%|██████████| 48/48 [00:00<00:00, 107.52it/s]

2025-10-07 04:07:24,145 INFO - Transcribing 48 files in parallel...





2025-10-07 04:07:27,397 INFO - ✓ Worker 11: common_voice_en_18338331.mp3:  Hi, our Her vitals.
2025-10-07 04:07:27,493 INFO - ✓ Worker 1: common_voice_en_18338254.mp3:  Yeah, I will tell thee.
2025-10-07 04:07:28,282 INFO - ✓ Worker 14: common_voice_en_18337323.mp3:  For such countries nothing remains but reorganisation.
2025-10-07 04:07:28,558 INFO - ✓ Worker 10: common_voice_en_18338329.mp3:  He was fond of quoting a fragment from a certain poem.
2025-10-07 04:07:28,615 INFO - ✓ Worker 3: common_voice_en_18336407.mp3:  Finals are next week and I'm panicking.
2025-10-07 04:07:28,628 INFO - ✓ Worker 9: common_voice_en_18338278.mp3:  Tudor surveyed him with withering disgust.
2025-10-07 04:07:28,742 INFO - ✓ Worker 7: common_voice_en_18338253.mp3:  Only the chant sound had led him to observe them.
2025-10-07 04:07:28,884 INFO - ✓ Worker 6: common_voice_en_18336410.mp3:  Continuous integration has been a lifesaver for us.
2025-10-07 04:07:28,975 INFO - ✓ Worker 4: common_voice_en_1833743

Downloading batch 61:   2%|▏         | 1/48 [00:00<00:06,  6.77it/s]



Downloading batch 61:  38%|███▊      | 18/48 [00:00<00:00, 82.39it/s]



Downloading batch 61:  85%|████████▌ | 41/48 [00:00<00:00, 139.46it/s]



Downloading batch 61: 100%|██████████| 48/48 [00:00<00:00, 115.43it/s]

2025-10-07 04:07:38,571 INFO - Transcribing 48 files in parallel...





2025-10-07 04:07:42,022 INFO - ✓ Worker 13: common_voice_en_18341207.mp3:  His face was streaming with blood.
2025-10-07 04:07:42,720 INFO - ✓ Worker 5: common_voice_en_18341831.mp3:  A burst of laughter was easily wrought.
2025-10-07 04:07:42,775 INFO - ✓ Worker 9: common_voice_en_18341827.mp3:  He could feel a new stir in the land.
2025-10-07 04:07:42,941 INFO - ✓ Worker 6: common_voice_en_18343942.mp3:  Stay in this and please don't go.
2025-10-07 04:07:43,063 INFO - ✓ Worker 7: common_voice_en_18341832.mp3:  Boxing Day is the day after Christen's Day.
2025-10-07 04:07:43,091 INFO - ✓ Worker 4: common_voice_en_18340559.mp3:  Very few people knew of the existence of this law.
2025-10-07 04:07:43,160 INFO - ✓ Worker 0: common_voice_en_18340544.mp3:  But it won't continue, she said with easy confidence.
2025-10-07 04:07:43,255 INFO - ✓ Worker 1: common_voice_en_18341203.mp3:  For a time, the exciting thrill of his adventure was gone.
2025-10-07 04:07:43,760 INFO - ✓ Worker 15: common_v

Downloading batch 62:  46%|████▌     | 22/48 [00:00<00:00, 90.03it/s]



Downloading batch 62: 100%|██████████| 48/48 [00:00<00:00, 112.70it/s]

2025-10-07 04:07:53,996 INFO - Transcribing 48 files in parallel...





2025-10-07 04:07:56,281 INFO - ✓ Worker 12: common_voice_en_18351980.mp3:  Best regards!
2025-10-07 04:07:56,730 INFO - ✓ Worker 0: common_voice_en_18349094.mp3:  My watch is broken.
2025-10-07 04:07:56,969 INFO - ✓ Worker 7: common_voice_en_18351998.mp3:  I love the new logo.
2025-10-07 04:07:57,170 INFO - ✓ Worker 15: common_voice_en_18351999.mp3:  Goodbye Pierre, he shouted.
2025-10-07 04:07:57,223 INFO - ✓ Worker 11: common_voice_en_18347662.mp3:  Take notes. This is important.
2025-10-07 04:07:57,498 INFO - ✓ Worker 1: common_voice_en_18349095.mp3:  Both parties agree on the compromise.
2025-10-07 04:07:57,503 INFO - ✓ Worker 13: common_voice_en_18350546.mp3:  The forces of evil must not prevail.
2025-10-07 04:07:57,583 INFO - ✓ Worker 8: common_voice_en_18349096.mp3:  They ought to pass here sometime today.
2025-10-07 04:07:57,701 INFO - ✓ Worker 14: common_voice_en_18351791.mp3:  We have also optimized our supply chain.
2025-10-07 04:07:57,914 INFO - ✓ Worker 10: common_voice_en

Downloading batch 63:   2%|▏         | 1/48 [00:00<00:08,  5.87it/s]



Downloading batch 63:  44%|████▍     | 21/48 [00:00<00:00, 94.00it/s]



Downloading batch 63:  83%|████████▎ | 40/48 [00:00<00:00, 126.51it/s]



Downloading batch 63: 100%|██████████| 48/48 [00:00<00:00, 113.65it/s]

2025-10-07 04:08:07,696 INFO - Transcribing 48 files in parallel...





2025-10-07 04:08:10,711 INFO - ✓ Worker 8: common_voice_en_18355936.mp3:  Public transport is extremely efficient.
2025-10-07 04:08:10,961 INFO - ✓ Worker 0: common_voice_en_18355931.mp3:  Can you do me a favor?
2025-10-07 04:08:11,163 INFO - ✓ Worker 3: common_voice_en_18355713.mp3:  Also, she wouldn't walk.
2025-10-07 04:08:11,222 INFO - ✓ Worker 10: common_voice_en_18356108.mp3:  I hate the homeowner's association.
2025-10-07 04:08:11,517 INFO - ✓ Worker 6: common_voice_en_18367624.mp3:  I with Kevin would go with us.
2025-10-07 04:08:11,643 INFO - ✓ Worker 13: common_voice_en_18355934.mp3:  Take my advice and accept the vacation.
2025-10-07 04:08:11,688 INFO - ✓ Worker 4: common_voice_en_18355937.mp3:  Thus was momentum gained in the younger world.
2025-10-07 04:08:11,809 INFO - ✓ Worker 1: common_voice_en_18355930.mp3:  The churches are coming together to provide disaster relief.
2025-10-07 04:08:11,952 INFO - ✓ Worker 12: common_voice_en_18378837.mp3:  The spirit is waiting, but 

Downloading batch 64:   2%|▏         | 1/48 [00:00<00:08,  5.40it/s]



Downloading batch 64:  50%|█████     | 24/48 [00:00<00:00, 98.63it/s]



Downloading batch 64:  88%|████████▊ | 42/48 [00:00<00:00, 123.33it/s]



Downloading batch 64: 100%|██████████| 48/48 [00:00<00:00, 103.91it/s]

2025-10-07 04:08:21,935 INFO - Transcribing 48 files in parallel...





2025-10-07 04:08:24,396 INFO - ✓ Worker 14: common_voice_en_18402746.mp3:  Is that understood?
2025-10-07 04:08:24,636 INFO - ✓ Worker 11: common_voice_en_18402436.mp3:  She was kidding you.
2025-10-07 04:08:24,866 INFO - ✓ Worker 7: common_voice_en_18401993.mp3:  and give her the works.
2025-10-07 04:08:25,268 INFO - ✓ Worker 10: common_voice_en_18402588.mp3:  I hadn't made myself quite clear.
2025-10-07 04:08:25,289 INFO - ✓ Worker 6: common_voice_en_18402505.mp3:  That mint's paw was really tasty.
2025-10-07 04:08:25,329 INFO - ✓ Worker 9: common_voice_en_18402475.mp3:  He was frowning thoughtfully.
2025-10-07 04:08:25,360 INFO - ✓ Worker 15: common_voice_en_18401725.mp3:  Silence all round if you please.
2025-10-07 04:08:25,417 INFO - ✓ Worker 1: common_voice_en_18401716.mp3:  I'm struggling to begin writing my novel.
2025-10-07 04:08:25,502 INFO - ✓ Worker 5: common_voice_en_18402.mp3:  A yellow sign advertises insurance in Spanish.
2025-10-07 04:08:25,720 INFO - ✓ Worker 2: commo

Downloading batch 65:  42%|████▏     | 20/48 [00:00<00:00, 89.02it/s]



Downloading batch 65:  90%|████████▉ | 43/48 [00:00<00:00, 144.30it/s]



Downloading batch 65: 100%|██████████| 48/48 [00:00<00:00, 112.90it/s]

2025-10-07 04:08:35,127 INFO - Transcribing 48 files in parallel...





2025-10-07 04:08:37,827 INFO - ✓ Worker 12: common_voice_en_18404979.mp3:  The motor has failed.
2025-10-07 04:08:37,846 INFO - ✓ Worker 13: common_voice_en_18405.mp3:  Two men playing hockey.
2025-10-07 04:08:38,261 INFO - ✓ Worker 11: common_voice_en_18406919.mp3:  World prices are leveling out.
2025-10-07 04:08:38,268 INFO - ✓ Worker 10: common_voice_en_18404680.mp3:  Is one to have no privacy?
2025-10-07 04:08:38,366 INFO - ✓ Worker 14: common_voice_en_18404978.mp3:  You couldn't have done better.
2025-10-07 04:08:38,372 INFO - ✓ Worker 5: common_voice_en_18405741.mp3:  You married by any chance?
2025-10-07 04:08:38,663 INFO - ✓ Worker 3: common_voice_en_18406172.mp3:  An advanced progressist is a radical.
2025-10-07 04:08:38,665 INFO - ✓ Worker 4: common_voice_en_18406147.mp3:  You are presenting him with the drawing prize.
2025-10-07 04:08:38,826 INFO - ✓ Worker 7: common_voice_en_18404215.mp3:  It really doesn't have to be like that.
2025-10-07 04:08:38,843 INFO - ✓ Worker 2: co

Downloading batch 66:   2%|▏         | 1/48 [00:00<00:06,  7.48it/s]



Downloading batch 66:  27%|██▋       | 13/48 [00:00<00:00, 64.18it/s]



Downloading batch 66:  60%|██████    | 29/48 [00:00<00:00, 104.02it/s]



Downloading batch 66: 100%|██████████| 48/48 [00:00<00:00, 109.97it/s]

2025-10-07 04:08:48,347 INFO - Transcribing 48 files in parallel...





2025-10-07 04:08:51,355 INFO - ✓ Worker 6: common_voice_en_18408230.mp3:  Guilty is charged.
2025-10-07 04:08:51,508 INFO - ✓ Worker 8: common_voice_en_18407991.mp3:  Please don't say anymore.
2025-10-07 04:08:51,560 INFO - ✓ Worker 3: common_voice_en_18407977.mp3:  I'm talking about this man.
2025-10-07 04:08:51,658 INFO - ✓ Worker 15: common_voice_en_18408292.mp3:  Hold on for just a second.
2025-10-07 04:08:51,833 INFO - ✓ Worker 11: common_voice_en_18408.mp3:  A dog chasing a ball indoors.
2025-10-07 04:08:51,954 INFO - ✓ Worker 1: common_voice_en_18407973.mp3:  Yes, I was noticing at dinner.
2025-10-07 04:08:52,176 INFO - ✓ Worker 9: common_voice_en_18408075.mp3:  How could you have possibly lost all that money?
2025-10-07 04:08:52,292 INFO - ✓ Worker 0: common_voice_en_18407971.mp3:  You will need gloves. It's freezing outside.
2025-10-07 04:08:52,393 INFO - ✓ Worker 12: common_voice_en_18409998.mp3:  We can polish your car for 10 pounds.
2025-10-07 04:08:52,531 INFO - ✓ Worker 7

Downloading batch 67:  29%|██▉       | 14/48 [00:00<00:00, 64.91it/s]



Downloading batch 67:  77%|███████▋  | 37/48 [00:00<00:00, 130.78it/s]



Downloading batch 67: 100%|██████████| 48/48 [00:00<00:00, 110.17it/s]

2025-10-07 04:09:02,081 INFO - Transcribing 48 files in parallel...





2025-10-07 04:09:04,940 INFO - ✓ Worker 6: common_voice_en_18413084.mp3:  Give her a break.
2025-10-07 04:09:05,089 INFO - ✓ Worker 8: common_voice_en_18413253.mp3:  I read the thing again.
2025-10-07 04:09:05,091 INFO - ✓ Worker 0: common_voice_en_18412973.mp3:  I was not talking through.
2025-10-07 04:09:05,160 INFO - ✓ Worker 15: common_voice_en_18413262.mp3:  bring them forward lieutenant.
2025-10-07 04:09:05,182 INFO - ✓ Worker 2: common_voice_en_18412988.mp3:  No, don't do that.
2025-10-07 04:09:05,795 INFO - ✓ Worker 1: common_voice_en_18412961.mp3:  Frankie forgave Cruz what she owed.
2025-10-07 04:09:05,930 INFO - ✓ Worker 12: common_voice_en_18412989.mp3:  Come out from behind, Bench.
2025-10-07 04:09:06,045 INFO - ✓ Worker 7: common_voice_en_18413254.mp3:  How to deal with a rabid dog.
2025-10-07 04:09:06,058 INFO - ✓ Worker 14: common_voice_en_18413052.mp3:  That's a bit of a conundrum.
2025-10-07 04:09:06,071 INFO - ✓ Worker 3: common_voice_en_18412958.mp3:  And by Jeff, h

Downloading batch 68:  46%|████▌     | 22/48 [00:00<00:00, 98.64it/s]



Downloading batch 68:  85%|████████▌ | 41/48 [00:00<00:00, 129.91it/s]



Downloading batch 68: 100%|██████████| 48/48 [00:00<00:00, 99.55it/s] 

2025-10-07 04:09:15,305 INFO - Transcribing 48 files in parallel...





2025-10-07 04:09:18,360 INFO - ✓ Worker 4: common_voice_en_18417498.mp3:  Please, Polesman.
2025-10-07 04:09:18,481 INFO - ✓ Worker 1: common_voice_en_18415733.mp3:  Way of all 300 yards apart.
2025-10-07 04:09:18,553 INFO - ✓ Worker 10: common_voice_en_18415729.mp3:  That's a crazy coincidence.
2025-10-07 04:09:18,680 INFO - ✓ Worker 0: common_voice_en_18415728.mp3:  It will not be necessary, sir.
2025-10-07 04:09:18,836 INFO - ✓ Worker 12: common_voice_en_18419592.mp3:  They are equally unfit for office.
2025-10-07 04:09:18,873 INFO - ✓ Worker 3: common_voice_en_18415833.mp3:  All this was new stuff to me.
2025-10-07 04:09:18,933 INFO - ✓ Worker 9: common_voice_en_18417497.mp3:  I could chew holes in the steel door.
2025-10-07 04:09:19,409 INFO - ✓ Worker 2: common_voice_en_18415727.mp3:  Because prudence was always your strong suit.
2025-10-07 04:09:19,615 INFO - ✓ Worker 11: common_voice_en_18415815.mp3:  He was depressed and was always feeling fed up.
2025-10-07 04:09:19,814 INFO 

Downloading batch 69:  35%|███▌      | 17/48 [00:00<00:00, 80.48it/s]



Downloading batch 69:  83%|████████▎ | 40/48 [00:00<00:00, 137.58it/s]



Downloading batch 69: 100%|██████████| 48/48 [00:00<00:00, 118.74it/s]

2025-10-07 04:09:29,317 INFO - Transcribing 48 files in parallel...





2025-10-07 04:09:32,273 INFO - ✓ Worker 0: common_voice_en_18422052.mp3:  Oh, that, yes.
2025-10-07 04:09:32,776 INFO - ✓ Worker 4: common_voice_en_18422213.mp3:  It's all Greek to me.
2025-10-07 04:09:33,059 INFO - ✓ Worker 14: common_voice_en_18422181.mp3:  They clung together for dear life.
2025-10-07 04:09:33,246 INFO - ✓ Worker 2: common_voice_en_18422050.mp3:  I was going to do my rapping tonight.
2025-10-07 04:09:33,460 INFO - ✓ Worker 11: common_voice_en_18422423.mp3:  Certainly old bird, I said cordially.
2025-10-07 04:09:33,655 INFO - ✓ Worker 7: common_voice_en_18422056.mp3:  Americans say aluminum, Brits, Aluminium.
2025-10-07 04:09:33,688 INFO - ✓ Worker 15: common_voice_en_18422184.mp3:  Tay-Fight. Did I tell you?
2025-10-07 04:09:33,743 INFO - ✓ Worker 9: common_voice_en_18422141.mp3:  The cricket club's groundsman is called Bob.
2025-10-07 04:09:33,767 INFO - ✓ Worker 6: common_voice_en_18422055.mp3:  I very much doubt that will make any difference.
2025-10-07 04:09:34,

Downloading batch 70:   2%|▏         | 1/48 [00:00<00:07,  6.21it/s]



Downloading batch 70:  42%|████▏     | 20/48 [00:00<00:00, 91.02it/s]



Downloading batch 70:  90%|████████▉ | 43/48 [00:00<00:00, 145.76it/s]



Downloading batch 70: 100%|██████████| 48/48 [00:00<00:00, 112.39it/s]

2025-10-07 04:09:43,850 INFO - Transcribing 48 files in parallel...





2025-10-07 04:09:46,627 INFO - ✓ Worker 1: common_voice_en_18423497.mp3:  Nothing to do it.
2025-10-07 04:09:46,645 INFO - ✓ Worker 3: common_voice_en_18423781.mp3:  Please stick the books.
2025-10-07 04:09:46,874 INFO - ✓ Worker 10: common_voice_en_18423560.mp3:  I inspected my imagination.
2025-10-07 04:09:47,169 INFO - ✓ Worker 2: common_voice_en_18423500.mp3:  Hold the line a minute.
2025-10-07 04:09:47,428 INFO - ✓ Worker 0: common_voice_en_18423268.mp3:  Forlicious or dissembling arguments
2025-10-07 04:09:47,460 INFO - ✓ Worker 8: common_voice_en_18423708.mp3:  Can we do nothing about fake news?
2025-10-07 04:09:47,760 INFO - ✓ Worker 11: common_voice_en_18425.mp3:  Two people competing in a fencing competition.
2025-10-07 04:09:47,797 INFO - ✓ Worker 7: common_voice_en_18423556.mp3:  Oh no, call the fire brigade.
2025-10-07 04:09:47,934 INFO - ✓ Worker 15: common_voice_en_18423541.mp3:  Racing cars are essential to motorsport.
2025-10-07 04:09:48,180 INFO - ✓ Worker 5: common_v

Downloading batch 71:  33%|███▎      | 16/48 [00:00<00:00, 74.42it/s]



Downloading batch 71:  83%|████████▎ | 40/48 [00:00<00:00, 140.34it/s]



Downloading batch 71: 100%|██████████| 48/48 [00:00<00:00, 117.74it/s]

2025-10-07 04:09:57,040 INFO - Transcribing 48 files in parallel...





2025-10-07 04:09:59,546 INFO - ✓ Worker 2: common_voice_en_18427523.mp3:  Yep, right.
2025-10-07 04:09:59,870 INFO - ✓ Worker 0: common_voice_en_18427512.mp3:  Who logged this door?
2025-10-07 04:09:59,969 INFO - ✓ Worker 7: common_voice_en_18428444.mp3:  She will, will she.
2025-10-07 04:09:59,984 INFO - ✓ Worker 10: common_voice_en_18427513.mp3:  It is nearly dinner time.
2025-10-07 04:10:00,066 INFO - ✓ Worker 12: common_voice_en_18427655.mp3:  I do myself a puppet.
2025-10-07 04:10:00,574 INFO - ✓ Worker 8: common_voice_en_18427653.mp3:  Well, I have really little heads.
2025-10-07 04:10:00,751 INFO - ✓ Worker 14: common_voice_en_18427607.mp3:  Remembering is most satisfying than forgetting.
2025-10-07 04:10:00,837 INFO - ✓ Worker 4: common_voice_en_18427561.mp3:  I honestly can't do it as n1.
2025-10-07 04:10:00,889 INFO - ✓ Worker 11: common_voice_en_18428428.mp3:  Please use your full address, including postcode.
2025-10-07 04:10:00,934 INFO - ✓ Worker 3: common_voice_en_1842790

Downloading batch 72:   2%|▏         | 1/48 [00:00<00:06,  7.07it/s]



Downloading batch 72:  42%|████▏     | 20/48 [00:00<00:00, 96.15it/s]



Downloading batch 72:  88%|████████▊ | 42/48 [00:00<00:00, 145.80it/s]



Downloading batch 72: 100%|██████████| 48/48 [00:00<00:00, 126.48it/s]

2025-10-07 04:10:11,080 INFO - Transcribing 48 files in parallel...





2025-10-07 04:10:14,050 INFO - ✓ Worker 10: common_voice_en_18429653.mp3:  Let us get this straight.
2025-10-07 04:10:14,146 INFO - ✓ Worker 6: common_voice_en_18431039.mp3:  You see what will happen.
2025-10-07 04:10:14,490 INFO - ✓ Worker 3: common_voice_en_18431489.mp3:  Clear what it is, passionately.
2025-10-07 04:10:14,687 INFO - ✓ Worker 11: common_voice_en_18429635.mp3:  You can't have good news about it.
2025-10-07 04:10:14,769 INFO - ✓ Worker 9: common_voice_en_18429666.mp3:  I am not devoted to food at all.
2025-10-07 04:10:14,992 INFO - ✓ Worker 5: common_voice_en_18429649.mp3:  I'm not your personal butler, Jake.
2025-10-07 04:10:15,036 INFO - ✓ Worker 4: common_voice_en_18431042.mp3:  He weighed out the sausages carefully on the scale.
2025-10-07 04:10:15,246 INFO - ✓ Worker 0: common_voice_en_18431.mp3:  A child at the top of a fake rock wall.
2025-10-07 04:10:15,299 INFO - ✓ Worker 15: common_voice_en_18431642.mp3:  I really don't fancy going out today.
2025-10-07 04:10

Downloading batch 73:  48%|████▊     | 23/48 [00:00<00:00, 97.67it/s]



Downloading batch 73:  98%|█████████▊| 47/48 [00:00<00:00, 150.56it/s]



Downloading batch 73: 100%|██████████| 48/48 [00:00<00:00, 122.10it/s]

2025-10-07 04:10:24,275 INFO - Transcribing 48 files in parallel...





2025-10-07 04:10:27,223 INFO - ✓ Worker 3: common_voice_en_18434571.mp3:  This is blended news.
2025-10-07 04:10:27,243 INFO - ✓ Worker 8: common_voice_en_18434939.mp3:  You pride yourself on it.
2025-10-07 04:10:27,479 INFO - ✓ Worker 13: common_voice_en_18434573.mp3:  Are you registered to vote?
2025-10-07 04:10:27,494 INFO - ✓ Worker 1: common_voice_en_18434560.mp3:  I just love chocolate chip cookies.
2025-10-07 04:10:27,621 INFO - ✓ Worker 11: common_voice_en_18434587.mp3:  She felt completely wrong out.
2025-10-07 04:10:28,054 INFO - ✓ Worker 4: common_voice_en_18434938.mp3:  I can't believe what they're saying.
2025-10-07 04:10:28,300 INFO - ✓ Worker 12: common_voice_en_18435068.mp3:  Could you put the saws on the side?
2025-10-07 04:10:28,560 INFO - ✓ Worker 5: common_voice_en_18434686.mp3:  The alien was a rather fetching aquamarine color.
2025-10-07 04:10:28,666 INFO - ✓ Worker 2: common_voice_en_18434595.mp3:  who are normally clean as the lieu in your house.
2025-10-07 04:1

Downloading batch 74:   2%|▏         | 1/48 [00:00<00:07,  6.29it/s]



Downloading batch 74:  42%|████▏     | 20/48 [00:00<00:00, 89.58it/s]



Downloading batch 74:  81%|████████▏ | 39/48 [00:00<00:00, 126.06it/s]



Downloading batch 74: 100%|██████████| 48/48 [00:00<00:00, 115.55it/s]

2025-10-07 04:10:38,288 INFO - Transcribing 48 files in parallel...





2025-10-07 04:10:41,258 INFO - ✓ Worker 2: common_voice_en_18437351.mp3:  Innovation takes perspiration.
2025-10-07 04:10:41,305 INFO - ✓ Worker 7: common_voice_en_18438266.mp3:  A laudable sentiment.
2025-10-07 04:10:41,517 INFO - ✓ Worker 9: common_voice_en_18437.mp3:  Young girls are doing a dance.
2025-10-07 04:10:41,929 INFO - ✓ Worker 0: common_voice_en_18436938.mp3:  Not a bit of good, try.
2025-10-07 04:10:42,017 INFO - ✓ Worker 6: common_voice_en_18437552.mp3:  What a busy little brain it is.
2025-10-07 04:10:42,022 INFO - ✓ Worker 15: common_voice_en_18437554.mp3:  I could see that this had moved him.
2025-10-07 04:10:42,101 INFO - ✓ Worker 8: common_voice_en_18437569.mp3:  A song, a girl, a maid.
2025-10-07 04:10:42,459 INFO - ✓ Worker 5: common_voice_en_18437358.mp3:  She thought she was going to be stung.
2025-10-07 04:10:42,507 INFO - ✓ Worker 4: common_voice_en_18437354.mp3:  We have a very committed licensee and Singapore.
2025-10-07 04:10:42,550 INFO - ✓ Worker 10: com

Downloading batch 75:   2%|▏         | 1/48 [00:00<00:06,  7.34it/s]



Downloading batch 75:  79%|███████▉  | 38/48 [00:00<00:00, 129.74it/s]



Downloading batch 75: 100%|██████████| 48/48 [00:00<00:00, 110.14it/s]

2025-10-07 04:10:52,198 INFO - Transcribing 48 files in parallel...





2025-10-07 04:10:55,533 INFO - ✓ Worker 11: common_voice_en_18444984.mp3:  Men, women, and children.
2025-10-07 04:10:55,860 INFO - ✓ Worker 0: common_voice_en_18444408.mp3:  Unto us, a boy is born.
2025-10-07 04:10:55,876 INFO - ✓ Worker 8: common_voice_en_18448505.mp3:  What brand of toilet paper do you use?
2025-10-07 04:10:56,169 INFO - ✓ Worker 2: common_voice_en_18444219.mp3:  It was a beautiful little sea animony.
2025-10-07 04:10:56,248 INFO - ✓ Worker 14: common_voice_en_18448590.mp3:  We need to get to the bottom of this.
2025-10-07 04:10:56,365 INFO - ✓ Worker 10: common_voice_en_18444649.mp3:  Well, I have been doing that all right.
2025-10-07 04:10:56,457 INFO - ✓ Worker 3: common_voice_en_18444.mp3:  A brown dog running on the beach near the ocean.
2025-10-07 04:10:56,458 INFO - ✓ Worker 1: common_voice_en_18444156.mp3:  Is it a bad thing to be a perfectionist?
2025-10-07 04:10:56,508 INFO - ✓ Worker 4: common_voice_en_18448522.mp3:  The image was too faint and needed to 

Downloading batch 76:   2%|▏         | 1/48 [00:00<00:07,  6.33it/s]



Downloading batch 76:  38%|███▊      | 18/48 [00:00<00:00, 83.27it/s]



Downloading batch 76:  81%|████████▏ | 39/48 [00:00<00:00, 129.43it/s]



Downloading batch 76: 100%|██████████| 48/48 [00:00<00:00, 104.93it/s]

2025-10-07 04:11:06,574 INFO - Transcribing 48 files in parallel...





2025-10-07 04:11:09,293 INFO - ✓ Worker 2: common_voice_en_18451127.mp3:  The little bunnies.
2025-10-07 04:11:09,500 INFO - ✓ Worker 15: common_voice_en_18453420.mp3:  Quite the tortured soul.
2025-10-07 04:11:09,605 INFO - ✓ Worker 9: common_voice_en_18452918.mp3:  Obviously, I'm biased.
2025-10-07 04:11:09,899 INFO - ✓ Worker 0: common_voice_en_18452919.mp3:  It's based on a true story.
2025-10-07 04:11:10,175 INFO - ✓ Worker 3: common_voice_en_18452923.mp3:  Did he recede from his position?
2025-10-07 04:11:10,197 INFO - ✓ Worker 10: common_voice_en_18452910.mp3:  the battle hymn of the Republic.
2025-10-07 04:11:10,404 INFO - ✓ Worker 5: common_voice_en_18453105.mp3:  Gemma was a most wonderful mimic.
2025-10-07 04:11:10,489 INFO - ✓ Worker 4: common_voice_en_18452912.mp3:  Have you read Alice in Wonderland?
2025-10-07 04:11:10,504 INFO - ✓ Worker 12: common_voice_en_18453377.mp3:  Do you prefer white coffee or black?
2025-10-07 04:11:10,511 INFO - ✓ Worker 1: common_voice_en_1845

Downloading batch 77:   2%|▏         | 1/48 [00:00<00:08,  5.50it/s]



Downloading batch 77:  44%|████▍     | 21/48 [00:00<00:00, 88.80it/s]



Downloading batch 77:  92%|█████████▏| 44/48 [00:00<00:00, 141.40it/s]



Downloading batch 77: 100%|██████████| 48/48 [00:00<00:00, 119.31it/s]

2025-10-07 04:11:19,891 INFO - Transcribing 48 files in parallel...





2025-10-07 04:11:22,183 INFO - ✓ Worker 1: common_voice_en_18454019.mp3:  Tell me more.
2025-10-07 04:11:22,477 INFO - ✓ Worker 5: common_voice_en_18454037.mp3:  Must you go?
2025-10-07 04:11:23,545 INFO - ✓ Worker 9: common_voice_en_18454212.mp3:  I waived a John D. Hand.
2025-10-07 04:11:23,587 INFO - ✓ Worker 11: common_voice_en_18454254.mp3:  I really appreciate a good noir thriller.
2025-10-07 04:11:23,626 INFO - ✓ Worker 0: common_voice_en_18454200.mp3:  I regret not staying in touch with Roger.
2025-10-07 04:11:23,643 INFO - ✓ Worker 8: common_voice_en_18454446.mp3:  And why do I despise him?
2025-10-07 04:11:23,875 INFO - ✓ Worker 12: common_voice_en_18454307.mp3:  I felt that your need was greater than mine.
2025-10-07 04:11:24,262 INFO - ✓ Worker 6: common_voice_en_18454202.mp3:  They had an illicit liaison in the gazebo.
2025-10-07 04:11:24,339 INFO - ✓ Worker 7: common_voice_en_18454445.mp3:  The engine raced, the clutch slid into position.
2025-10-07 04:11:24,424 INFO - ✓ 

Downloading batch 78:  44%|████▍     | 21/48 [00:00<00:00, 97.99it/s]



Downloading batch 78:  83%|████████▎ | 40/48 [00:00<00:00, 132.11it/s]



Downloading batch 78: 100%|██████████| 48/48 [00:00<00:00, 119.06it/s]

2025-10-07 04:11:33,392 INFO - Transcribing 48 files in parallel...





2025-10-07 04:11:36,101 INFO - ✓ Worker 1: common_voice_en_18455849.mp3:  Ta very much
2025-10-07 04:11:36,522 INFO - ✓ Worker 6: common_voice_en_18456364.mp3:  She's really pushing her luck.
2025-10-07 04:11:36,698 INFO - ✓ Worker 7: common_voice_en_184565.mp3:  To be a mighty grandee.
2025-10-07 04:11:36,963 INFO - ✓ Worker 10: common_voice_en_18455864.mp3:  Want to stop it just this once?
2025-10-07 04:11:37,154 INFO - ✓ Worker 14: common_voice_en_18456639.mp3:  Christy didn't like to mention it.
2025-10-07 04:11:37,433 INFO - ✓ Worker 5: common_voice_en_18455867.mp3:  Aniseed balls are one of my favourite sweets.
2025-10-07 04:11:37,516 INFO - ✓ Worker 11: common_voice_en_18456580.mp3:  Alec acknowledges that he is a complete nerd.
2025-10-07 04:11:37,904 INFO - ✓ Worker 3: common_voice_en_18455889.mp3:  And you came away feeling that he needed a bracer.
2025-10-07 04:11:38,022 INFO - ✓ Worker 15: common_voice_en_18456360.mp3:  Displaying Nazi symbols or propaganda is against the l

Downloading batch 79:  40%|███▉      | 19/48 [00:00<00:00, 85.44it/s]



Downloading batch 79:  96%|█████████▌| 46/48 [00:00<00:00, 152.93it/s]



Downloading batch 79: 100%|██████████| 48/48 [00:00<00:00, 110.08it/s]

2025-10-07 04:11:46,782 INFO - Transcribing 48 files in parallel...





2025-10-07 04:11:49,340 INFO - ✓ Worker 7: common_voice_en_18460460.mp3:  You take what?
2025-10-07 04:11:49,380 INFO - ✓ Worker 13: common_voice_en_18460454.mp3:  The same noise.
2025-10-07 04:11:49,559 INFO - ✓ Worker 10: common_voice_en_18460449.mp3:  Nor can I imagine.
2025-10-07 04:11:49,937 INFO - ✓ Worker 9: common_voice_en_18460453.mp3:  I have no complaints to make.
2025-10-07 04:11:50,047 INFO - ✓ Worker 3: common_voice_en_18460233.mp3:  Those wistful eyes of hers.
2025-10-07 04:11:50,326 INFO - ✓ Worker 15: common_voice_en_18460551.mp3:  There was the animal loser.
2025-10-07 04:11:50,488 INFO - ✓ Worker 11: common_voice_en_18460733.mp3:  My wife sings in the local choral society.
2025-10-07 04:11:50,515 INFO - ✓ Worker 1: common_voice_en_18460246.mp3:  In the spring, the former served his seeds.
2025-10-07 04:11:50,612 INFO - ✓ Worker 6: common_voice_en_18460234.mp3:  White Leckhorn said Mrs. Mortimer.
2025-10-07 04:11:50,624 INFO - ✓ Worker 2: common_voice_en_18460240.mp3:

Downloading batch 80:  40%|███▉      | 19/48 [00:00<00:00, 96.44it/s]



Downloading batch 80:  79%|███████▉  | 38/48 [00:00<00:00, 135.86it/s]



Downloading batch 80: 100%|██████████| 48/48 [00:00<00:00, 127.99it/s]

2025-10-07 04:12:00,100 INFO - Transcribing 48 files in parallel...





2025-10-07 04:12:03,135 INFO - ✓ Worker 6: common_voice_en_18465459.mp3:  Here is in critical condition.
2025-10-07 04:12:03,386 INFO - ✓ Worker 11: common_voice_en_18466081.mp3:  and the air was growing chilly.
2025-10-07 04:12:03,627 INFO - ✓ Worker 3: common_voice_en_18466092.mp3:  He made a weary jester.
2025-10-07 04:12:03,726 INFO - ✓ Worker 15: common_voice_en_18467067.mp3:  Yes, lovely, isn't it?
2025-10-07 04:12:03,924 INFO - ✓ Worker 14: common_voice_en_18466099.mp3:  Their prized collection of ornaments was stolen.
2025-10-07 04:12:04,214 INFO - ✓ Worker 4: common_voice_en_18465528.mp3:  The air in the hut was close and oppressive.
2025-10-07 04:12:04,441 INFO - ✓ Worker 9: common_voice_en_18466083.mp3:  There's nothing for bringing you from learning to play guitar.
2025-10-07 04:12:04,446 INFO - ✓ Worker 2: common_voice_en_18466098.mp3:  A club acquaintance and a mere one at that.
2025-10-07 04:12:04,547 INFO - ✓ Worker 13: common_voice_en_18465340.mp3:  Ask if she needs a 