<a href="https://colab.research.google.com/github/VJMeyer/HPVPre_Repo/blob/main/PRUT_Transcriber6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# WHISPER V3 ULTIMATE - PRODUCTION-READY TRANSCRIPTION SYSTEM
# All optimizations, fixes, and enhancements in one complete solution

import os
import glob
import time
import gc
import re
import subprocess
import json
import torch
import torchaudio
import numpy as np
from datetime import datetime
from google.colab import drive, userdata
from concurrent.futures import ThreadPoolExecutor
import warnings
warnings.filterwarnings('ignore')

# Mount Drive
# Only mount when the mount point does not exist yet, so re-running the cell
# does not trigger a second mount.
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

# ============================================
# CONFIGURATION - FULLY OPTIMIZED
# ============================================
# INPUT_PATH is scanned for *.wav files by main(); transcripts are written to
# OUTPUT_PATH by process_audio_file(). WHISPER_MODEL is the model name passed
# to whisper.load_model().
INPUT_PATH = "/content/drive/My Drive/PRUT-Transcriptions/Recordings_PRUT"
OUTPUT_PATH = "/content/drive/My Drive/PRUT-Transcriptions/Transcripts"
WHISPER_MODEL = "large-v3"

# Optimized parameters for speed and quality
# These are forwarded as keyword arguments to transcribe_with_quality_check(),
# which adds beam_size/best_of/patience/length_penalty/fp16 on top of them.
# NOTE(review): confirm how Whisper combines a non-zero temperature with the
# beam_size that transcribe_with_quality_check() sets.
TRANSCRIPTION_PARAMS = {
    'temperature': 0.2,          # Single temp, no fallback needed
    'compression_ratio_threshold': 2.4,
    'logprob_threshold': -1.0,
    'no_speech_threshold': 0.6,
    'condition_on_previous_text': True,  # Keep for quality
    'initial_prompt': "This is a conversation about support services. Speaker changes are marked clearly.",
    'word_timestamps': True,
    'prepend_punctuations': '"\'"¿([{-',
    'append_punctuations': '"\'.。,，!！?？:：")]}、',
    'hallucination_silence_threshold': 2.0,
}

# GPU optimization settings
# Applied process-wide at import time, and only when CUDA is available.
if torch.cuda.is_available():
    torch.backends.cudnn.benchmark = True
    torch.backends.cuda.matmul.allow_tf32 = True

# Create the transcript folder up front so later writes cannot fail on a
# missing directory.
os.makedirs(OUTPUT_PATH, exist_ok=True)

print("🚀 Whisper V3 Ultimate Transcription System")
print("="*60)

# ============================================
# INSTALL AND IMPORT DEPENDENCIES
# ============================================
def install_dependencies():
    """Ensure the third-party packages used by this notebook are importable.

    Each pip distribution is probed by importing the top-level module of its
    import name. Only when that import fails is the distribution installed
    (quietly) with pip; a failing pip run raises ``CalledProcessError``.
    """
    required = (
        ('openai-whisper', 'whisper'),
        ('pyannote.audio', 'pyannote.audio'),
        ('pydub', 'pydub'),
    )

    for dist_name, module_path in required:
        # Only the first dotted component is imported for the probe.
        top_level = module_path.split('.')[0]
        try:
            __import__(top_level)
        except ImportError:
            print(f"Installing {dist_name}...")
            subprocess.run(['pip', 'install', '-q', dist_name], check=True)
        else:
            print(f"✓ {dist_name} already installed")

# Install anything missing first: the three imports below are exactly the
# packages install_dependencies() checks for.
install_dependencies()

import whisper
from pyannote.audio import Pipeline
from pydub import AudioSegment

# ============================================
# GPU OPTIMIZATION AND MONITORING
# ============================================
def get_gpu_memory():
    """Report GPU memory figures in gigabytes (bytes divided by 1e9).

    Returns:
        An ``(allocated, reserved, total)`` tuple. ``allocated`` and
        ``reserved`` come from ``torch.cuda.memory_allocated()`` and
        ``torch.cuda.memory_reserved()``; ``total`` is the total memory of
        CUDA device 0. When CUDA is unavailable the result is ``(0, 0, 0)``.
    """
    if not torch.cuda.is_available():
        return 0, 0, 0

    bytes_per_gb = 1e9
    in_use = torch.cuda.memory_allocated() / bytes_per_gb
    held = torch.cuda.memory_reserved() / bytes_per_gb
    capacity = torch.cuda.get_device_properties(0).total_memory / bytes_per_gb
    return in_use, held, capacity

def optimize_gpu_settings():
    """Print a short GPU summary and suggest a beam size from total memory.

    Returns:
        5 when CUDA device 0 reports more than 15 GB of total memory,
        otherwise 3. Without CUDA nothing is printed and 3 is returned.
    """
    conservative_beam, large_beam = 3, 5

    if not torch.cuda.is_available():
        return conservative_beam

    device_name = torch.cuda.get_device_name(0)
    memory_gb = torch.cuda.get_device_properties(0).total_memory / 1e9

    print(f"\n🎮 GPU Optimization")
    print(f"   Device: {device_name}")
    print(f"   Total Memory: {memory_gb:.1f} GB")

    # No batch size is configured here; the line below only reports the total
    # minus a fixed 6.5 GB allowance.
    print(f"   Memory available for processing: {memory_gb - 6.5:.1f} GB")

    # The 15 GB cut-off puts a ~16 GB card (e.g. a T4) in the larger-beam tier.
    return large_beam if memory_gb > 15 else conservative_beam

# Suggested beam size for the current GPU, computed once when the cell runs.
# NOTE(review): nothing else in the visible code reads OPTIMAL_BEAM_SIZE;
# transcribe_with_quality_check() sets its own beam sizes. Confirm whether
# this value was meant to be used there.
OPTIMAL_BEAM_SIZE = optimize_gpu_settings()

# ============================================
# ADVANCED AUDIO PREPROCESSING
# ============================================
def preprocess_audio(audio_path, output_path=None):
    """Normalize and silence-strip a WAV file, exporting the processed copy.

    Args:
        audio_path: Path to the source WAV file.
        output_path: Destination for the processed WAV. When omitted (or
            empty) there is nowhere to store the result, so the source file
            is not even loaded.

    Returns:
        ``output_path`` when a processed file was written, otherwise the
        unchanged ``audio_path``.
    """
    # Without a destination the processed audio would simply be discarded,
    # so skip the decode/normalize/strip work entirely.
    if not output_path:
        return audio_path

    audio = AudioSegment.from_wav(audio_path)

    # Normalize audio levels
    normalized = audio.normalize()

    # Strip silence (runs of at least 1000 ms below -40 dBFS).
    # NOTE(review): confirm whether pydub's strip_silence only trims the
    # leading/trailing silence or also removes interior silence; the latter
    # would shift every timestamp relative to the original recording.
    trimmed = normalized.strip_silence(silence_len=1000, silence_thresh=-40)

    trimmed.export(output_path, format="wav")
    return output_path

# ============================================
# SMART TRANSCRIPTION WITH QUALITY CHECKS
# ============================================
def transcribe_with_quality_check(model, audio_path, **params):
    """Transcribe ``audio_path`` and retry once if the output is repetitive.

    Args:
        model: Object exposing ``parameters()`` and ``transcribe()``; the
            loaded Whisper model in this notebook.
        audio_path: Path of the audio file handed to ``model.transcribe``.
        **params: Extra transcription options. ``beam_size``, ``best_of``,
            ``patience``, ``length_penalty`` and ``fp16`` are always
            overwritten by this function.

    Returns:
        The result of the first transcription, or of the retry when the
        repetition score of the first pass exceeds 0.3.
    """
    # Half precision is only requested when the model lives on a CUDA device.
    on_cuda = next(model.parameters()).device.type == 'cuda'

    print("   🎯 Transcribing with optimal settings...")

    decode_options = {
        **params,
        'beam_size': 3,
        'best_of': 3,
        'patience': 0.8,
        'length_penalty': 1.0,
        'fp16': on_cuda,
    }

    def run(options):
        # language=None leaves the language unset (auto-detect per the
        # original author's note).
        return model.transcribe(
            audio_path,
            language=None,
            task='transcribe',
            verbose=False,
            **options
        )

    result = run(decode_options)

    # Share of segments that exactly repeat the segment before them.
    repetition_score = calculate_repetition_score(result.get('segments', []))
    if repetition_score > 0.3:
        print(f"   ⚠️  High repetition detected ({repetition_score:.1%}), applying fixes...")

        # Second attempt: stop conditioning on previous text and use a higher
        # temperature with wider search settings.
        retry_options = {
            **decode_options,
            'condition_on_previous_text': False,
            'temperature': 0.8,
            'beam_size': 5,
            'best_of': 5,
        }
        result = run(retry_options)

    return result

def calculate_repetition_score(segments):
    """Return the fraction of segments that repeat their predecessor.

    Texts are compared after stripping whitespace and lower-casing; empty
    texts never count as repetitions. Inputs with fewer than 10 segments are
    scored 0.0.

    Args:
        segments: Sequence of dicts, each optionally carrying a ``'text'``.

    Returns:
        Number of consecutive duplicates divided by the number of segments.
    """
    total = len(segments)
    if total < 10:
        return 0.0

    normalized = [seg.get('text', '').strip().lower() for seg in segments]
    repeated = sum(
        1
        for previous, current in zip(normalized, normalized[1:])
        if current and current == previous
    )
    return repeated / total

# ============================================
# ENHANCED SPEAKER DIARIZATION
# ============================================
def perform_speaker_diarization(audio_path, whisper_result, hf_token):
    """Label each Whisper segment with a speaker using pyannote diarization.

    Args:
        audio_path: Path of the audio file to diarize.
        whisper_result: Whisper result dict; every entry of its
            ``'segments'`` list gets a ``'speaker'`` key added in place.
        hf_token: Hugging Face access token. When falsy, diarization is
            skipped and ``whisper_result`` is returned untouched.

    Returns:
        ``whisper_result`` with speakers assigned. If anything in the
        pyannote path fails, the result of ``simple_speaker_detection`` is
        returned instead.
    """
    if not hf_token:
        print("   ⚠️  No HF token - skipping speaker diarization")
        return whisper_result

    try:
        print("   🎭 Running enhanced speaker diarization...")

        # Load audio with torchaudio
        waveform, sample_rate = torchaudio.load(audio_path)

        # Initialize diarization pipeline
        pipeline = Pipeline.from_pretrained(
            "pyannote/speaker-diarization-3.1",
            use_auth_token=hf_token
        )

        # from_pretrained may hand back None instead of raising (e.g. when the
        # token lacks access to the gated model). Fail with a readable message
        # so the fallback log is not "'NoneType' object has no attribute 'to'".
        if pipeline is None:
            raise RuntimeError(
                "could not load pyannote/speaker-diarization-3.1 "
                "(check that the HF token has access to the gated model)"
            )

        # Move pipeline to GPU if available
        if torch.cuda.is_available():
            pipeline.to(torch.device('cuda'))

        # Run diarization, letting pyannote pick between 2 and 10 speakers
        diarization = pipeline({
            "waveform": waveform,
            "sample_rate": sample_rate
        }, num_speakers=None, min_speakers=2, max_speakers=10)

        # Flatten the diarization into a list of labelled time spans. The
        # label is rebuilt as SPEAKER_NN from the last "_"-separated part.
        speaker_timeline = [
            {
                'start': turn.start,
                'end': turn.end,
                'speaker': f"SPEAKER_{speaker.split('_')[-1].zfill(2)}"
            }
            for turn, _, speaker in diarization.itertracks(yield_label=True)
        ]

        # Assign to each Whisper segment the single speaker turn that overlaps
        # it the most; segments with no overlap at all stay on SPEAKER_00.
        for segment in whisper_result['segments']:
            seg_start = segment['start']
            seg_end = segment['end']

            best_speaker = 'SPEAKER_00'
            best_overlap = 0

            for spk_segment in speaker_timeline:
                overlap_start = max(seg_start, spk_segment['start'])
                overlap_end = min(seg_end, spk_segment['end'])
                overlap = max(0, overlap_end - overlap_start)

                # Strict ">" keeps the earliest turn on ties.
                if overlap > best_overlap:
                    best_overlap = overlap
                    best_speaker = spk_segment['speaker']

            segment['speaker'] = best_speaker

        # Count speakers actually assigned to segments
        unique_speakers = len(set(seg.get('speaker', 'SPEAKER_00')
                                 for seg in whisper_result['segments']))
        print(f"   ✓ Diarization complete: {unique_speakers} speakers identified")

        return whisper_result

    except Exception as e:
        # Best-effort boundary: any diarization failure degrades to the
        # pause-based heuristic instead of aborting the whole file.
        print(f"   ❌ Diarization failed: {str(e)}")
        return simple_speaker_detection(whisper_result)

def simple_speaker_detection(whisper_result):
    """Guess speaker turns from pauses when real diarization is unavailable.

    Walks the ``'segments'`` list in order and flips between two speakers
    whenever the gap between one segment's end and the next segment's start
    exceeds 2 seconds. Each segment gets a ``'speaker'`` key in place.

    Args:
        whisper_result: Result dict with an optional ``'segments'`` list.

    Returns:
        The same ``whisper_result`` object.
    """
    speaker_index = 0
    previous = None

    for segment in whisper_result.get('segments', []):
        if previous is not None:
            gap = segment['start'] - previous['end']
            if gap > 2.0:
                # Only two speakers are modelled, so a long pause just toggles.
                speaker_index = 1 - speaker_index

        segment['speaker'] = f"SPEAKER_{speaker_index:02d}"
        previous = segment

    return whisper_result

# ============================================
# POST-PROCESSING FOR QUALITY IMPROVEMENT
# ============================================
def post_process_transcript(segments):
    """Run the text clean-up steps over every segment, then merge short ones.

    Each segment's text is stripped and passed, in order, through
    ``fix_spelled_acronyms``, ``fix_urls``, ``reduce_fillers`` and
    ``fix_punctuation``. Segments whose text ends up empty are dropped; the
    others have their ``'text'`` updated in place.

    Args:
        segments: List of segment dicts.

    Returns:
        The surviving segments after ``merge_short_segments``.
    """
    cleanup_steps = (
        fix_spelled_acronyms,
        fix_urls,
        reduce_fillers,
        fix_punctuation,
    )

    kept = []
    for segment in segments:
        cleaned = segment.get('text', '').strip()
        for step in cleanup_steps:
            cleaned = step(cleaned)

        if not cleaned:
            continue  # nothing left to show for this segment

        segment['text'] = cleaned
        kept.append(segment)

    return merge_short_segments(kept)

def fix_spelled_acronyms(text, min_letters=3, max_letters=6):
    """Collapse letter-by-letter spellings into acronyms ('h m h c' -> 'HMHC').

    A run of single ASCII letters separated by whitespace is joined and
    upper-cased when its length lies within ``min_letters``..``max_letters``.
    Two explicit rules are applied afterwards regardless of those limits:
    'h m h c' (any case, optional spaces) becomes 'HMHC' and a lowercase
    'c a' / 'ca' word becomes 'CA'.

    Args:
        text: Segment text to clean.
        min_letters: Shortest run the generic rule collapses. The default of
            3 keeps ordinary two-word sequences of one-letter words (e.g.
            "I a") untouched.
        max_letters: Longest run the generic rule collapses.

    Returns:
        The text with spelled-out acronyms collapsed.
    """
    def collapse(match):
        letters = ''.join(match.group(0).split())
        if min_letters <= len(letters) <= max_letters:
            return letters.upper()
        return match.group(0)

    # Match the WHOLE run in one go so it is replaced exactly once. The
    # look-arounds reject letters glued to a word or an apostrophe, which
    # keeps contractions such as "that's a" from being read as "s a".
    text = re.sub(
        r"(?<![\w'’])[a-zA-Z](?:\s+[a-zA-Z])+(?![\w'’])",
        collapse,
        text,
    )

    # Specific common patterns
    text = re.sub(r'\bh\s*m\s*h\s*c\b', 'HMHC', text, flags=re.IGNORECASE)
    text = re.sub(r'\bc\s*a\b', 'CA', text)

    return text

def fix_urls(text):
    """Fix broken up URLs"""
    # Fix domain extensions
    text = re.sub(r'\.\s*c\s*a\b', '.ca', text)
    text = re.sub(r'\.\s*c\s*o\s*m\b', '.com', text)

    # Fix common URL patterns
    text = re.sub(r'(\w+)\s*\.\s*(\w+)\s*\.\s*(\w+)', r'\1.\2.\3', text)

    return text

def reduce_fillers(text):
    """Collapse immediate repetitions of a filler word into one occurrence.

    Handles 'um', 'uh', 'like' and 'you know' (case-insensitive) when the
    repeats are separated only by whitespace. The first occurrence is kept
    exactly as spoken, so "Um um" stays capitalized as "Um". Single fillers
    are left in place for naturalness.

    Args:
        text: Segment text to clean.

    Returns:
        The text with repeated fillers reduced.
    """
    fillers = ['um', 'uh', 'like', 'you know']
    for filler in fillers:
        # Capture the first occurrence and substitute it back, rather than
        # the lowercase literal, so its original casing survives.
        pattern = rf'\b({filler})\b(?:\s+{filler}\b)+'
        text = re.sub(pattern, r'\1', text, flags=re.IGNORECASE)

    return text

def fix_punctuation(text):
    """Tidy sentence punctuation in a segment.

    Three passes, in order: append a period when non-empty text does not
    already end in '.', '!' or '?'; collapse runs of the same terminator
    into one; upper-case a lowercase ASCII letter that follows a terminator
    plus whitespace.

    Args:
        text: Segment text to clean.

    Returns:
        The re-punctuated text (empty input stays empty).
    """
    terminators = ('.', '!', '?')
    if text and not text.endswith(terminators):
        text = text + '.'

    # "!!" -> "!", "..." -> "."
    collapsed = re.sub(r'([.!?])\1+', r'\1', text)

    def capitalize_sentence_start(match):
        separator, first_letter = match.group(1), match.group(2)
        return separator + first_letter.upper()

    return re.sub(r'([.!?]\s+)([a-z])', capitalize_sentence_start, collapsed)

def merge_short_segments(segments):
    """Merge very short or unfinished segments into the segment that follows.

    The running segment absorbs the next one when both belong to the same
    speaker (or neither carries a ``'speaker'``) and the running segment
    either has fewer than 5 words, or does not end in '.', '!' or '?' while
    the next segment starts less than 1 second after it ends. Segments are
    never merged across a speaker change, so text is not attributed to the
    wrong speaker.

    Args:
        segments: Ordered list of segment dicts with ``'start'``, ``'end'``
            and ``'text'``; the input dicts are not modified.

    Returns:
        A new list of merged segment dicts (the input itself when empty).
    """
    if not segments:
        return segments

    merged = []
    current = segments[0].copy()

    for next_seg in segments[1:]:
        current_text = current.get('text', '').strip()
        next_text = next_seg.get('text', '').strip()

        same_speaker = current.get('speaker') == next_seg.get('speaker')

        # The gap is only looked at for segments that end mid-sentence.
        should_merge = same_speaker and (
            len(current_text.split()) < 5
            or (bool(current_text) and current_text[-1] not in '.!?'
                and next_seg['start'] - current['end'] < 1.0)
        )

        if should_merge:
            # strip() avoids a stray leading space when the current text is empty
            current['text'] = f"{current_text} {next_text}".strip()
            current['end'] = next_seg['end']
        else:
            merged.append(current)
            current = next_seg.copy()

    merged.append(current)
    return merged

# ============================================
# MAIN PROCESSING PIPELINE
# ============================================
def process_audio_file(model, audio_path, hf_token):
    """Transcribe, diarize, clean up and save the transcript of one file.

    The transcript is written to ``OUTPUT_PATH`` as
    ``<audio file stem>_ultimate.txt``. Progress and statistics are printed
    along the way.

    Args:
        model: Loaded Whisper model, passed to
            ``transcribe_with_quality_check``.
        audio_path: Path of the audio file to process.
        hf_token: Hugging Face token forwarded to
            ``perform_speaker_diarization`` (may be ``None``).

    Returns:
        ``True`` when the transcript was saved, ``False`` when any step
        raised (the error and traceback are printed, not re-raised).
    """
    source_name = os.path.basename(audio_path)
    stem = os.path.splitext(source_name)[0]
    output_file = os.path.join(OUTPUT_PATH, f"{stem}_ultimate.txt")

    try:
        started_at = time.time()
        size_mb = os.path.getsize(audio_path) / (1024**2)

        print(f"\n{'='*60}")
        print(f"Processing: {source_name}")
        print(f"Size: {size_mb:.1f} MB")

        # GPU memory snapshot before the heavy work starts
        gpu_used, _gpu_reserved, gpu_total = get_gpu_memory()
        print(f"GPU Memory: {gpu_used:.1f}/{gpu_total:.1f} GB used")

        # The preprocessing step is announced but currently bypassed: the
        # original file is used as-is.
        print("   🎵 Preprocessing audio...")
        working_path = audio_path

        result = transcribe_with_quality_check(
            model, working_path, **TRANSCRIPTION_PARAMS
        )

        language = result.get('language', 'en')
        print(f"   🌍 Detected language: {language}")

        result = perform_speaker_diarization(working_path, result, hf_token)

        print("   ✨ Post-processing transcript...")
        segments = post_process_transcript(result.get('segments', []))

        # Duration is taken from the end of the last surviving segment.
        if segments:
            duration = segments[-1]['end']
        else:
            duration = 0
        elapsed = time.time() - started_at
        realtime_factor = duration / elapsed if elapsed > 0 else 0

        speakers = sorted({seg.get('speaker', 'SPEAKER_00') for seg in segments})

        print(f"\n   ✅ Processing complete!")
        print(f"      Duration: {duration/60:.1f} minutes")
        print(f"      Process time: {elapsed:.1f} seconds")
        print(f"      Speed: {realtime_factor:.1f}x realtime")
        print(f"      Speakers: {len(speakers)}")

        transcript_metadata = {
            'filename': source_name,
            'duration': duration,
            'language': language,
            'speakers': speakers,
            'model': WHISPER_MODEL
        }
        save_enhanced_transcript(output_file, segments, transcript_metadata)

        print(f"   💾 Saved: {os.path.basename(output_file)}")
        return True

    except Exception as e:
        # One bad file must not stop the batch: report and signal failure.
        print(f"   ❌ Error: {str(e)}")
        import traceback
        traceback.print_exc()
        return False

    finally:
        # Release cached GPU memory and collect garbage after every file,
        # whether it succeeded or not.
        torch.cuda.empty_cache()
        gc.collect()

def save_enhanced_transcript(output_file, segments, metadata):
    """Write the transcript to ``output_file`` as UTF-8 text.

    The file starts with a ``#``-prefixed header built from ``metadata``
    (keys ``'model'``, ``'filename'``, ``'duration'`` in seconds,
    ``'language'``, ``'speakers'``) plus the generation time. Segments
    follow as ``[start → end] text`` lines, with a ``[SPEAKER]`` heading
    each time the speaker changes. Segments with blank text are skipped.

    Args:
        output_file: Destination path; an existing file is overwritten.
        segments: Ordered segment dicts with ``'start'``, ``'end'`` and
            optional ``'text'`` / ``'speaker'``.
        metadata: Header values as described above.
    """
    def header_lines():
        # Yielded lazily so lines reach the file in the order they are built.
        yield "# ENHANCED TRANSCRIPT\n"
        yield f"# Model: Whisper {metadata['model']}\n"
        yield f"# File: {metadata['filename']}\n"
        yield f"# Duration: {metadata['duration']/60:.1f} minutes\n"
        yield f"# Language: {metadata['language']}\n"
        yield f"# Speakers: {', '.join(metadata['speakers'])}\n"
        yield f"# Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
        yield "#" + "="*60 + "\n\n"

    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(header_lines())

        last_speaker = None
        for segment in segments:
            speaker = segment.get('speaker', 'SPEAKER_00')
            begins = segment['start']
            finishes = segment['end']
            body = segment.get('text', '').strip()

            if not body:
                continue

            # A new heading only when the speaker differs from the last
            # segment that was actually written.
            if speaker != last_speaker:
                f.write(f"\n[{speaker}]\n")
                last_speaker = speaker

            f.write(f"[{format_timestamp(begins)} → {format_timestamp(finishes)}] {body}\n")

def format_timestamp(seconds):
    """Format a duration in seconds as ``MM:SS.SS``.

    The value is rounded to hundredths of a second before it is split into
    minutes and seconds, so a value such as 59.999 becomes "01:00.00"
    rather than the invalid "00:60.00".

    Args:
        seconds: Non-negative time offset in seconds.

    Returns:
        The formatted timestamp; minutes are zero-padded to at least two
        digits and are not wrapped into hours.
    """
    total_centis = int(round(seconds * 100))
    minutes, centis = divmod(total_centis, 6000)
    return f"{minutes:02d}:{centis / 100:05.2f}"

# ============================================
# MAIN EXECUTION
# ============================================
def main():
    """Transcribe the next batch of unprocessed WAV files.

    Steps: read the ``HF_TOKEN`` Colab secret (optional), list ``*.wav``
    files in ``INPUT_PATH``, skip those that already have a transcript in
    ``OUTPUT_PATH`` (``_ultimate.txt``, ``_enhanced.txt`` or
    ``_large-v3.txt``), load the Whisper model once, then process at most
    three files with a 5 second pause between them. A summary is printed at
    the end; re-run to continue with the remaining files.
    """
    # Get HF token. Catch Exception rather than everything so that Ctrl-C
    # (KeyboardInterrupt) and SystemExit still propagate.
    try:
        hf_token = userdata.get('HF_TOKEN')
        print("✓ HuggingFace token loaded")
    except Exception:
        print("⚠️  No HF token - speaker diarization will be limited")
        hf_token = None

    # Find WAV files
    wav_files = sorted(glob.glob(os.path.join(INPUT_PATH, "*.wav")))
    if not wav_files:
        print(f"❌ No WAV files found in {INPUT_PATH}")
        return

    # Check which need processing: a file is done when a transcript with any
    # of the known suffixes exists.
    remaining_files = []
    for wav_file in wav_files:
        base_name = os.path.splitext(os.path.basename(wav_file))[0]
        if not any(os.path.exists(os.path.join(OUTPUT_PATH, f"{base_name}{suffix}"))
                  for suffix in ['_ultimate.txt', '_enhanced.txt', '_large-v3.txt']):
            remaining_files.append(wav_file)

    print(f"\n📊 Status: {len(remaining_files)} files to process")

    if not remaining_files:
        print("✅ All files already processed!")
        return

    # Load model once and reuse it for every file in the batch
    print(f"\n⏳ Loading Whisper {WHISPER_MODEL}...")
    device = "cuda" if torch.cuda.is_available() else "cpu"

    with torch.inference_mode():
        model = whisper.load_model(WHISPER_MODEL, device=device)

    print("✓ Model loaded and optimized")

    # Process files (3 at a time for large-v3)
    files_to_process = remaining_files[:3]
    successful = 0

    for audio_file in files_to_process:
        if process_audio_file(model, audio_file, hf_token):
            successful += 1

        # Cool down between files (not after the last one)
        if audio_file != files_to_process[-1]:
            print("\n⏳ Cooling down for 5 seconds...")
            time.sleep(5)

    # Cleanup: drop the model and release cached GPU memory
    del model
    torch.cuda.empty_cache()
    gc.collect()

    print("\n" + "="*60)
    print(f"✅ SESSION COMPLETE")
    print(f"   Processed: {successful}/{len(files_to_process)} files")
    print(f"   Remaining: {len(remaining_files) - len(files_to_process)} files")
    if len(remaining_files) > len(files_to_process):
        print("\n💡 Run again to process remaining files")
    print("="*60)

# ============================================
# RUN THE SYSTEM
# ============================================
# Start the batch only when this code runs as the main module, not when it
# is imported from elsewhere.
if __name__ == "__main__":
    main()

# ============================================
# USAGE NOTES
# ============================================
"""
KEY IMPROVEMENTS IN THIS VERSION:

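1. GPU OPTIMIZATION:
   - Suggested beam size (OPTIMAL_BEAM_SIZE) derived from total GPU memory;
     batch size is not configured by this code
   - TF32 matmul and cuDNN benchmark mode enabled when CUDA is available
   - Audio chunk length is not configured by this code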

2. TRANSCRIPTION QUALITY:
   - Smart repetition detection and correction
   - Intelligent conditioning (on by default, off for problems)
   - Post-processing fixes acronyms, URLs, and punctuation

3. SPEAKER DIARIZATION:
   - Robust Pyannote implementation
   - Fallback to pause-based detection
   - Better speaker assignment logic

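4. PERFORMANCE:
   - Audio preprocessing (normalize + silence stripping) is implemented in
     preprocess_audio() but currently skipped by process_audio_file()
   - Files are processed one after another, up to 3 per run
   - Short segments are merged after post-processing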

To get the best results:
1. Add HF_TOKEN to Colab secrets for diarization
2. Ensure GPU runtime is enabled
3. Delete any corrupted previous transcripts
4. Run this complete system
"""

🚀 Whisper V3 Ultimate Transcription System
✓ openai-whisper already installed
✓ pyannote.audio already installed
✓ pydub already installed

🎮 GPU Optimization
   Device: Tesla T4
   Total Memory: 15.8 GB
   Memory available for processing: 9.3 GB
✓ HuggingFace token loaded

📊 Status: 6 files to process

⏳ Loading Whisper large-v3...
✓ Model loaded and optimized

Processing: Call Recording - 13Mar25 1300 HB.wav
Size: 70.3 MB
GPU Memory: 6.3/15.8 GB used
   🎵 Preprocessing audio...
   🎯 Transcribing with optimal settings...
Detected language: English


 99%|█████████▊| 82508/83628 [10:44<00:08, 127.92frames/s]


   🌍 Detected language: en
   🎭 Running enhanced speaker diarization...


config.yaml:   0%|          | 0.00/469 [00:00<?, ?B/s]

DEBUG:speechbrain.utils.checkpoints:Registered checkpoint save hook for _speechbrain_save
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint load hook for _speechbrain_load
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint save hook for save
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint load hook for load
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint save hook for _save
DEBUG:speechbrain.utils.checkpoints:Registered checkpoint load hook for _recover


pytorch_model.bin:   0%|          | 0.00/5.91M [00:00<?, ?B/s]

config.yaml:   0%|          | 0.00/399 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/26.6M [00:00<?, ?B/s]

config.yaml:   0%|          | 0.00/221 [00:00<?, ?B/s]

   ✓ Diarization complete: 2 speakers identified
   ✨ Post-processing transcript...

   ✅ Processing complete!
      Duration: 13.9 minutes
      Process time: 699.0 seconds
      Speed: 1.2x realtime
      Speakers: 2
   💾 Saved: Call Recording - 13Mar25 1300 HB_ultimate.txt

⏳ Cooling down for 5 seconds...

Processing: Call Recording - 19Mar2025 0800 JD.wav
Size: 125.2 MB
GPU Memory: 6.3/15.8 GB used
   🎵 Preprocessing audio...
   🎯 Transcribing with optimal settings...
Detected language: English


 10%|▉         | 14550/148857 [00:49<07:39, 292.55frames/s]


KeyboardInterrupt: 

# Test Drive File Storage


In [3]:
# SIMPLE WORKING TRANSCRIPTION SYSTEM
# Based on the approach that was working

# ============================================
# CELL 1: Complete Setup and Processing
# ============================================

import os
import glob
import json
import time
import gc
import subprocess
from datetime import datetime
from google.colab import drive

# Mount Drive
# if not os.path.exists('/content/drive'):
#    drive.mount('/content/drive')

In [4]:
# ============================================
# BLOCK 2: File Discovery and Status
# ============================================
"""
Run this to see what files need processing
"""

# Mount Google Drive
# Called unconditionally here (the earlier cell guards the mount with an
# existence check).
drive.mount('/content/drive')

# Define paths - adjust these to your actual locations
INPUT_PATH = "/content/drive/My Drive/PRUT-Transcriptions/Recordings_PRUT"
OUTPUT_PATH = "/content/drive/My Drive/PRUT-Transcriptions/Transcripts"

# Get list of audio files
# Both .mp4 and .wav recordings are listed; each group is sorted on its own,
# with the .mp4 files first.
mp4_files = sorted(glob.glob(os.path.join(INPUT_PATH, "*.mp4")))
wav_files = sorted(glob.glob(os.path.join(INPUT_PATH, "*.wav")))
all_audio_files = mp4_files + wav_files

print(f"\n📁 Found {len(all_audio_files)} audio files:")
for i, f in enumerate(all_audio_files, 1):
    print(f"  {i}. {os.path.basename(f)}")

# Check what's already been transcribed
# A file counts as completed only when "<stem>_transcript.txt" exists.
# NOTE(review): the first cell saves transcripts as "<stem>_ultimate.txt",
# which this check does not recognize - confirm whether those should count
# as completed here.
completed_files = []
remaining_files = []

for audio_file in all_audio_files:
    base_name = os.path.splitext(os.path.basename(audio_file))[0]
    transcript_path = os.path.join(OUTPUT_PATH, f"{base_name}_transcript.txt")

    if os.path.exists(transcript_path):
        completed_files.append(audio_file)
    else:
        remaining_files.append(audio_file)

print(f"\n📊 Status:")
print(f"  ✓ Completed: {len(completed_files)}")
print(f"  ⏳ Remaining: {len(remaining_files)}")

# Point at the first unprocessed file, if any.
if remaining_files:
    print(f"\n🎯 Next file to process: {os.path.basename(remaining_files[0])}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

📁 Found 8 audio files:
  1. Call Recording - 13Mar2025 1200 BPA.wav
  2. Call Recording - 13Mar25 1130 BK.wav
  3. Call Recording - 13Mar25 1300 HB.wav
  4. Call Recording - 19Mar2025 0800 JD.wav
  5. Call Recording - 19Mar25 0900 - AJ.wav
  6. Call Recording - 19Mar25 1730 - MO.wav
  7. Call Recording - 20Mar2025 1200 LN.wav
  8. Call Recording - 26Mar2025 0830 SA.wav

📊 Status:
  ✓ Completed: 0
  ⏳ Remaining: 8

🎯 Next file to process: Call Recording - 13Mar2025 1200 BPA.wav
