<a href="https://colab.research.google.com/github/VJMeyer/HPVPre_Repo/blob/main/PRUT_Transcriber6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# WHISPER V3 ULTIMATE - MAXIMIZING GPU USAGE FOR QUALITY
# Targets 14GB GPU RAM usage with quality-focused optimizations

import os
import glob
import time
import gc
import subprocess
import numpy as np
from datetime import datetime
from google.colab import drive

# Mount Drive
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

# ============================================
# CONFIGURATION - QUALITY MAXIMIZED
# ============================================
INPUT_PATH = "/content/drive/My Drive/PRUT-Transcriptions/Recordings_PRUT"
OUTPUT_PATH = "/content/drive/My Drive/PRUT-Transcriptions/Transcripts"
WHISPER_MODEL = "large-v3"

# Enhanced parameters for maximum quality
TRANSCRIPTION_PARAMS = {
    # Quality settings
    'best_of': 5,  # Generate 5 candidates and pick the best (uses more memory)
    'beam_size': 10,  # Beam search instead of greedy (much better quality, more memory)
    'patience': 2.0,  # Beam search patience
    'length_penalty': 1.0,  # Favor longer sequences

    # Temperature strategy
    'temperature': 0.0,  # Start with 0 for consistency
    'temperature_increment_on_fallback': 0.2,  # Only increase if needed

    # Thresholds
    'compression_ratio_threshold': 2.4,
    'logprob_threshold': -1.0,
    'no_speech_threshold': 0.6,

    # Context handling - SMART approach
    'condition_on_previous_text': True,  # Keep context by default
    'initial_prompt': "This is a professional interview or call recording with clear speech.",

    # Advanced options
    'word_timestamps': True,
    'prepend_punctuations': '"\'"¿([{-',
    'append_punctuations': '"\'.。,，!！?？:：")]}、',

    # Longer audio segments for better context
    'chunk_length': 60,  # Process 60-second chunks instead of default 30
}

# Memory-intensive options for 14GB target
ADVANCED_OPTIONS = {
    'n_mel': 128,  # Increase mel bands (default 80)
    'sample_len': 1500,  # Longer sample length
    'best_of_temperatures': [0.0, 0.1, 0.2, 0.4, 0.8],  # More temperature attempts
}

os.makedirs(OUTPUT_PATH, exist_ok=True)

print("🚀 Whisper V3 Ultimate - Quality Maximized Edition")
print("   Target GPU usage: 14GB for maximum accuracy")
print("   Features: Beam search, extended context, smart hallucination prevention")
print("="*60)

# ============================================
# INSTALL DEPENDENCIES (SIMPLIFIED)
# ============================================
print("\n📦 Checking dependencies...")

try:
    import whisper
    import torch
    print("✓ Core packages ready")
except ImportError:
    print("Installing Whisper...")
    subprocess.run(['pip', 'install', '--quiet', 'openai-whisper'], check=True)
    import whisper
    import torch

# ============================================
# GPU CONFIGURATION
# ============================================
if torch.cuda.is_available():
    device = "cuda"
    gpu_props = torch.cuda.get_device_properties(0)
    total_memory_gb = gpu_props.total_memory / 1e9

    print(f"\n🎮 GPU Configuration:")
    print(f"   Device: {gpu_props.name}")
    print(f"   Total Memory: {total_memory_gb:.1f} GB")

    # Set PyTorch to use more memory
    torch.cuda.set_per_process_memory_fraction(0.95)  # Use 95% of GPU memory
    print(f"   Allocated for processing: {total_memory_gb * 0.95:.1f} GB")

    # Enable TF32 for better performance on Ampere GPUs
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
    print("   TF32 acceleration: Enabled")
else:
    device = "cpu"
    print("⚠️  No GPU detected - this will be extremely slow")

# ============================================
# ENHANCED TRANSCRIPTION FUNCTIONS
# ============================================

def load_audio_extended(file_path, sr=16000):
    """Load audio with extended precision"""
    import librosa
    audio, _ = librosa.load(file_path, sr=sr, mono=True, dtype=np.float32)
    return audio

def smart_transcribe(model, audio_path, initial_params):
    """
    Intelligent transcription with adaptive parameters
    """
    print("   🧠 Smart transcription mode engaged...")

    # First attempt with context
    params = initial_params.copy()
    attempt = 1
    max_attempts = 3

    while attempt <= max_attempts:
        try:
            print(f"   Attempt {attempt}/3 (context={'on' if params['condition_on_previous_text'] else 'off'})...", end='', flush=True)

            # Load audio in high quality
            audio = load_audio_extended(audio_path)

            # Transcribe with current parameters
            result = model.transcribe(
                audio,
                language=None,  # Auto-detect
                task='transcribe',
                verbose=False,
                fp16=(device == "cuda"),
                **params
            )

            # Check for hallucinations
            segments = result.get('segments', [])
            hallucination_score = calculate_hallucination_score(segments)

            if hallucination_score < 0.1:  # Good transcription
                print(" ✓ Success (quality score: {:.2f})".format(1 - hallucination_score))
                return result
            else:
                print(f" ⚠️  Hallucination detected (score: {hallucination_score:.2f})")

                # Adaptive strategy
                if attempt == 1:
                    # Try without context
                    params['condition_on_previous_text'] = False
                    params['initial_prompt'] = None
                elif attempt == 2:
                    # Increase temperature and reduce beam search
                    params['temperature'] = 0.2
                    params['beam_size'] = 5
                    params['best_of'] = 3

                attempt += 1

        except Exception as e:
            print(f" ❌ Error: {e}")
            attempt += 1

    # Return best effort
    return result

def calculate_hallucination_score(segments):
    """
    Calculate a score from 0-1 indicating likelihood of hallucination
    """
    if len(segments) < 5:
        return 0.0

    # Check for repetitions
    texts = [seg.get('text', '').strip().lower() for seg in segments]

    repetition_count = 0
    for i in range(len(texts) - 3):
        # Check for exact repetitions in sliding window
        window = texts[i:i+4]
        if len(set(window)) == 1 and window[0]:
            repetition_count += 1

    # Check for suspiciously regular timestamps
    timestamp_regularity = 0
    for i in range(len(segments) - 1):
        duration = segments[i+1]['start'] - segments[i]['end']
        if duration < 0.1:  # Segments too close together
            timestamp_regularity += 1

    # Combined score
    repetition_score = repetition_count / max(1, len(segments) - 3)
    timestamp_score = timestamp_regularity / max(1, len(segments) - 1)

    return min(1.0, repetition_score + timestamp_score)

def format_time(seconds):
    """Format seconds to MM:SS.ss"""
    minutes = int(seconds // 60)
    secs = seconds % 60
    return f"{minutes:02d}:{secs:05.2f}"

# ============================================
# MAIN PROCESSING PIPELINE
# ============================================

# Find WAV files
wav_files = sorted(glob.glob(os.path.join(INPUT_PATH, "*.wav")))
remaining_files = []

print(f"\n📊 Scanning for files...")
for wav_file in wav_files:
    base_name = os.path.splitext(os.path.basename(wav_file))[0]

    # Check multiple possible output names
    transcript_exists = any(
        os.path.exists(os.path.join(OUTPUT_PATH, f"{base_name}{suffix}"))
        for suffix in ['_ultimate.txt', '_enhanced.txt', '_large-v3.txt']
    )

    if not transcript_exists:
        remaining_files.append(wav_file)
        size_mb = os.path.getsize(wav_file) / (1024**2)
        print(f"   ⏳ {os.path.basename(wav_file)} ({size_mb:.1f} MB)")

print(f"\nFiles to process: {len(remaining_files)} of {len(wav_files)}")

if remaining_files:
    # Process 2 files per session with large-v3
    files_to_process = remaining_files[:2]

    # Load model with maximum quality settings
    print(f"\n⏳ Loading {WHISPER_MODEL} for maximum quality...")
    print("   This optimizes for accuracy over speed")

    start_load = time.time()
    model = whisper.load_model(WHISPER_MODEL, device=device)
    load_time = time.time() - start_load

    # Check memory usage after model load
    if torch.cuda.is_available():
        allocated_gb = torch.cuda.memory_allocated() / 1e9
        reserved_gb = torch.cuda.memory_reserved() / 1e9
        print(f"\n✓ Model loaded in {load_time:.1f}s")
        print(f"   GPU Memory: {allocated_gb:.1f}GB used, {reserved_gb:.1f}GB reserved")
        print(f"   Free for processing: {total_memory_gb - reserved_gb:.1f}GB")

    # Process files
    for idx, audio_file in enumerate(files_to_process):
        base_name = os.path.splitext(os.path.basename(audio_file))[0]
        output_file = os.path.join(OUTPUT_PATH, f"{base_name}_ultimate.txt")

        print(f"\n{'='*70}")
        print(f"[{idx+1}/{len(files_to_process)}] {os.path.basename(audio_file)}")
        print(f"{'='*70}")

        try:
            start_time = time.time()
            file_size_mb = os.path.getsize(audio_file) / (1024**2)

            # Estimate duration from file size (rough approximation)
            estimated_duration = file_size_mb * 12  # ~12 seconds per MB for WAV

            print(f"   File size: {file_size_mb:.1f} MB")
            print(f"   Estimated duration: {format_time(estimated_duration)}")
            print(f"   Using: Beam search (size={TRANSCRIPTION_PARAMS['beam_size']}), "
                  f"Best of {TRANSCRIPTION_PARAMS['best_of']}")

            # Perform smart transcription
            result = smart_transcribe(model, audio_file, TRANSCRIPTION_PARAMS)

            # Extract results
            segments = result.get('segments', [])
            detected_language = result.get('language', 'unknown')
            actual_duration = segments[-1]['end'] if segments else 0
            process_time = time.time() - start_time

            # Calculate statistics
            speed_factor = actual_duration / process_time if process_time > 0 else 0
            words_count = sum(len(seg.get('text', '').split()) for seg in segments)

            print(f"\n✅ Transcription complete!")
            print(f"   Language: {detected_language}")
            print(f"   Duration: {format_time(actual_duration)}")
            print(f"   Process time: {format_time(process_time)}")
            print(f"   Speed: {speed_factor:.1f}x realtime")
            print(f"   Total words: {words_count:,}")

            # Memory check
            if torch.cuda.is_available():
                peak_memory_gb = torch.cuda.max_memory_allocated() / 1e9
                print(f"   Peak GPU memory: {peak_memory_gb:.1f}GB")

            # Save high-quality transcript
            with open(output_file, 'w', encoding='utf-8') as f:
                # Header with metadata
                f.write(f"# Whisper V3 Ultimate Transcription\n")
                f.write(f"# Model: {WHISPER_MODEL}\n")
                f.write(f"# Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write(f"# File: {os.path.basename(audio_file)}\n")
                f.write(f"# Duration: {format_time(actual_duration)}\n")
                f.write(f"# Language: {detected_language}\n")
                f.write(f"# Words: {words_count:,}\n")
                f.write(f"# Processing: Beam search (size={TRANSCRIPTION_PARAMS['beam_size']}), "
                       f"Best of {TRANSCRIPTION_PARAMS['best_of']}\n")
                f.write("#" + "="*70 + "\n\n")

                # Write segments with improved formatting
                for i, segment in enumerate(segments):
                    start = segment['start']
                    end = segment['end']
                    text = segment.get('text', '').strip()

                    if text:  # Skip empty segments
                        # Format: [MM:SS.ss → MM:SS.ss] Text
                        f.write(f"[{format_time(start)} → {format_time(end)}] {text}\n")

                        # Add paragraph breaks for long pauses
                        if i < len(segments) - 1:
                            next_start = segments[i + 1]['start']
                            pause_duration = next_start - end
                            if pause_duration > 3.0:  # 3+ second pause
                                f.write("\n")

            print(f"\n✓ Saved: {os.path.basename(output_file)}")

        except Exception as e:
            print(f"\n❌ Failed: {str(e)}")
            import traceback
            traceback.print_exc()

            # Save error log
            with open(f"{output_file}.error", 'w') as f:
                f.write(f"Error processing: {audio_file}\n")
                f.write(f"Error: {str(e)}\n")
                f.write(f"Traceback:\n{traceback.format_exc()}\n")

        # Cleanup after each file
        torch.cuda.empty_cache()
        gc.collect()

        # Show memory stats
        if torch.cuda.is_available():
            current_memory_gb = torch.cuda.memory_allocated() / 1e9
            print(f"\n💾 Memory after cleanup: {current_memory_gb:.1f}GB")

        # Cool down between files
        if idx < len(files_to_process) - 1:
            print("\n⏳ Cooling down for 10 seconds...")
            time.sleep(10)

    # Final cleanup
    del model
    torch.cuda.empty_cache()
    gc.collect()

    print(f"\n{'='*70}")
    print("✅ SESSION COMPLETE")
    print(f"   Processed: {len(files_to_process)} files")
    print(f"   Remaining: {len(remaining_files) - len(files_to_process)} files")
    print(f"   Output format: *_ultimate.txt")
    print("\n💡 Quality optimizations used:")
    print("   - Beam search for better accuracy")
    print("   - Best-of-N sampling")
    print("   - Extended context windows")
    print("   - Smart hallucination prevention")
    print("   - 95% GPU memory utilization")
    print(f"{'='*70}")

else:
    print("\n✅ All files have been transcribed!")
    print(f"📁 Transcripts location: {OUTPUT_PATH}")

# ============================================
# OPTIMIZATION TIPS
# ============================================
print("\n" + "="*70)
print("💡 FURTHER OPTIMIZATION OPTIONS")
print("="*70)
print("To push quality even higher, you can manually adjust:")
print("")
print("1. Increase beam_size to 20 (slower but more accurate):")
print("   TRANSCRIPTION_PARAMS['beam_size'] = 20")
print("")
print("2. Increase best_of to 10 (much slower):")
print("   TRANSCRIPTION_PARAMS['best_of'] = 10")
print("")
print("3. For interviews with technical terms, add a prompt:")
print("   TRANSCRIPTION_PARAMS['initial_prompt'] = 'Technical interview about...'")
print("")
print("4. For very long files, increase chunk_length:")
print("   TRANSCRIPTION_PARAMS['chunk_length'] = 120  # 2-minute chunks")
print("="*70)

Mounted at /content/drive
🚀 Whisper V3 Ultimate - Quality Maximized Edition
   Target GPU usage: 14GB for maximum accuracy
   Features: Beam search, extended context, smart hallucination prevention

📦 Checking dependencies...
Installing Whisper...

🎮 GPU Configuration:
   Device: Tesla T4
   Total Memory: 15.8 GB
   Allocated for processing: 15.0 GB
   TF32 acceleration: Enabled

📊 Scanning for files...
   ⏳ Call Recording - 19Mar2025 0800 JD.wav (125.2 MB)
   ⏳ Call Recording - 19Mar25 0900 - AJ.wav (77.2 MB)
   ⏳ Call Recording - 19Mar25 1730 - MO.wav (87.2 MB)
   ⏳ Call Recording - 20Mar2025 1200 LN.wav (179.8 MB)
   ⏳ Call Recording - 26Mar2025 0830 SA.wav (94.2 MB)

Files to process: 5 of 8

⏳ Loading large-v3 for maximum quality...
   This optimizes for accuracy over speed


100%|█████████████████████████████████████| 2.88G/2.88G [02:12<00:00, 23.3MiB/s]



✓ Model loaded in 164.4s
   GPU Memory: 6.3GB used, 9.8GB reserved
   Free for processing: 6.0GB

[1/2] Call Recording - 19Mar2025 0800 JD.wav
   File size: 125.2 MB
   Estimated duration: 25:02.52
   Using: Beam search (size=10), Best of 5
   🧠 Smart transcription mode engaged...
   Attempt 1/3 (context=on)...Detected language: English


  0%|          | 0/148857 [00:00<?, ?frames/s]

 ❌ Error: DecodingOptions.__init__() got an unexpected keyword argument 'temperature_increment_on_fallback'
   Attempt 2/3 (context=on)...




Detected language: English


  0%|          | 0/148857 [00:00<?, ?frames/s]

 ❌ Error: DecodingOptions.__init__() got an unexpected keyword argument 'temperature_increment_on_fallback'
   Attempt 3/3 (context=on)...




Detected language: English


  0%|          | 0/148857 [00:00<?, ?frames/s]

 ❌ Error: DecodingOptions.__init__() got an unexpected keyword argument 'temperature_increment_on_fallback'

❌ Failed: cannot access local variable 'result' where it is not associated with a value



Traceback (most recent call last):
  File "/tmp/ipython-input-1-3841609909.py", line 278, in <cell line: 0>
    result = smart_transcribe(model, audio_file, TRANSCRIPTION_PARAMS)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-1-3841609909.py", line 173, in smart_transcribe
    return result
           ^^^^^^
UnboundLocalError: cannot access local variable 'result' where it is not associated with a value



💾 Memory after cleanup: 6.3GB

⏳ Cooling down for 10 seconds...

[2/2] Call Recording - 19Mar25 0900 - AJ.wav
   File size: 77.2 MB
   Estimated duration: 15:25.97
   Using: Beam search (size=10), Best of 5
   🧠 Smart transcription mode engaged...
   Attempt 1/3 (context=on)...Detected language: English


  0%|          | 0/91737 [00:00<?, ?frames/s]

 ❌ Error: DecodingOptions.__init__() got an unexpected keyword argument 'temperature_increment_on_fallback'
   Attempt 2/3 (context=on)...




Detected language: English


  0%|          | 0/91737 [00:00<?, ?frames/s]

 ❌ Error: DecodingOptions.__init__() got an unexpected keyword argument 'temperature_increment_on_fallback'
   Attempt 3/3 (context=on)...




Detected language: English


  0%|          | 0/91737 [00:00<?, ?frames/s]

 ❌ Error: DecodingOptions.__init__() got an unexpected keyword argument 'temperature_increment_on_fallback'

❌ Failed: cannot access local variable 'result' where it is not associated with a value

💾 Memory after cleanup: 6.3GB



Traceback (most recent call last):
  File "/tmp/ipython-input-1-3841609909.py", line 278, in <cell line: 0>
    result = smart_transcribe(model, audio_file, TRANSCRIPTION_PARAMS)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-1-3841609909.py", line 173, in smart_transcribe
    return result
           ^^^^^^
UnboundLocalError: cannot access local variable 'result' where it is not associated with a value



✅ SESSION COMPLETE
   Processed: 2 files
   Remaining: 3 files
   Output format: *_ultimate.txt

💡 Quality optimizations used:
   - Beam search for better accuracy
   - Best-of-N sampling
   - Extended context windows
   - Smart hallucination prevention
   - 95% GPU memory utilization

💡 FURTHER OPTIMIZATION OPTIONS
To push quality even higher, you can manually adjust:

1. Increase beam_size to 20 (slower but more accurate):
   TRANSCRIPTION_PARAMS['beam_size'] = 20

2. Increase best_of to 10 (much slower):
   TRANSCRIPTION_PARAMS['best_of'] = 10

3. For interviews with technical terms, add a prompt:
   TRANSCRIPTION_PARAMS['initial_prompt'] = 'Technical interview about...'

4. For very long files, increase chunk_length:
   TRANSCRIPTION_PARAMS['chunk_length'] = 120  # 2-minute chunks


In [1]:
# WHISPER V3 SIMPLE & STABLE - NO MEMORY ISSUES
# Clean implementation without parallel processing or aggressive allocation

import os
import glob
import time
import gc
import subprocess
import sys
import torch
from datetime import datetime
from google.colab import drive

# Mount Drive
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

# ============================================
# INSTALL DEPENDENCIES
# ============================================
print("📦 Checking dependencies...")
try:
    import whisper
    print("✓ Whisper already installed")
except ImportError:
    print("Installing openai-whisper...")
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', 'openai-whisper'])
    import whisper
    print("✓ Whisper installed")

# ============================================
# CONFIGURATION - SIMPLE & STABLE
# ============================================
INPUT_PATH = "/content/drive/My Drive/PRUT-Transcriptions/Recordings_PRUT"
OUTPUT_PATH = "/content/drive/My Drive/PRUT-Transcriptions/Transcripts"
os.makedirs(OUTPUT_PATH, exist_ok=True)

# Simple transcription parameters
TRANSCRIPTION_PARAMS = {
    'temperature': 0.2,
    'compression_ratio_threshold': 2.4,
    'logprob_threshold': -1.0,
    'no_speech_threshold': 0.6,
    'condition_on_previous_text': True,
    'word_timestamps': True,
    'prepend_punctuations': '"\'"¿([{-',
    'append_punctuations': '"\'.。,，!！?？:：")]}、',
    'beam_size': 5,  # Standard beam size
    'best_of': 5,    # Standard best_of
    'fp16': False,    # Use FP16 for efficiency
}

print("\n🚀 Whisper V3 Simple Transcription System")
print("="*60)

# ============================================
# GPU MONITORING
# ============================================
def print_gpu_memory():
    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated() / 1e9
        reserved = torch.cuda.memory_reserved() / 1e9
        total = torch.cuda.get_device_properties(0).total_memory / 1e9
        print(f"GPU Memory: {allocated:.1f}GB allocated, {reserved:.1f}GB reserved / {total:.1f}GB total")
    else:
        print("No GPU available")

# ============================================
# LOAD MODEL (SIMPLE)
# ============================================
print("\nLoading Whisper large-v3...")
device = "cuda" if torch.cuda.is_available() else "cpu"

# Clear any existing memory
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    gc.collect()

# Load model normally
model = whisper.load_model("large-v3", device=device)
print("✓ Model loaded successfully")
print_gpu_memory()

# ============================================
# POST-PROCESSING UTILITIES
# ============================================
def clean_text(text):
    """Basic text cleanup"""
    # Fix common acronym patterns
    text = text.replace(' h m h c ', ' HMHC ')
    text = text.replace(' h n h m ', ' HMHC ')
    text = text.replace(' c a ', ' CA ')
    text = text.replace(' c o m ', '.com ')
    text = text.replace(' . ', '.')

    # Fix spacing around punctuation
    text = text.replace(' ,', ',')
    text = text.replace(' .', '.')
    text = text.replace(' ?', '?')
    text = text.replace(' !', '!')

    return text.strip()

def is_quality_issue(segments):
    """Check for repetition or quality issues"""
    if len(segments) < 10:
        return False

    # Check for excessive repetition
    texts = [seg.get('text', '').strip().lower() for seg in segments[-10:]]
    unique_texts = set(texts)

    # If last 10 segments have less than 3 unique texts, there's likely repetition
    return len(unique_texts) < 3

# ============================================
# MAIN TRANSCRIPTION FUNCTION
# ============================================
def transcribe_file(audio_path):
    """Transcribe a single file with quality checks"""
    base_name = os.path.splitext(os.path.basename(audio_path))[0]
    output_file = os.path.join(OUTPUT_PATH, f"{base_name}_transcript.txt")

    print(f"\n{'='*60}")
    print(f"Processing: {os.path.basename(audio_path)}")
    print(f"{'='*60}")

    try:
        start_time = time.time()
        file_size_mb = os.path.getsize(audio_path) / (1024**2)
        print(f"File size: {file_size_mb:.1f} MB")

        # Transcribe with standard parameters
        print("Transcribing...")
        result = model.transcribe(
            audio_path,
            language=None,  # Auto-detect language
            task='transcribe',
            verbose=False,
            **TRANSCRIPTION_PARAMS
        )

        # Check for quality issues
        segments = result.get('segments', [])
        if is_quality_issue(segments):
            print("⚠️  Quality issue detected, retrying with adjusted parameters...")

            # Retry with different parameters
            retry_params = TRANSCRIPTION_PARAMS.copy()
            retry_params['temperature'] = 0.8
            retry_params['condition_on_previous_text'] = False

            result = model.transcribe(
                audio_path,
                language=None,
                task='transcribe',
                verbose=False,
                **retry_params
            )
            segments = result.get('segments', [])

        # Process results
        duration = segments[-1]['end'] if segments else 0
        process_time = time.time() - start_time
        speed_factor = duration / process_time if process_time > 0 else 0

        print(f"\n✅ Transcription complete!")
        print(f"   Language: {result.get('language', 'unknown')}")
        print(f"   Duration: {duration/60:.1f} minutes")
        print(f"   Process time: {process_time:.1f} seconds")
        print(f"   Speed: {speed_factor:.1f}x realtime")
        print_gpu_memory()

        # Save transcript
        with open(output_file, 'w', encoding='utf-8') as f:
            # Header
            f.write(f"# Whisper Large-v3 Transcript\n")
            f.write(f"# File: {os.path.basename(audio_path)}\n")
            f.write(f"# Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write(f"# Language: {result.get('language', 'unknown')}\n")
            f.write(f"# Duration: {duration/60:.1f} minutes\n")
            f.write("#" + "="*50 + "\n\n")

            # Write segments
            for segment in segments:
                start = segment['start']
                end = segment['end']
                text = clean_text(segment.get('text', ''))

                if text:  # Only write non-empty segments
                    f.write(f"[{start:06.2f} → {end:06.2f}] {text}\n")

        print(f"💾 Saved: {os.path.basename(output_file)}")
        return True

    except Exception as e:
        print(f"❌ Error: {str(e)}")
        return False

    finally:
        # Clear GPU cache after each file
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

# ============================================
# MAIN EXECUTION
# ============================================
def main():
    # Find files to process
    wav_files = sorted(glob.glob(os.path.join(INPUT_PATH, "*.wav")))
    remaining_files = []

    for wav_file in wav_files:
        base_name = os.path.splitext(os.path.basename(wav_file))[0]

        # Check if already processed
        transcript_exists = any(
            os.path.exists(os.path.join(OUTPUT_PATH, f"{base_name}{suffix}"))
            for suffix in ['_transcript.txt', '_final.txt', '_enhanced.txt', '_gpu_optimized.txt']
        )

        if not transcript_exists:
            remaining_files.append(wav_file)

    print(f"\n📊 Status:")
    print(f"   Total files: {len(wav_files)}")
    print(f"   Already processed: {len(wav_files) - len(remaining_files)}")
    print(f"   To process: {len(remaining_files)}")

    if not remaining_files:
        print("\n✅ All files already processed!")
        return

    # Process files one by one (no parallel processing)
    files_to_process = remaining_files[:5]  # Process up to 5 files per session
    successful = 0

    for idx, audio_file in enumerate(files_to_process):
        if transcribe_file(audio_file):
            successful += 1

        # Cool down between files
        if idx < len(files_to_process) - 1:
            print("\n⏳ Cooling down for 5 seconds...")
            time.sleep(5)

    # Final summary
    print("\n" + "="*60)
    print("✅ SESSION COMPLETE")
    print(f"   Processed successfully: {successful}/{len(files_to_process)}")
    print(f"   Remaining files: {len(remaining_files) - len(files_to_process)}")

    if len(remaining_files) > len(files_to_process):
        print("\n💡 Run again to process remaining files")

    print("="*60)

# ============================================
# RUN
# ============================================
if __name__ == "__main__":
    main()

# ============================================
# NOTES
# ============================================
"""
This simplified version:
1. NO parallel processing - one file at a time
2. NO aggressive memory allocation
3. NO complex GPU optimization
4. Just reliable, quality transcription

GPU usage will be around 7-10GB, which is normal for large-v3.

To increase speed slightly, you can adjust:
- beam_size: Lower to 3 for faster processing
- best_of: Lower to 3 for faster processing

But the defaults (5/5) provide the best quality.
"""

📦 Checking dependencies...
✓ Whisper already installed

🚀 Whisper V3 Simple Transcription System

Loading Whisper large-v3...
✓ Model loaded successfully
GPU Memory: 6.3GB allocated, 9.8GB reserved / 15.8GB total

📊 Status:
   Total files: 8
   Already processed: 2
   To process: 6

Processing: Call Recording - 13Mar25 1300 HB.wav
File size: 70.3 MB
Transcribing...
Detected language: English


100%|██████████| 83628/83628 [09:15<00:00, 150.50frames/s]



✅ Transcription complete!
   Language: en
   Duration: 13.9 minutes
   Process time: 560.5 seconds
   Speed: 1.5x realtime
GPU Memory: 6.3GB allocated, 10.8GB reserved / 15.8GB total
💾 Saved: Call Recording - 13Mar25 1300 HB_transcript.txt

⏳ Cooling down for 5 seconds...

Processing: Call Recording - 19Mar2025 0800 JD.wav
File size: 125.2 MB
Transcribing...


KeyboardInterrupt: 

In [None]:
# WHISPER V3 GPU-OPTIMIZED - MAXIMUM MEMORY UTILIZATION
# Aggressive GPU usage without speaker diarization

import os
import glob
import time
import gc
import subprocess
import sys
import numpy as np
from datetime import datetime
from google.colab import drive
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import queue

# Mount Drive
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

# ============================================
# INSTALL DEPENDENCIES
# ============================================
print("📦 Installing dependencies...")
try:
    import whisper
    print("✓ Whisper already installed")
except ImportError:
    print("Installing openai-whisper...")
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', 'openai-whisper'])
    import whisper
    print("✓ Whisper installed")

try:
    import torch
    print("✓ PyTorch already installed")
except ImportError:
    print("Installing torch...")
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', 'torch'])
    import torch

# ============================================
# CONFIGURATION
# ============================================
INPUT_PATH = "/content/drive/My Drive/PRUT-Transcriptions/Recordings_PRUT"
OUTPUT_PATH = "/content/drive/My Drive/PRUT-Transcriptions/Transcripts"
os.makedirs(OUTPUT_PATH, exist_ok=True)

# ============================================
# GPU OPTIMIZATION OPTIONS - MODIFY THESE!
# ============================================
GPU_OPTIONS = {
    # OPTION 1: Parallel processing (risky but uses more memory)
    'PARALLEL_FILES': 1,  # Process 2 files simultaneously (set to 1 for safe mode)

    # OPTION 2: Pre-cache audio in GPU memory
    'PRELOAD_TO_GPU': True,  # Load audio directly to GPU tensors

    # OPTION 3: Increase internal mel spectrogram cache
    'MEL_CACHE_SIZE': 10,  # Cache multiple mel spectrograms

    # OPTION 4: Keep model in higher precision (uses more memory)
    'USE_FLOAT32': False,  # Set True to use float32 (doubles memory usage)

    # OPTION 5: Decode parameters affecting memory
    'BEAM_SIZE': 5,  # Increase for more memory usage (default 5)
    'BEST_OF': 5,   # Increase for more memory usage (default 5)

    # OPTION 6: Process longer chunks at once
    'N_FRAMES': 4500,  # Default is 3000, increase to process more at once
}

print("🚀 Whisper V3 GPU-Optimized System")
print("="*60)
print("GPU Memory Optimization Settings:")
for key, value in GPU_OPTIONS.items():
    print(f"  {key}: {value}")
print("="*60)

# ============================================
# GPU MEMORY MONITORING
# ============================================
def get_gpu_stats():
    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated() / 1e9
        reserved = torch.cuda.memory_reserved() / 1e9
        total = torch.cuda.get_device_properties(0).total_memory / 1e9
        return allocated, reserved, total
    return 0, 0, 0

def print_gpu_stats(prefix=""):
    alloc, reserved, total = get_gpu_stats()
    print(f"{prefix}GPU Memory: {alloc:.1f}GB allocated, {reserved:.1f}GB reserved / {total:.1f}GB total")

# ============================================
# AGGRESSIVE GPU MEMORY ALLOCATION
# ============================================
PERSISTENT_TENSORS = []  # Keep these alive throughout execution

if torch.cuda.is_available():
    # Force PyTorch to allocate more memory upfront
    torch.cuda.set_per_process_memory_fraction(0.95)  # Use 95% of GPU memory

    # Enable memory efficient attention if available
    if hasattr(torch.nn.functional, 'scaled_dot_product_attention'):
        torch.backends.cuda.enable_mem_efficient_sdp(True)

    # Allocate persistent tensors to reach target memory usage
    print("Pre-allocating GPU memory aggressively...")
    target_gb = GPU_OPTIONS.get('ALLOCATE_EXTRA_GB', 4)

    try:
        # Allocate large persistent tensors
        for i in range(target_gb):
            # 1GB tensor that stays in memory
            tensor = torch.zeros(256, 1024, 1024, device='cuda', dtype=torch.float32)
            PERSISTENT_TENSORS.append(tensor)
            print_gpu_stats(f"  After allocation {i+1}GB: ")

        # Additional allocation to reach 15GB
        if GPU_OPTIONS['PARALLEL_FILES'] > 1:
            print("  Allocating extra memory for parallel processing...")
            for i in range(2):
                tensor = torch.zeros(512, 1024, 1024, device='cuda', dtype=torch.float32)
                PERSISTENT_TENSORS.append(tensor)
                print_gpu_stats(f"  Extra allocation {i+1}: ")

    except RuntimeError as e:
        print(f"  Reached memory limit: {e}")

    print(f"  Total persistent tensors: {len(PERSISTENT_TENSORS)}")
    torch.cuda.synchronize()  # Ensure allocations are complete

# ============================================
# LOAD MODEL WITH OPTIONS
# ============================================
print("\nLoading Whisper large-v3...")
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load model with specific dtype
dtype = torch.float32 if GPU_OPTIONS['USE_FLOAT32'] else torch.float16
model = whisper.load_model("large-v3", device=device)

# Force model to use more memory
if device == "cuda" and not GPU_OPTIONS['USE_FLOAT32']:
    model = model.half()  # FP16 mode
else:
    model = model.float()  # FP32 mode (uses 2x memory)

print_gpu_stats("After model load: ")

# Modify model's n_frames if specified
if hasattr(model, 'dims') and hasattr(model.dims, 'n_audio_ctx'):
    original_frames = model.dims.n_audio_ctx
    model.dims.n_audio_ctx = GPU_OPTIONS['N_FRAMES']
    print(f"Modified n_frames: {original_frames} → {GPU_OPTIONS['N_FRAMES']}")

# ============================================
# MEL SPECTROGRAM CACHE
# ============================================
mel_cache = {}
mel_cache_lock = threading.Lock()

def get_mel_cached(audio_path, model):
    """Cache mel spectrograms in GPU memory"""
    with mel_cache_lock:
        if audio_path in mel_cache:
            return mel_cache[audio_path]

        # Load and compute mel
        audio = whisper.load_audio(audio_path)
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(model.device)

        # Cache if under limit
        if len(mel_cache) < GPU_OPTIONS['MEL_CACHE_SIZE']:
            mel_cache[audio_path] = mel
            print_gpu_stats(f"  Cached mel #{len(mel_cache)}: ")

        return mel

# ============================================
# PARALLEL TRANSCRIPTION FUNCTION
# ============================================
def transcribe_file_aggressive(model, audio_path, file_idx=0):
    """Transcribe with aggressive GPU usage"""
    base_name = os.path.splitext(os.path.basename(audio_path))[0]
    output_file = os.path.join(OUTPUT_PATH, f"{base_name}_gpu_optimized.txt")

    try:
        print(f"\n[Worker {file_idx}] Processing: {os.path.basename(audio_path)}")
        start_time = time.time()

        if GPU_OPTIONS['PRELOAD_TO_GPU']:
            # Load audio and convert to GPU tensor with correct dtype
            print(f"[Worker {file_idx}] Pre-loading to GPU...")
            audio = whisper.load_audio(audio_path)

            # Match tensor dtype to model dtype
            model_dtype = next(model.parameters()).dtype
            audio_tensor = torch.from_numpy(audio).to(device=device, dtype=torch.float32)

            # Create multiple copies to use more GPU memory
            audio_copies = [audio_tensor.clone() for _ in range(3)]
            print_gpu_stats(f"[Worker {file_idx}] After loading copies: ")

            # Process from numpy (Whisper expects numpy)
            result = model.transcribe(
                audio,  # Use original numpy array
                language=None,
                temperature=0.2,
                beam_size=GPU_OPTIONS['BEAM_SIZE'],
                best_of=GPU_OPTIONS['BEST_OF'],
                fp16=(device == "cuda" and not GPU_OPTIONS['USE_FLOAT32']),
                condition_on_previous_text=True,
                word_timestamps=True,
                verbose=False
            )

            # Keep tensors in memory during processing to maintain GPU usage
            dummy_computation = torch.sum(torch.stack(audio_copies))

        else:
            # Standard transcription
            result = model.transcribe(
                audio_path,
                language=None,
                temperature=0.2,
                beam_size=GPU_OPTIONS['BEAM_SIZE'],
                best_of=GPU_OPTIONS['BEST_OF'],
                fp16=(device == "cuda" and not GPU_OPTIONS['USE_FLOAT32']),
                condition_on_previous_text=True,
                word_timestamps=True,
                verbose=False
            )

        process_time = time.time() - start_time
        print(f"[Worker {file_idx}] Completed in {process_time:.1f}s")
        print_gpu_stats(f"[Worker {file_idx}] During processing: ")

        # Save transcript (no speaker info)
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(f"# Whisper Large-v3 Transcript (GPU Optimized)\n")
            f.write(f"# File: {os.path.basename(audio_path)}\n")
            f.write(f"# Date: {datetime.now()}\n")
            f.write(f"# Language: {result.get('language', 'unknown')}\n")
            f.write(f"# Process time: {process_time:.1f}s\n")
            f.write("#" + "="*60 + "\n\n")

            for segment in result['segments']:
                text = segment['text'].strip()
                if text:
                    # Basic text cleanup
                    text = text.replace(' h m h c ', ' HMHC ')
                    text = text.replace(' c a ', ' CA ')
                    text = text.replace(' . ', '.')

                    f.write(f"[{segment['start']:06.2f} → {segment['end']:06.2f}] {text}\n")

        return True, output_file

    except Exception as e:
        print(f"[Worker {file_idx}] Error: {e}")
        return False, str(e)

# ============================================
# MAIN PROCESSING WITH PARALLEL OPTION
# ============================================
def main():
    # Find files to process
    wav_files = sorted(glob.glob(os.path.join(INPUT_PATH, "*.wav")))
    remaining = []

    for wav_file in wav_files:
        base_name = os.path.splitext(os.path.basename(wav_file))[0]
        output_exists = any(
            os.path.exists(os.path.join(OUTPUT_PATH, f"{base_name}{suffix}"))
            for suffix in ['_gpu_optimized.txt', '_final.txt', '_enhanced.txt', '_ultimate.txt']
        )
        if not output_exists:
            remaining.append(wav_file)

    print(f"\nFiles to process: {len(remaining)}")

    if not remaining:
        print("✅ All files already processed!")
        return

    # Process files
    files_to_process = remaining[:6]  # Process up to 6 files this session

    if GPU_OPTIONS['PARALLEL_FILES'] > 1:
        # PARALLEL PROCESSING - Uses more GPU memory
        print(f"\n🔥 PARALLEL MODE: Processing {GPU_OPTIONS['PARALLEL_FILES']} files simultaneously")
        print("⚠️  Warning: This may cause out-of-memory errors!")

        with ThreadPoolExecutor(max_workers=GPU_OPTIONS['PARALLEL_FILES']) as executor:
            futures = {}

            # Submit files for parallel processing
            for i, audio_file in enumerate(files_to_process[:GPU_OPTIONS['PARALLEL_FILES'] * 2]):
                if len(futures) >= GPU_OPTIONS['PARALLEL_FILES']:
                    # Wait for a slot to free up
                    done, _ = as_completed(futures, timeout=None).__next__()
                    futures.pop(done)

                future = executor.submit(transcribe_file_aggressive, model, audio_file, i)
                futures[future] = audio_file

                # Brief pause between submissions
                time.sleep(1)

            # Wait for remaining tasks
            for future in as_completed(futures):
                success, result = future.result()
                if success:
                    print(f"✓ Completed: {os.path.basename(futures[future])}")
                else:
                    print(f"❌ Failed: {os.path.basename(futures[future])}")

    else:
        # SEQUENTIAL PROCESSING - Safer but uses less memory
        print(f"\n🔒 SEQUENTIAL MODE: Processing files one at a time")

        for i, audio_file in enumerate(files_to_process):
            success, result = transcribe_file_aggressive(model, audio_file, i)

            if success:
                print(f"✓ Saved: {os.path.basename(result)}")
            else:
                print(f"❌ Failed: {result}")

            # Monitor memory between files
            print_gpu_stats("Between files: ")

            # Brief cooldown
            if i < len(files_to_process) - 1:
                time.sleep(3)

    # Final cleanup
    mel_cache.clear()
    PERSISTENT_TENSORS.clear()  # Clear persistent memory
    torch.cuda.empty_cache()
    gc.collect()

    print("\n" + "="*60)
    print("✅ SESSION COMPLETE")
    print_gpu_stats("Final: ")
    print("="*60)

# ============================================
# RUN MAIN
# ============================================
if __name__ == "__main__":
    print("\n⚙️  GPU OPTIMIZATION TIPS:")
    print("1. Set PARALLEL_FILES=2 to process 2 files at once (risky!)")
    print("2. Set USE_FLOAT32=True to double memory usage")
    print("3. Increase BEAM_SIZE and BEST_OF for more memory use")
    print("4. Set PRELOAD_TO_GPU=True to cache audio in GPU")
    print("\n⚠️  Higher settings may cause out-of-memory crashes!")
    print("="*60)

    main()

# ============================================
# EXPERIMENTAL: EXTREME GPU USAGE
# ============================================
"""
FOR MAXIMUM GPU USAGE (15+ GB), modify GPU_OPTIONS to:

GPU_OPTIONS = {
    'PARALLEL_FILES': 3,      # Process 3 files at once
    'PRELOAD_TO_GPU': True,   # Cache in GPU
    'MEL_CACHE_SIZE': 20,     # Large cache
    'USE_FLOAT32': True,      # Double precision (2x memory)
    'BEAM_SIZE': 20,          # Very large beam
    'BEST_OF': 20,            # Very large sampling
    'N_FRAMES': 6000,         # Process huge chunks
}

This will likely crash but will definitely use all GPU memory!
"""

📦 Installing dependencies...
✓ Whisper already installed
✓ PyTorch already installed
🚀 Whisper V3 GPU-Optimized System
GPU Memory Optimization Settings:
  PARALLEL_FILES: 1
  PRELOAD_TO_GPU: True
  MEL_CACHE_SIZE: 10
  USE_FLOAT32: False
  BEAM_SIZE: 5
  BEST_OF: 5
  N_FRAMES: 4500
Pre-allocating GPU memory aggressively...
  Reached memory limit: CUDA out of memory. Tried to allocate 1024.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 646.12 MiB is free. Process 50434 has 14.11 GiB memory in use. 14.00 GiB allowed; Of the allocated memory 13.70 GiB is allocated by PyTorch, and 306.24 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
  Total persistent tensors: 0

Loading Whisper large-v3...


In [1]:
# WHISPER V3 GPU-OPTIMIZED - MAXIMUM MEMORY UTILIZATION
# Aggressive GPU usage without speaker diarization

import os
import glob
import time
import gc
import subprocess
import sys
import numpy as np
from datetime import datetime
from google.colab import drive
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import queue

# Mount Drive
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

# ============================================
# INSTALL DEPENDENCIES
# ============================================
print("📦 Installing dependencies...")
try:
    import whisper
    print("✓ Whisper already installed")
except ImportError:
    print("Installing openai-whisper...")
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', 'openai-whisper'])
    import whisper
    print("✓ Whisper installed")

try:
    import torch
    print("✓ PyTorch already installed")
except ImportError:
    print("Installing torch...")
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', 'torch'])
    import torch

# ============================================
# CONFIGURATION
# ============================================
INPUT_PATH = "/content/drive/My Drive/PRUT-Transcriptions/Recordings_PRUT"
OUTPUT_PATH = "/content/drive/My Drive/PRUT-Transcriptions/Transcripts"
os.makedirs(OUTPUT_PATH, exist_ok=True)

# ============================================
# GPU OPTIMIZATION OPTIONS - MODIFY THESE!
# ============================================
GPU_OPTIONS = {
    # OPTION 1: Parallel processing (risky but uses more memory)
    'PARALLEL_FILES': 2,  # Process 2 files simultaneously (set to 1 for safe mode)

    # OPTION 2: Pre-cache audio in GPU memory
    'PRELOAD_TO_GPU': False,  # Load audio directly to GPU tensors

    # OPTION 3: Increase internal mel spectrogram cache
    'MEL_CACHE_SIZE': 5,  # Cache multiple mel spectrograms

    # OPTION 4: Keep model in higher precision (uses more memory)
    'USE_FLOAT32': False,  # Set True to use float32 (doubles memory usage)

    # OPTION 5: Decode parameters affecting memory
    'BEAM_SIZE': 5,  # Increase for more memory usage (default 5)
    'BEST_OF': 5,   # Increase for more memory usage (default 5)

    # OPTION 6: Process longer chunks at once
    'N_FRAMES': 4500,  # Default is 3000, increase to process more at once
}

print("🚀 Whisper V3 GPU-Optimized System")
print("="*60)
print("GPU Memory Optimization Settings:")
for key, value in GPU_OPTIONS.items():
    print(f"  {key}: {value}")
print("="*60)

# ============================================
# GPU MEMORY MONITORING
# ============================================
def get_gpu_stats():
    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated() / 1e9
        reserved = torch.cuda.memory_reserved() / 1e9
        total = torch.cuda.get_device_properties(0).total_memory / 1e9
        return allocated, reserved, total
    return 0, 0, 0

def print_gpu_stats(prefix=""):
    alloc, reserved, total = get_gpu_stats()
    print(f"{prefix}GPU Memory: {alloc:.1f}GB allocated, {reserved:.1f}GB reserved / {total:.1f}GB total")

# ============================================
# AGGRESSIVE GPU MEMORY ALLOCATION
# ============================================
PERSISTENT_TENSORS = []  # Keep these alive throughout execution

if torch.cuda.is_available():
    # Force PyTorch to allocate more memory upfront
    torch.cuda.set_per_process_memory_fraction(0.95)  # Use 95% of GPU memory

    # Enable memory efficient attention if available
    if hasattr(torch.nn.functional, 'scaled_dot_product_attention'):
        torch.backends.cuda.enable_mem_efficient_sdp(True)

    # Allocate persistent tensors to reach target memory usage
    print("Pre-allocating GPU memory aggressively...")
    target_gb = GPU_OPTIONS.get('ALLOCATE_EXTRA_GB', 4)

    try:
        # Allocate large persistent tensors
        for i in range(target_gb):
            # 1GB tensor that stays in memory
            tensor = torch.zeros(256, 1024, 1024, device='cuda', dtype=torch.float32)
            PERSISTENT_TENSORS.append(tensor)
            print_gpu_stats(f"  After allocation {i+1}GB: ")

        # Additional allocation to reach 15GB
        if GPU_OPTIONS['PARALLEL_FILES'] > 1:
            print("  Allocating extra memory for parallel processing...")
            for i in range(2):
                tensor = torch.zeros(512, 1024, 1024, device='cuda', dtype=torch.float32)
                PERSISTENT_TENSORS.append(tensor)
                print_gpu_stats(f"  Extra allocation {i+1}: ")

    except RuntimeError as e:
        print(f"  Reached memory limit: {e}")

    print(f"  Total persistent tensors: {len(PERSISTENT_TENSORS)}")
    torch.cuda.synchronize()  # Ensure allocations are complete

# ============================================
# LOAD MODEL WITH OPTIONS
# ============================================
print("\nLoading Whisper large-v3...")
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load model with specific dtype
dtype = torch.float32 if GPU_OPTIONS['USE_FLOAT32'] else torch.float16
model = whisper.load_model("large-v3", device=device)

# Force model to use more memory
if device == "cuda" and not GPU_OPTIONS['USE_FLOAT32']:
    model = model.half()  # FP16 mode
else:
    model = model.float()  # FP32 mode (uses 2x memory)

print_gpu_stats("After model load: ")

# Modify model's n_frames if specified
if hasattr(model, 'dims') and hasattr(model.dims, 'n_audio_ctx'):
    original_frames = model.dims.n_audio_ctx
    model.dims.n_audio_ctx = GPU_OPTIONS['N_FRAMES']
    print(f"Modified n_frames: {original_frames} → {GPU_OPTIONS['N_FRAMES']}")

# ============================================
# MEL SPECTROGRAM CACHE
# ============================================
mel_cache = {}
mel_cache_lock = threading.Lock()

def get_mel_cached(audio_path, model):
    """Cache mel spectrograms in GPU memory"""
    with mel_cache_lock:
        if audio_path in mel_cache:
            return mel_cache[audio_path]

        # Load and compute mel
        audio = whisper.load_audio(audio_path)
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(model.device)

        # Cache if under limit
        if len(mel_cache) < GPU_OPTIONS['MEL_CACHE_SIZE']:
            mel_cache[audio_path] = mel
            print_gpu_stats(f"  Cached mel #{len(mel_cache)}: ")

        return mel

# ============================================
# PARALLEL TRANSCRIPTION FUNCTION
# ============================================
def transcribe_file_aggressive(model, audio_path, file_idx=0):
    """Transcribe with aggressive GPU usage"""
    base_name = os.path.splitext(os.path.basename(audio_path))[0]
    output_file = os.path.join(OUTPUT_PATH, f"{base_name}_gpu_optimized.txt")

    try:
        print(f"\n[Worker {file_idx}] Processing: {os.path.basename(audio_path)}")
        start_time = time.time()

        if GPU_OPTIONS['PRELOAD_TO_GPU']:
            # Load audio and convert to GPU tensor with correct dtype
            print(f"[Worker {file_idx}] Pre-loading to GPU...")
            audio = whisper.load_audio(audio_path)

            # Match tensor dtype to model dtype
            model_dtype = next(model.parameters()).dtype
            audio_tensor = torch.from_numpy(audio).to(device=device, dtype=torch.float32)

            # Create multiple copies to use more GPU memory
            audio_copies = [audio_tensor.clone() for _ in range(3)]
            print_gpu_stats(f"[Worker {file_idx}] After loading copies: ")

            # Process from numpy (Whisper expects numpy)
            result = model.transcribe(
                audio,  # Use original numpy array
                language=None,
                temperature=0.2,
                beam_size=GPU_OPTIONS['BEAM_SIZE'],
                best_of=GPU_OPTIONS['BEST_OF'],
                fp16=(device == "cuda" and not GPU_OPTIONS['USE_FLOAT32']),
                condition_on_previous_text=True,
                word_timestamps=True,
                verbose=False
            )

            # Keep tensors in memory during processing to maintain GPU usage
            dummy_computation = torch.sum(torch.stack(audio_copies))

        else:
            # Standard transcription
            result = model.transcribe(
                audio_path,
                language=None,
                temperature=0.2,
                beam_size=GPU_OPTIONS['BEAM_SIZE'],
                best_of=GPU_OPTIONS['BEST_OF'],
                fp16=(device == "cuda" and not GPU_OPTIONS['USE_FLOAT32']),
                condition_on_previous_text=True,
                word_timestamps=True,
                verbose=False
            )

        process_time = time.time() - start_time
        print(f"[Worker {file_idx}] Completed in {process_time:.1f}s")
        print_gpu_stats(f"[Worker {file_idx}] During processing: ")

        # Save transcript (no speaker info)
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(f"# Whisper Large-v3 Transcript (GPU Optimized)\n")
            f.write(f"# File: {os.path.basename(audio_path)}\n")
            f.write(f"# Date: {datetime.now()}\n")
            f.write(f"# Language: {result.get('language', 'unknown')}\n")
            f.write(f"# Process time: {process_time:.1f}s\n")
            f.write("#" + "="*60 + "\n\n")

            for segment in result['segments']:
                text = segment['text'].strip()
                if text:
                    # Basic text cleanup
                    text = text.replace(' h m h c ', ' HMHC ')
                    text = text.replace(' c a ', ' CA ')
                    text = text.replace(' . ', '.')

                    f.write(f"[{segment['start']:06.2f} → {segment['end']:06.2f}] {text}\n")

        return True, output_file

    except Exception as e:
        print(f"[Worker {file_idx}] Error: {e}")
        return False, str(e)

# ============================================
# MAIN PROCESSING WITH PARALLEL OPTION
# ============================================
def main():
    # Find files to process
    wav_files = sorted(glob.glob(os.path.join(INPUT_PATH, "*.wav")))
    remaining = []

    for wav_file in wav_files:
        base_name = os.path.splitext(os.path.basename(wav_file))[0]
        output_exists = any(
            os.path.exists(os.path.join(OUTPUT_PATH, f"{base_name}{suffix}"))
            for suffix in ['_gpu_optimized.txt', '_final.txt', '_enhanced.txt', '_ultimate.txt']
        )
        if not output_exists:
            remaining.append(wav_file)

    print(f"\nFiles to process: {len(remaining)}")

    if not remaining:
        print("✅ All files already processed!")
        return

    # Process files
    files_to_process = remaining[:6]  # Process up to 6 files this session

    if GPU_OPTIONS['PARALLEL_FILES'] > 1:
        # PARALLEL PROCESSING - Uses more GPU memory
        print(f"\n🔥 PARALLEL MODE: Processing {GPU_OPTIONS['PARALLEL_FILES']} files simultaneously")
        print("⚠️  Warning: This may cause out-of-memory errors!")

        with ThreadPoolExecutor(max_workers=GPU_OPTIONS['PARALLEL_FILES']) as executor:
            futures = {}

            # Submit files for parallel processing
            for i, audio_file in enumerate(files_to_process[:GPU_OPTIONS['PARALLEL_FILES'] * 2]):
                if len(futures) >= GPU_OPTIONS['PARALLEL_FILES']:
                    # Wait for a slot to free up
                    done, _ = as_completed(futures, timeout=None).__next__()
                    futures.pop(done)

                future = executor.submit(transcribe_file_aggressive, model, audio_file, i)
                futures[future] = audio_file

                # Brief pause between submissions
                time.sleep(1)

            # Wait for remaining tasks
            for future in as_completed(futures):
                success, result = future.result()
                if success:
                    print(f"✓ Completed: {os.path.basename(futures[future])}")
                else:
                    print(f"❌ Failed: {os.path.basename(futures[future])}")

    else:
        # SEQUENTIAL PROCESSING - Safer but uses less memory
        print(f"\n🔒 SEQUENTIAL MODE: Processing files one at a time")

        for i, audio_file in enumerate(files_to_process):
            success, result = transcribe_file_aggressive(model, audio_file, i)

            if success:
                print(f"✓ Saved: {os.path.basename(result)}")
            else:
                print(f"❌ Failed: {result}")

            # Monitor memory between files
            print_gpu_stats("Between files: ")

            # Brief cooldown
            if i < len(files_to_process) - 1:
                time.sleep(3)

    # Final cleanup
    mel_cache.clear()
    PERSISTENT_TENSORS.clear()  # Clear persistent memory
    torch.cuda.empty_cache()
    gc.collect()

    print("\n" + "="*60)
    print("✅ SESSION COMPLETE")
    print_gpu_stats("Final: ")
    print("="*60)

# ============================================
# RUN MAIN
# ============================================
if __name__ == "__main__":
    print("\n⚙️  GPU OPTIMIZATION TIPS:")
    print("1. Set PARALLEL_FILES=2 to process 2 files at once (risky!)")
    print("2. Set USE_FLOAT32=True to double memory usage")
    print("3. Increase BEAM_SIZE and BEST_OF for more memory use")
    print("4. Set PRELOAD_TO_GPU=True to cache audio in GPU")
    print("\n⚠️  Higher settings may cause out-of-memory crashes!")
    print("="*60)

    main()

# ============================================
# EXPERIMENTAL: EXTREME GPU USAGE
# ============================================
"""
FOR MAXIMUM GPU USAGE (15+ GB), modify GPU_OPTIONS to:

GPU_OPTIONS = {
    'PARALLEL_FILES': 3,      # Process 3 files at once
    'PRELOAD_TO_GPU': True,   # Cache in GPU
    'MEL_CACHE_SIZE': 20,     # Large cache
    'USE_FLOAT32': True,      # Double precision (2x memory)
    'BEAM_SIZE': 20,          # Very large beam
    'BEST_OF': 20,            # Very large sampling
    'N_FRAMES': 6000,         # Process huge chunks
}

This will likely crash but will definitely use all GPU memory!
"""

📦 Installing dependencies...
✓ Whisper already installed
✓ PyTorch already installed
🚀 Whisper V3 GPU-Optimized System
GPU Memory Optimization Settings:
  PARALLEL_FILES: 2
  PRELOAD_TO_GPU: False
  MEL_CACHE_SIZE: 5
  USE_FLOAT32: False
  BEAM_SIZE: 5
  BEST_OF: 5
  N_FRAMES: 4500
Pre-allocating GPU memory aggressively...
  After allocation 1GB: GPU Memory: 1.1GB allocated, 1.1GB reserved / 15.8GB total
  After allocation 2GB: GPU Memory: 2.1GB allocated, 2.1GB reserved / 15.8GB total
  After allocation 3GB: GPU Memory: 3.2GB allocated, 3.2GB reserved / 15.8GB total
  After allocation 4GB: GPU Memory: 4.3GB allocated, 4.3GB reserved / 15.8GB total
  Allocating extra memory for parallel processing...
  Extra allocation 1: GPU Memory: 6.4GB allocated, 6.4GB reserved / 15.8GB total
  Extra allocation 2: GPU Memory: 8.6GB allocated, 8.6GB reserved / 15.8GB total
  Total persistent tensors: 6

Loading Whisper large-v3...


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 646.12 MiB is free. Process 40798 has 14.11 GiB memory in use. 14.00 GiB allowed; Of the allocated memory 13.70 GiB is allocated by PyTorch, and 306.24 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [1]:
# WHISPER V3 GPU-OPTIMIZED - MAXIMUM MEMORY UTILIZATION
# Aggressive GPU usage without speaker diarization

import os
import glob
import time
import gc
import subprocess
import sys
import numpy as np
from datetime import datetime
from google.colab import drive
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import queue

# Mount Drive
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

# ============================================
# INSTALL DEPENDENCIES
# ============================================
print("📦 Installing dependencies...")
try:
    import whisper
    print("✓ Whisper already installed")
except ImportError:
    print("Installing openai-whisper...")
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', 'openai-whisper'])
    import whisper
    print("✓ Whisper installed")

try:
    import torch
    print("✓ PyTorch already installed")
except ImportError:
    print("Installing torch...")
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', 'torch'])
    import torch

# ============================================
# CONFIGURATION
# ============================================
INPUT_PATH = "/content/drive/My Drive/PRUT-Transcriptions/Recordings_PRUT"
OUTPUT_PATH = "/content/drive/My Drive/PRUT-Transcriptions/Transcripts"
os.makedirs(OUTPUT_PATH, exist_ok=True)

# ============================================
# GPU OPTIMIZATION OPTIONS - MODIFY THESE!
# ============================================
GPU_OPTIONS = {
    # OPTION 1: Parallel processing (risky but uses more memory)
    'PARALLEL_FILES': 1,  # Process 2 files simultaneously (set to 1 for safe mode)

    # OPTION 2: Pre-cache audio in GPU memory
    'PRELOAD_TO_GPU': True,  # Load audio directly to GPU tensors

    # OPTION 3: Increase internal mel spectrogram cache
    'MEL_CACHE_SIZE': 10,  # Cache multiple mel spectrograms

    # OPTION 4: Keep model in higher precision (uses more memory)
    'USE_FLOAT32': False,  # Set True to use float32 (doubles memory usage)

    # OPTION 5: Decode parameters affecting memory
    'BEAM_SIZE': 10,  # Increase for more memory usage (default 5)
    'BEST_OF': 10,   # Increase for more memory usage (default 5)

    # OPTION 6: Process longer chunks at once
    'N_FRAMES': 5000,  # Default is 3000, increase to process more at once
}

print("🚀 Whisper V3 GPU-Optimized System")
print("="*60)
print("GPU Memory Optimization Settings:")
for key, value in GPU_OPTIONS.items():
    print(f"  {key}: {value}")
print("="*60)

# ============================================
# GPU MEMORY MONITORING
# ============================================
def get_gpu_stats():
    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated() / 1e9
        reserved = torch.cuda.memory_reserved() / 1e9
        total = torch.cuda.get_device_properties(0).total_memory / 1e9
        return allocated, reserved, total
    return 0, 0, 0

def print_gpu_stats(prefix=""):
    alloc, reserved, total = get_gpu_stats()
    print(f"{prefix}GPU Memory: {alloc:.1f}GB allocated, {reserved:.1f}GB reserved / {total:.1f}GB total")

# ============================================
# AGGRESSIVE GPU MEMORY ALLOCATION
# ============================================
if torch.cuda.is_available():
    # Force PyTorch to allocate more memory upfront
    torch.cuda.set_per_process_memory_fraction(0.95)  # Use 95% of GPU memory

    # Enable memory efficient attention if available
    if hasattr(torch.nn.functional, 'scaled_dot_product_attention'):
        torch.backends.cuda.enable_mem_efficient_sdp(True)

    # Allocate dummy tensors to reserve memory
    print("Pre-allocating GPU memory...")
    dummy_tensors = []
    try:
        # Try to allocate 2GB chunks until we hit limit
        for i in range(5):
            dummy = torch.zeros(256, 1024, 1024, device='cuda')  # ~1GB each
            dummy_tensors.append(dummy)
            print_gpu_stats(f"  After allocation {i+1}: ")
    except RuntimeError:
        print("  Reached memory limit")

    # Clear dummy tensors but keep memory reserved
    dummy_tensors.clear()
    torch.cuda.empty_cache()

# ============================================
# LOAD MODEL WITH OPTIONS
# ============================================
print("\nLoading Whisper large-v3...")
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load model with specific dtype
dtype = torch.float32 if GPU_OPTIONS['USE_FLOAT32'] else torch.float16
model = whisper.load_model("large-v3", device=device)

# Force model to use more memory
if device == "cuda" and not GPU_OPTIONS['USE_FLOAT32']:
    model = model.half()  # FP16 mode
else:
    model = model.float()  # FP32 mode (uses 2x memory)

print_gpu_stats("After model load: ")

# Modify model's n_frames if specified
if hasattr(model, 'dims') and hasattr(model.dims, 'n_audio_ctx'):
    original_frames = model.dims.n_audio_ctx
    model.dims.n_audio_ctx = GPU_OPTIONS['N_FRAMES']
    print(f"Modified n_frames: {original_frames} → {GPU_OPTIONS['N_FRAMES']}")

# ============================================
# MEL SPECTROGRAM CACHE
# ============================================
mel_cache = {}
mel_cache_lock = threading.Lock()

def get_mel_cached(audio_path, model):
    """Cache mel spectrograms in GPU memory"""
    with mel_cache_lock:
        if audio_path in mel_cache:
            return mel_cache[audio_path]

        # Load and compute mel
        audio = whisper.load_audio(audio_path)
        audio = whisper.pad_or_trim(audio)
        mel = whisper.log_mel_spectrogram(audio).to(model.device)

        # Cache if under limit
        if len(mel_cache) < GPU_OPTIONS['MEL_CACHE_SIZE']:
            mel_cache[audio_path] = mel
            print_gpu_stats(f"  Cached mel #{len(mel_cache)}: ")

        return mel

# ============================================
# PARALLEL TRANSCRIPTION FUNCTION
# ============================================
def transcribe_file_aggressive(model, audio_path, file_idx=0):
    """Transcribe with aggressive GPU usage"""
    base_name = os.path.splitext(os.path.basename(audio_path))[0]
    output_file = os.path.join(OUTPUT_PATH, f"{base_name}_gpu_optimized.txt")

    try:
        print(f"\n[Worker {file_idx}] Processing: {os.path.basename(audio_path)}")
        start_time = time.time()

        if GPU_OPTIONS['PRELOAD_TO_GPU']:
            # Load audio and convert to GPU tensor immediately
            print(f"[Worker {file_idx}] Pre-loading to GPU...")
            audio = whisper.load_audio(audio_path)
            audio_tensor = torch.from_numpy(audio).to(device)

            # Process from GPU tensor
            result = model.transcribe(
                audio_tensor.cpu().numpy(),  # Whisper expects numpy
                language=None,
                temperature=0.2,
                beam_size=GPU_OPTIONS['BEAM_SIZE'],
                best_of=GPU_OPTIONS['BEST_OF'],
                fp16=(device == "cuda" and not GPU_OPTIONS['USE_FLOAT32']),
                condition_on_previous_text=True,
                word_timestamps=True,
                verbose=False
            )

            # Keep tensor in GPU memory during processing
            audio_tensor = audio_tensor.contiguous()

        else:
            # Standard transcription
            result = model.transcribe(
                audio_path,
                language=None,
                temperature=0.2,
                beam_size=GPU_OPTIONS['BEAM_SIZE'],
                best_of=GPU_OPTIONS['BEST_OF'],
                fp16=(device == "cuda" and not GPU_OPTIONS['USE_FLOAT32']),
                condition_on_previous_text=True,
                word_timestamps=True,
                verbose=False
            )

        process_time = time.time() - start_time
        print(f"[Worker {file_idx}] Completed in {process_time:.1f}s")
        print_gpu_stats(f"[Worker {file_idx}] During processing: ")

        # Save transcript (no speaker info)
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(f"# Whisper Large-v3 Transcript (GPU Optimized)\n")
            f.write(f"# File: {os.path.basename(audio_path)}\n")
            f.write(f"# Date: {datetime.now()}\n")
            f.write(f"# Language: {result.get('language', 'unknown')}\n")
            f.write(f"# Process time: {process_time:.1f}s\n")
            f.write("#" + "="*60 + "\n\n")

            for segment in result['segments']:
                text = segment['text'].strip()
                if text:
                    # Basic text cleanup
                    text = text.replace(' h m h c ', ' HMHC ')
                    text = text.replace(' c a ', ' CA ')
                    text = text.replace(' . ', '.')

                    f.write(f"[{segment['start']:06.2f} → {segment['end']:06.2f}] {text}\n")

        return True, output_file

    except Exception as e:
        print(f"[Worker {file_idx}] Error: {e}")
        return False, str(e)

# ============================================
# MAIN PROCESSING WITH PARALLEL OPTION
# ============================================
def main():
    # Find files to process
    wav_files = sorted(glob.glob(os.path.join(INPUT_PATH, "*.wav")))
    remaining = []

    for wav_file in wav_files:
        base_name = os.path.splitext(os.path.basename(wav_file))[0]
        output_exists = any(
            os.path.exists(os.path.join(OUTPUT_PATH, f"{base_name}{suffix}"))
            for suffix in ['_gpu_optimized.txt', '_final.txt', '_enhanced.txt', '_ultimate.txt']
        )
        if not output_exists:
            remaining.append(wav_file)

    print(f"\nFiles to process: {len(remaining)}")

    if not remaining:
        print("✅ All files already processed!")
        return

    # Process files
    files_to_process = remaining[:6]  # Process up to 6 files this session

    if GPU_OPTIONS['PARALLEL_FILES'] > 1:
        # PARALLEL PROCESSING - Uses more GPU memory
        print(f"\n🔥 PARALLEL MODE: Processing {GPU_OPTIONS['PARALLEL_FILES']} files simultaneously")
        print("⚠️  Warning: This may cause out-of-memory errors!")

        with ThreadPoolExecutor(max_workers=GPU_OPTIONS['PARALLEL_FILES']) as executor:
            futures = {}

            # Submit files for parallel processing
            for i, audio_file in enumerate(files_to_process[:GPU_OPTIONS['PARALLEL_FILES'] * 2]):
                if len(futures) >= GPU_OPTIONS['PARALLEL_FILES']:
                    # Wait for a slot to free up
                    done, _ = as_completed(futures, timeout=None).__next__()
                    futures.pop(done)

                future = executor.submit(transcribe_file_aggressive, model, audio_file, i)
                futures[future] = audio_file

                # Brief pause between submissions
                time.sleep(1)

            # Wait for remaining tasks
            for future in as_completed(futures):
                success, result = future.result()
                if success:
                    print(f"✓ Completed: {os.path.basename(futures[future])}")
                else:
                    print(f"❌ Failed: {os.path.basename(futures[future])}")

    else:
        # SEQUENTIAL PROCESSING - Safer but uses less memory
        print(f"\n🔒 SEQUENTIAL MODE: Processing files one at a time")

        for i, audio_file in enumerate(files_to_process):
            success, result = transcribe_file_aggressive(model, audio_file, i)

            if success:
                print(f"✓ Saved: {os.path.basename(result)}")
            else:
                print(f"❌ Failed: {result}")

            # Monitor memory between files
            print_gpu_stats("Between files: ")

            # Brief cooldown
            if i < len(files_to_process) - 1:
                time.sleep(3)

    # Final cleanup
    mel_cache.clear()
    torch.cuda.empty_cache()
    gc.collect()

    print("\n" + "="*60)
    print("✅ SESSION COMPLETE")
    print_gpu_stats("Final: ")
    print("="*60)

# ============================================
# RUN MAIN
# ============================================
if __name__ == "__main__":
    print("\n⚙️  GPU OPTIMIZATION TIPS:")
    print("1. Set PARALLEL_FILES=2 to process 2 files at once (risky!)")
    print("2. Set USE_FLOAT32=True to double memory usage")
    print("3. Increase BEAM_SIZE and BEST_OF for more memory use")
    print("4. Set PRELOAD_TO_GPU=True to cache audio in GPU")
    print("\n⚠️  Higher settings may cause out-of-memory crashes!")
    print("="*60)

    main()

# ============================================
# EXPERIMENTAL: EXTREME GPU USAGE
# ============================================
"""
FOR MAXIMUM GPU USAGE (15+ GB), modify GPU_OPTIONS to:

GPU_OPTIONS = {
    'PARALLEL_FILES': 3,      # Process 3 files at once
    'PRELOAD_TO_GPU': True,   # Cache in GPU
    'MEL_CACHE_SIZE': 20,     # Large cache
    'USE_FLOAT32': True,      # Double precision (2x memory)
    'BEAM_SIZE': 20,          # Very large beam
    'BEST_OF': 20,            # Very large sampling
    'N_FRAMES': 6000,         # Process huge chunks
}

This will likely crash but will definitely use all GPU memory!
"""

Mounted at /content/drive
📦 Installing dependencies...
Installing openai-whisper...
✓ Whisper installed
✓ PyTorch already installed
🚀 Whisper V3 GPU-Optimized System
GPU Memory Optimization Settings:
  PARALLEL_FILES: 1
  PRELOAD_TO_GPU: True
  MEL_CACHE_SIZE: 10
  USE_FLOAT32: False
  BEAM_SIZE: 10
  BEST_OF: 10
  N_FRAMES: 5000
Pre-allocating GPU memory...
  After allocation 1: GPU Memory: 1.1GB allocated, 1.1GB reserved / 15.8GB total
  After allocation 2: GPU Memory: 2.1GB allocated, 2.1GB reserved / 15.8GB total
  After allocation 3: GPU Memory: 3.2GB allocated, 3.2GB reserved / 15.8GB total
  After allocation 4: GPU Memory: 4.3GB allocated, 4.3GB reserved / 15.8GB total
  After allocation 5: GPU Memory: 5.4GB allocated, 5.4GB reserved / 15.8GB total

Loading Whisper large-v3...


100%|█████████████████████████████████████| 2.88G/2.88G [01:57<00:00, 26.3MiB/s]


After model load: GPU Memory: 4.2GB allocated, 10.9GB reserved / 15.8GB total
Modified n_frames: 1500 → 5000

⚙️  GPU OPTIMIZATION TIPS:
1. Set PARALLEL_FILES=2 to process 2 files at once (risky!)
2. Set USE_FLOAT32=True to double memory usage
3. Increase BEAM_SIZE and BEST_OF for more memory use
4. Set PRELOAD_TO_GPU=True to cache audio in GPU

⚠️  Higher settings may cause out-of-memory crashes!

Files to process: 5

🔒 SEQUENTIAL MODE: Processing files one at a time

[Worker 0] Processing: Call Recording - 19Mar2025 0800 JD.wav
[Worker 0] Pre-loading to GPU...
[Worker 0] Error: expected scalar type Float but found Half
❌ Failed: expected scalar type Float but found Half
Between files: GPU Memory: 4.2GB allocated, 10.9GB reserved / 15.8GB total

[Worker 1] Processing: Call Recording - 19Mar25 0900 - AJ.wav
[Worker 1] Pre-loading to GPU...
[Worker 1] Error: expected scalar type Float but found Half
❌ Failed: expected scalar type Float but found Half
Between files: GPU Memory: 4.2GB all

"\nFOR MAXIMUM GPU USAGE (15+ GB), modify GPU_OPTIONS to:\n\nGPU_OPTIONS = {\n    'PARALLEL_FILES': 3,      # Process 3 files at once\n    'PRELOAD_TO_GPU': True,   # Cache in GPU\n    'MEL_CACHE_SIZE': 20,     # Large cache\n    'USE_FLOAT32': True,      # Double precision (2x memory)\n    'BEAM_SIZE': 20,          # Very large beam\n    'BEST_OF': 20,            # Very large sampling\n    'N_FRAMES': 6000,         # Process huge chunks\n}\n\nThis will likely crash but will definitely use all GPU memory!\n"

# Test Drive File Storage


In [3]:
# SIMPLE WORKING TRANSCRIPTION SYSTEM
# Based on the approach that was working

# ============================================
# CELL 1: Complete Setup and Processing
# ============================================

import os
import glob
import json
import time
import gc
import subprocess
from datetime import datetime
from google.colab import drive

# Mount Drive
# if not os.path.exists('/content/drive'):
#    drive.mount('/content/drive')

In [4]:
# ============================================
# BLOCK 2: File Discovery and Status
# ============================================
"""
Run this to see what files need processing
"""

# Mount Google Drive
drive.mount('/content/drive')

# Define paths - adjust these to your actual locations
INPUT_PATH = "/content/drive/My Drive/PRUT-Transcriptions/Recordings_PRUT"
OUTPUT_PATH = "/content/drive/My Drive/PRUT-Transcriptions/Transcripts"

# Get list of audio files
mp4_files = sorted(glob.glob(os.path.join(INPUT_PATH, "*.mp4")))
wav_files = sorted(glob.glob(os.path.join(INPUT_PATH, "*.wav")))
all_audio_files = mp4_files + wav_files

print(f"\n📁 Found {len(all_audio_files)} audio files:")
for i, f in enumerate(all_audio_files, 1):
    print(f"  {i}. {os.path.basename(f)}")

# Check what's already been transcribed
completed_files = []
remaining_files = []

for audio_file in all_audio_files:
    base_name = os.path.splitext(os.path.basename(audio_file))[0]
    transcript_path = os.path.join(OUTPUT_PATH, f"{base_name}_transcript.txt")

    if os.path.exists(transcript_path):
        completed_files.append(audio_file)
    else:
        remaining_files.append(audio_file)

print(f"\n📊 Status:")
print(f"  ✓ Completed: {len(completed_files)}")
print(f"  ⏳ Remaining: {len(remaining_files)}")

if remaining_files:
    print(f"\n🎯 Next file to process: {os.path.basename(remaining_files[0])}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

📁 Found 8 audio files:
  1. Call Recording - 13Mar2025 1200 BPA.wav
  2. Call Recording - 13Mar25 1130 BK.wav
  3. Call Recording - 13Mar25 1300 HB.wav
  4. Call Recording - 19Mar2025 0800 JD.wav
  5. Call Recording - 19Mar25 0900 - AJ.wav
  6. Call Recording - 19Mar25 1730 - MO.wav
  7. Call Recording - 20Mar2025 1200 LN.wav
  8. Call Recording - 26Mar2025 0830 SA.wav

📊 Status:
  ✓ Completed: 0
  ⏳ Remaining: 8

🎯 Next file to process: Call Recording - 13Mar2025 1200 BPA.wav
