# Test Whisper API + Sarvam Transliteration Pipeline

This notebook tests the full pipeline:
1. Transcribe audio with the OpenAI Whisper API (Kannada script)
2. Transliterate the full text and the individual segments to Roman script using Sarvam AI
3. Save both versions in one combined JSON file and print sample segments

**Refactored to use:** 
- `src/voice_eval/whisper_api.py`
- `src/voice_eval/sarvam_api.py`

In [None]:
import json
from dotenv import load_dotenv
from src.voice_eval.config import load_config
from src.voice_eval.whisper_api import transcribe_audio, transcription_to_dict, estimate_cost
from src.voice_eval.sarvam_api import transliterate_text, transliterate_segments, get_sarvam_api_key
from src.voice_eval.storage import write_file

In [None]:
# Load environment variables, then fetch the Sarvam API key through the
# project helper so later cells can pass it explicitly.
load_dotenv()
sarvam_key = get_sarvam_api_key()
# NOTE(review): this message is printed without checking that sarvam_key is
# non-empty; presumably get_sarvam_api_key() raises when the key is missing —
# confirm against src/voice_eval/sarvam_api.py.
print("✓ API keys loaded")

In [None]:
# Read the two settings this notebook needs from the project configuration:
# the Whisper language and the directory holding the input audio.
language = load_config("whisper", "language")
audio_dir = load_config("input", "audio_dir")

# Echo both values so the run log shows which configuration was used.
print(f"Language: {language}", f"Audio directory: {audio_dir}", sep="\n")

In [None]:
import os

# Sample recording used for this test run (the file name is hard-coded).
test_file = f"{audio_dir}/GHPS.  Bammanakatti.mp3"
print(f"Test file: {test_file}")

# Report the on-disk size, converted from bytes to MB (1 MB = 2**20 bytes here).
size_bytes = os.stat(test_file).st_size
file_size_mb = size_bytes / 2**20
print(f"File size: {file_size_mb:.2f} MB")

## Step 1: Transcribe with Whisper API

In [None]:
# Step 1: send the test recording to Whisper and summarise the response.
banner = "=" * 60
print(f"\n{banner}")
print("STEP 1: Transcribing with OpenAI Whisper API...")
print(banner)

# Ask for verbose JSON with segment-level timestamps.
whisper_kwargs = {
    "audio_path": test_file,
    "language": language,
    "response_format": "verbose_json",
    "timestamp_granularities": ["segment"],
}
transcript = transcribe_audio(**whisper_kwargs)

# Summary of the response: language, duration, segment count, estimated cost,
# and a preview of the first 200 characters of the transcript.
print("✓ Transcription complete!")
print(f"  Language detected: {transcript.language}")
print(f"  Duration: {transcript.duration:.2f} seconds")
print(f"  Segments: {len(transcript.segments)}")
print(f"  Cost: ${estimate_cost(transcript.duration):.4f}")
print(f"\nFirst 200 chars (Kannada script):\n{transcript.text[:200]}...")

## Step 2: Transliterate Full Text with Sarvam AI

In [None]:
# Step 2: romanize the complete transcript text with one Sarvam call.
rule = "=" * 60
print("\n" + rule, "STEP 2: Transliterating full text to Roman script...", rule, sep="\n")

romanized_text = transliterate_text(
    text=transcript.text,
    api_key=sarvam_key,
    source_language_code="kn-IN",
)

if not romanized_text:
    # Any falsy result counts as a failure and is normalised to None so that
    # later cells see one consistent "no romanization" value.
    print("✗ Transliteration failed")
    romanized_text = None
else:
    print("✓ Transliteration complete!")
    print(f"\nFirst 200 chars (Romanized):\n{romanized_text[:200]}...")

## Step 3: Transliterate Individual Segments

In [None]:
# Step 3: romanize each transcript segment individually.
print("\n" + "="*60)
print("STEP 3: Transliterating individual segments...")
print("="*60)

# Convert segments to dictionaries
segments_dict = transcription_to_dict(transcript)["segments"]

romanized_segments = transliterate_segments(
    segments=segments_dict,
    api_key=sarvam_key,
    source_language_code="kn-IN",
    text_field="text"
)

# Count the segments that actually carry a romanization instead of assuming
# every one succeeded: the display cell reads 'text_romanized' with .get(),
# so the key may be missing on some segments.
total_segments = len(romanized_segments)
romanized_count = sum(1 for seg in romanized_segments if seg.get("text_romanized"))

if romanized_count == total_segments:
    print(f"✓ All {total_segments} segments transliterated!")
else:
    # Report partial results explicitly so failures are not hidden.
    print(f"⚠ Only {romanized_count} of {total_segments} segments transliterated")

## Step 4: Save Combined Results

In [None]:
# Step 4: bundle the transcript, its romanization and the segments into one
# JSON document and write it out through the project storage helper.
divider = "=" * 60
print(f"\n{divider}")
print("STEP 4: Saving results...")
print(divider)

# Run-level details about the source file and the services used.
metadata = {
    "file": test_file,
    "language": transcript.language,
    "duration": transcript.duration,
    "whisper_model": "whisper-1",
    "transliteration_provider": "sarvam-ai",
    "cost_usd": estimate_cost(transcript.duration),
}

# Full-text transcript in both scripts; text_romanized is None if step 2 failed.
full_text = {
    "text_kannada": transcript.text,
    "text_romanized": romanized_text,
}

combined_response = {
    "metadata": metadata,
    "transcription": full_text,
    "segments": romanized_segments,
}

# ensure_ascii=False keeps the Kannada characters readable in the file
# instead of writing them as \u escapes.
payload = json.dumps(combined_response, indent=2, ensure_ascii=False)
output_path = write_file(
    "whisper_sarvam_combined_response.json",
    payload,
    base_dir="files/transcriptions/whisper_sarvam_test",
)

print(f"✓ Combined results saved to: {output_path}")

## Step 5: Display Sample Results

In [None]:
# Step 5: print a short preview of the first few segments in both scripts.
SAMPLE_COUNT = 3      # number of segments to preview
PREVIEW_CHARS = 80    # longest text shown per line before truncating


def _preview(text, limit=PREVIEW_CHARS):
    """Return text shortened to limit characters, adding '...' only if shortened."""
    return f"{text[:limit]}..." if len(text) > limit else text


print("\n" + "="*60)
print(f"SAMPLE SEGMENTS (First {SAMPLE_COUNT})")
print("="*60)

for i, seg in enumerate(romanized_segments[:SAMPLE_COUNT], 1):
    print(f"\n[Segment {i}] {seg['start']:.2f}s → {seg['end']:.2f}s")
    print(f"  Kannada:    {_preview(seg['text'])}")

    # A segment may lack a romanization (key missing, or present but None);
    # show 'N/A' in both cases instead of failing on len(None).
    romanized = seg.get('text_romanized')
    if romanized is None:
        romanized = 'N/A'
    print(f"  Romanized:  {_preview(romanized)}")

# Mention the remainder only when there is one; with fewer than SAMPLE_COUNT
# segments the subtraction would otherwise print a negative count.
remaining = len(romanized_segments) - SAMPLE_COUNT
if remaining > 0:
    print(f"\n... ({remaining} more segments)")
print("\n✅ Pipeline test successful!")