# Test OpenAI Whisper API on a Single File

Test the OpenAI Whisper API with one audio file to verify it works before batch processing.

**Refactored to use:** `src/voice_eval/whisper_api.py`

In [None]:
import json
from dotenv import load_dotenv
from src.voice_eval.config import load_config
from src.voice_eval.whisper_api import transcribe_audio, transcription_to_dict, estimate_cost
from src.voice_eval.storage import write_file

In [None]:
# Load environment variables from a local .env file (python-dotenv) into the
# process environment before any API call is made.
# NOTE(review): presumably this supplies the OpenAI API key used by
# src/voice_eval/whisper_api.py — confirm against that module.
load_dotenv()
print("✓ Environment loaded")

In [None]:
# Read the Whisper language setting and the input audio directory from the
# project configuration (section name first, then key).
language = load_config("whisper", "language")
audio_dir = load_config("input", "audio_dir")

# Echo both settings so a wrong configuration is visible before transcribing.
for setting_label, setting_value in (
    ("Language", language),
    ("Audio directory", audio_dir),
):
    print(f"{setting_label}: {setting_value}")

In [None]:
# Pick an MP3 file for testing
test_file = f"{audio_dir}/GHPS.  Bammanakatti.mp3"
print(f"Test file: {test_file}")

# Check the file size against the upload limit before sending anything.
# os.path.getsize raises OSError (e.g. FileNotFoundError) when the path is
# wrong, so a bad audio_dir or filename surfaces here rather than in the API call.
import os

# Upload limit in MB for the transcription endpoint.
# NOTE(review): 25 MB is the value the original message hard-coded — confirm
# it still matches the current OpenAI documentation.
MAX_UPLOAD_MB = 25

file_size_mb = os.path.getsize(test_file) / (1024 * 1024)
if file_size_mb <= MAX_UPLOAD_MB:
    print(f"File size: {file_size_mb:.2f} MB (limit: {MAX_UPLOAD_MB} MB) ✓")
else:
    # The check mark used to be printed unconditionally, which reported an
    # oversized file as passing. Warn instead of raising so the notebook can
    # still be run deliberately against a large file.
    print(
        f"File size: {file_size_mb:.2f} MB (limit: {MAX_UPLOAD_MB} MB) "
        "✗ exceeds the upload limit"
    )

In [None]:
# Send the selected test file to the OpenAI Whisper API through the project
# wrapper and keep the response in `transcript` for the cells that follow.
print("\nSending request to OpenAI Whisper API...\n")

# Gather the request options in one place so they are easy to scan.
# NOTE(review): presumably "verbose_json" with segment granularity is what
# makes duration and per-segment timestamps available — confirm in whisper_api.py.
request_options = {
    "audio_path": test_file,
    "language": language,
    "response_format": "verbose_json",
    "timestamp_granularities": ["segment"],
}
transcript = transcribe_audio(**request_options)

print("✓ Transcription complete!")

In [None]:
# Show the response metadata, then a preview of the transcribed text.
divider = "=" * 60

print("\n" + divider, "METADATA:", divider, sep="\n")
print(f"Language: {transcript.language}")
print(f"Duration: {transcript.duration:.2f} seconds")
print(f"Cost estimate: ${estimate_cost(transcript.duration):.4f}")

print("\n" + divider, "FULL TRANSCRIPTION:", divider, sep="\n")

# Cap the preview at 500 characters; longer transcripts get a trailing "...".
full_text = transcript.text
if len(full_text) > 500:
    print(full_text[:500] + "...")
else:
    print(full_text)

In [None]:
# Preview the first three timestamped segments of the transcript.
divider = "=" * 60

print("\n" + divider)
print(f"TIMESTAMPED SEGMENTS ({len(transcript.segments)} total - showing first 3):")
print(divider)

preview_segments = transcript.segments[:3]
for i, segment in enumerate(preview_segments, start=1):
    print(f"\n[Segment {i}]")
    print(f"Time: {segment.start:.2f}s -> {segment.end:.2f}s")
    print(f"Text: {segment.text}")
    # NOTE(review): no_speech_prob is shown under the "Confidence" label;
    # presumably a higher value means the segment is more likely silence — confirm.
    print(f"Confidence: no_speech_prob={segment.no_speech_prob:.3f}")

In [None]:
# Convert the transcript with transcription_to_dict and persist it as JSON.
response_dict = transcription_to_dict(transcript)

# indent=2 pretty-prints the file; ensure_ascii=False writes non-ASCII
# characters as-is instead of \uXXXX escapes.
response_json = json.dumps(response_dict, indent=2, ensure_ascii=False)

output_path = write_file(
    "whisper_api_test_response.json",
    response_json,
    base_dir="files/transcriptions/whisper_api_test",
)

print(f"\n✓ Response saved to: {output_path}")

In [None]:
# Final cell: report that the single-file test run reached the end.
print("\n✅ Test successful!")