# Test OpenAI GPT Transliteration vs Sarvam AI

Compare OpenAI GPT-based transliteration against Sarvam AI's specialized transliteration service.

**Models tested:**
- `gpt-4o-mini` - Cost-efficient model
- Sarvam AI transliteration API

**Evaluation criteria:**
1. Romanization accuracy
2. Consistency across segments
3. Cost per character
4. Latency

In [None]:
import json
import os
import time
from typing import Any, Optional

from dotenv import load_dotenv
from openai import OpenAI

from src.voice_eval.sarvam_api import transliterate_text, get_sarvam_api_key

In [None]:
# Credentials setup: load_dotenv() runs first so that values from a local
# .env file (if any) are available when the environment is read below.
load_dotenv()

# The OpenAI key is read from the environment; the Sarvam key comes from the
# project helper.
openai_client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
sarvam_key = get_sarvam_api_key()

print("✓ API keys loaded")

## Test Data: Sample Kannada Text

The samples below are Kannada transcriptions taken from the previous Whisper API test.

In [None]:
# Kannada samples taken from the Whisper transcription output.

# Shortest segment: keeps the quick smoke tests cheap and fast.
test_text_short = "ಇಲ್ಲಿ ಬೇರೆ ಕಡೆ ಅದಾನ್ರಿ. ಹೌದ್ರಿ ಅಕ್ಕಿ ಲೆಕ್ಕ ಹೇಳ್ತಿದ್ರಿ ಅದಕ್ಕ್ರಿ."

# Longer two-line segment for a more meaningful comparison. The pieces are
# concatenated explicitly so the space before the line break stays visible.
test_text_long = (
    "ಪ್ರದಿಪದಾನ್ನು ನಿವು ಅವನ್ನು ಬಾಜೋಕ್ಕಾಲ್ಲೇ ಕುತ್ತುಗಾರಿ. ಇದೆ ಗಣಿತ ಗಣಕ್ಕಾಂತಕಂತಾ ಒಂದು ಹಸಾಯಿ ಯೋಜನೆರಿದೆ. \n"
    "ಮೂರು ನಾಕು ಅಯಿನೆ ತರಗತಿ ಮಕ್ಕಳೆಗೆ ಯಾರು ಸಲ್ಪ ಮಾಕ್ಕಾರದಲ್ಲಿ ಒಂದು ತನಾಯಿ ಯೋಜನೆರಿದೆ."
)

# Report the size of each sample (in characters) and echo the short one.
short_char_count = len(test_text_short)
long_char_count = len(test_text_long)
print(f"Short test: {short_char_count} characters")
print(f"Long test: {long_char_count} characters")
print(f"\nKannada text (short):\n{test_text_short}")

## Method 1: OpenAI GPT-4o-mini Transliteration

In [None]:
def transliterate_with_gpt(
    text: str,
    model: str = "gpt-4o-mini",
    system_prompt: Optional[str] = None,
    *,
    input_price_per_million_usd: float = 0.15,
    output_price_per_million_usd: float = 0.60,
    client: Optional[Any] = None,
) -> dict:
    """
    Transliterate Kannada text to Roman script using an OpenAI chat model.

    Args:
        text: Kannada text to romanize; sent as the user message.
        model: Chat model name passed to the API.
        system_prompt: Instruction for the model. When None, a default
            transliteration prompt is used.
        input_price_per_million_usd: USD price per 1M input tokens used for
            the cost estimate. The default is the gpt-4o-mini rate, so pass
            the matching rate when choosing a different ``model``.
        output_price_per_million_usd: USD price per 1M output tokens
            (default: gpt-4o-mini rate).
        client: Object exposing ``chat.completions.create``. When None, the
            notebook-level ``openai_client`` is used.

    Returns dict with:
    - transliterated_text: The romanized output ("" if the API returned no
      text content)
    - model: Model used
    - tokens: Token usage ("input", "output", "total")
    - latency_ms: Response time in milliseconds
    - estimated_cost_usd: Cost estimate from the token counts and prices
    """
    if system_prompt is None:
        system_prompt = (
            "You are a Kannada language expert specializing in transliteration. "
            "Transliterate the provided Kannada text to Roman/Latin script. "
            "Use standard phonetic romanization that is readable and pronounceable. "
            "Preserve the original meaning and pronunciation as closely as possible. "
            "Only output the transliterated text, no explanations."
        )

    if client is None:
        # Fall back to the client created in the API-key setup cell.
        client = openai_client

    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": text}
        ],
        temperature=0.0  # Deterministic output
    )

    latency_ms = (time.perf_counter() - start_time) * 1000

    # Extract token usage and estimate cost from the per-million-token prices.
    input_tokens = response.usage.prompt_tokens
    output_tokens = response.usage.completion_tokens
    cost_usd = (
        (input_tokens * input_price_per_million_usd / 1_000_000)
        + (output_tokens * output_price_per_million_usd / 1_000_000)
    )

    # message.content may be None (e.g. when the model returns no text), so
    # coalesce to an empty string before stripping.
    content = response.choices[0].message.content

    return {
        "transliterated_text": (content or "").strip(),
        "model": model,
        "tokens": {
            "input": input_tokens,
            "output": output_tokens,
            "total": response.usage.total_tokens
        },
        "latency_ms": latency_ms,
        "estimated_cost_usd": cost_usd
    }

In [None]:
# Run the GPT transliteration helper on the short sample and report the
# romanized text together with its latency, token usage and estimated cost.
short_banner = "=" * 60
print(short_banner)
print("Testing OpenAI GPT-4o-mini (short text)")
print(short_banner)

gpt_result_short = transliterate_with_gpt(test_text_short)

# Pull out the nested token counts once to keep the report line readable.
short_token_usage = gpt_result_short['tokens']

print(f"\nOriginal (Kannada):\n{test_text_short}")
print(f"\nTransliterated (GPT-4o-mini):\n{gpt_result_short['transliterated_text']}")
print("\nMetrics:")
print(f"  Latency: {gpt_result_short['latency_ms']:.0f}ms")
print(f"  Tokens: {short_token_usage['total']} (in: {short_token_usage['input']}, out: {short_token_usage['output']})")
print(f"  Cost: ${gpt_result_short['estimated_cost_usd']:.6f}")

## Method 2: Sarvam AI Transliteration

In [None]:
# Run Sarvam AI transliteration on the short sample and time the call.
print("=" * 60)
print("Testing Sarvam AI (short text)")
print("=" * 60)

# perf_counter() is a monotonic clock, so the elapsed time cannot be skewed
# by system clock adjustments (unlike time.time()).
start_time = time.perf_counter()
sarvam_result_short = transliterate_text(
    text=test_text_short,
    api_key=sarvam_key,
    source_language_code="kn-IN"
)
sarvam_latency_ms = (time.perf_counter() - start_time) * 1000

print(f"\nOriginal (Kannada):\n{test_text_short}")
print(f"\nTransliterated (Sarvam AI):\n{sarvam_result_short}")
print("\nMetrics:")
print(f"  Latency: {sarvam_latency_ms:.0f}ms")
print("  Cost: Unknown (Sarvam pricing not publicly listed)")

## Side-by-Side Comparison (Short Text)

In [None]:
# Side-by-side report for the short sample: both romanizations, both
# latencies, and the GPT cost normalized by input length in characters.
comparison_banner = "=" * 60
gpt_short_text = gpt_result_short['transliterated_text']
gpt_short_latency_ms = gpt_result_short['latency_ms']
gpt_short_cost_per_char = gpt_result_short['estimated_cost_usd'] / len(test_text_short)

print(comparison_banner)
print("COMPARISON: Short Text")
print(comparison_banner)
print("\nOriginal (Kannada):")
print(f"  {test_text_short}")
print("\nGPT-4o-mini:")
print(f"  {gpt_short_text}")
print("\nSarvam AI:")
print(f"  {sarvam_result_short}")
print("\nLatency:")
print(f"  GPT-4o-mini: {gpt_short_latency_ms:.0f}ms")
print(f"  Sarvam AI:   {sarvam_latency_ms:.0f}ms")
print("\nCost per character (GPT-4o-mini):")
print(f"  ${gpt_short_cost_per_char:.8f} per char")

## Extended Test: Longer Text

In [None]:
# Repeat both transliterations on the longer sample and print each output
# with its metrics.
print("=" * 60)
print("Testing with longer text segment")
print("=" * 60)

# GPT-4o-mini (the helper reports its own latency in the returned dict)
gpt_result_long = transliterate_with_gpt(test_text_long)

# Sarvam AI: timed here with perf_counter(), a monotonic clock, so the
# elapsed time cannot be skewed by system clock adjustments.
start_time = time.perf_counter()
sarvam_result_long = transliterate_text(
    text=test_text_long,
    api_key=sarvam_key,
    source_language_code="kn-IN"
)
sarvam_latency_long_ms = (time.perf_counter() - start_time) * 1000

print(f"\nText length: {len(test_text_long)} characters")
print("\n" + "-" * 60)
print("GPT-4o-mini output:")
print("-" * 60)
print(gpt_result_long['transliterated_text'])
print(f"\nLatency: {gpt_result_long['latency_ms']:.0f}ms")
print(f"Tokens: {gpt_result_long['tokens']['total']}")
print(f"Cost: ${gpt_result_long['estimated_cost_usd']:.6f}")

print("\n" + "-" * 60)
print("Sarvam AI output:")
print("-" * 60)
print(sarvam_result_long)
print(f"\nLatency: {sarvam_latency_long_ms:.0f}ms")

## Cost Projection for Full Dataset

Estimate the cost of processing all 42 audio files (~9 hours of audio).

In [None]:
# Back-of-the-envelope projection of GPT transliteration cost for the whole
# dataset. Assumptions (rough, not measured):
#   - 9 hours of audio = 32,400 seconds
#   - ~50 transcript characters per second of audio
#   => ~1,620,000 Kannada characters in total

dataset_duration_seconds = 9 * 3600
estimated_chars_per_second = 50  # assumed average transcript density
estimated_total_chars = dataset_duration_seconds * estimated_chars_per_second

# Per-character GPT cost is taken from the longer-sample run, then scaled up
# to the projected character count.
gpt_cost_per_char = gpt_result_long['estimated_cost_usd'] / len(test_text_long)
gpt_total_cost = estimated_total_chars * gpt_cost_per_char

projection_banner = "=" * 60
print(projection_banner)
print("COST PROJECTION: Full Dataset (42 files, 9 hours)")
print(projection_banner)
print(f"\nEstimated total characters: {estimated_total_chars:,}")
print("\nGPT-4o-mini transliteration:")
print(f"  Cost per character: ${gpt_cost_per_char:.8f}")
print(f"  Estimated total cost: ${gpt_total_cost:.4f}")
print("\nSarvam AI transliteration:")
print("  Cost: Unknown (pricing not publicly available)")
print("\nFor reference:")
print("  Whisper API transcription cost: ~$3.24 (for 9 hours)")
print(f"  Combined (Whisper + GPT): ~${3.24 + gpt_total_cost:.2f}")

## Summary and Recommendations

In [None]:
# Final recap: gather the summary lines (static notes plus the latency and
# cost figures computed in earlier cells) and print them in order.
summary_banner = "=" * 60
summary_lines = [
    summary_banner,
    "SUMMARY",
    summary_banner,
    "\n✓ Both services successfully transliterate Kannada to Roman script",
    "\n📊 Key Differences:",
    "\n1. Quality:",
    "   - Compare outputs above for readability and accuracy",
    "   - GPT may be more flexible with custom prompts",
    "   - Sarvam is specialized for Indian languages",
    "\n2. Latency:",
    f"   - GPT-4o-mini: ~{gpt_result_short['latency_ms']:.0f}ms (short), ~{gpt_result_long['latency_ms']:.0f}ms (long)",
    f"   - Sarvam AI:   ~{sarvam_latency_ms:.0f}ms (short), ~{sarvam_latency_long_ms:.0f}ms (long)",
    "\n3. Cost (estimated):",
    f"   - GPT-4o-mini: ~${gpt_total_cost:.2f} for full dataset",
    "   - Sarvam AI:   Unknown",
    "\n4. Integration:",
    "   - GPT: Single API (OpenAI) for both transcription + transliteration",
    "   - Sarvam: Separate API, but specialized for Indic languages",
    "\n💡 Recommendation: Review outputs above to assess quality difference before deciding.",
]
for summary_line in summary_lines:
    print(summary_line)