# Test Sarvam AI Speech-to-Text on Single File

Test Sarvam's speech-to-text (STT) API with one audio file, then transliterate the resulting Kannada transcript to Roman script.

In [None]:
import os
import requests
from dotenv import load_dotenv
from src.voice_eval.config import load_config

In [None]:
# Load environment variables via python-dotenv before reading the key.
load_dotenv()

# Fail fast when the Sarvam credential is absent or blank.
api_key = os.environ.get('SARVAM_API_KEY')
if api_key is None or api_key == "":
    raise ValueError("SARVAM_API_KEY not found in .env file")

print("✓ API key loaded")

In [None]:
# Read the audio input directory and the configured language from the
# project configuration (section, key).
audio_dir = load_config('input', 'audio_dir')
language = load_config('whisper', 'language')

# Echo both settings so the run is self-describing.
for summary_line in (f"Audio directory: {audio_dir}", f"Language: {language}"):
    print(summary_line)

In [None]:
# Single clip used for this smoke test (described below as the shortest file).
_sample_name = "+919742536994_3.mp4"
test_file = f"{audio_dir}/{_sample_name}"

print(f"Transcribing: {test_file}")
print("Duration: 9.9 seconds (shortest file)")

In [None]:
# Upload the test clip to Sarvam's speech-to-text endpoint as a multipart form
# and show the raw outcome. Leaves `response` defined for the next cell.
url = "https://api.sarvam.ai/speech-to-text"

# The API key is passed in the subscription-key header.
headers = {
    "api-subscription-key": api_key
}

# The handle must stay open until the POST has been sent, so the request is
# issued inside the `with` block.
with open(test_file, 'rb') as f:
    files = {
        'file': (os.path.basename(test_file), f, 'audio/mp4')
    }
    # NOTE(review): the language code is hard-coded rather than derived from
    # the `language` config value loaded earlier — confirm this is intended.
    data = {
        'language_code': 'kn-IN'
    }

    print("Sending request to Sarvam API...\n")
    # (connect, read) timeouts: without them requests waits indefinitely and a
    # stalled connection would hang the notebook.
    response = requests.post(
        url,
        headers=headers,
        files=files,
        data=data,
        timeout=(10, 120),
    )

print(f"Status code: {response.status_code}")
try:
    print(f"Response: {response.json()}")
except ValueError:
    # Error responses are not guaranteed to be JSON; fall back to the raw body
    # instead of crashing before the status check can run.
    print(f"Response: {response.text}")

In [None]:
# Present the STT result and, when a transcript came back, transliterate it
# from Kannada script to Roman script via Sarvam's transliterate endpoint.
# On success defines `result`, `kannada_transcript` and `romanized_transcript`
# (the latter is None when transliteration was skipped or failed).
if response.status_code == 200:
    result = response.json()
    kannada_transcript = result.get('transcript', 'No transcript found')
    # True only when the API returned non-empty transcript text; the
    # placeholder above is for display and must not be sent for transliteration.
    has_transcript = bool(result.get('transcript'))

    print("\n" + "="*60)
    print("TRANSCRIPTION (Kannada):")
    print("="*60)
    print(kannada_transcript)

    # Transliterate to Roman script
    print("\n" + "="*60)
    print("TRANSLITERATING TO ROMAN SCRIPT...")
    print("="*60)

    romanized_transcript = None
    if not has_transcript:
        print("\n✗ No transcript returned; skipping transliteration")
    else:
        transliterate_url = "https://api.sarvam.ai/transliterate"
        transliterate_headers = {
            "api-subscription-key": api_key,
            "Content-Type": "application/json"
        }
        transliterate_payload = {
            "input": kannada_transcript,
            "source_language_code": "kn-IN",
            "target_language_code": "en-IN"
        }

        # Bounded wait so a stalled connection cannot hang the notebook.
        transliterate_response = requests.post(
            transliterate_url,
            headers=transliterate_headers,
            json=transliterate_payload,
            timeout=30,
        )

        if transliterate_response.status_code == 200:
            transliterate_result = transliterate_response.json()
            romanized_transcript = transliterate_result.get('transliterated_text', 'No transliteration found')

            print("\n" + "="*60)
            print("TRANSCRIPTION (Romanized):")
            print("="*60)
            print(romanized_transcript)
        else:
            print(f"\n✗ Transliteration failed: {transliterate_response.status_code}")
            # Show the error body, matching how STT failures are reported below.
            print(transliterate_response.text)

    print("\n" + "="*60)
    print("FULL RESPONSE:")
    print("="*60)
    print(f"STT Request ID: {result.get('request_id')}")
    print(f"Language: {result.get('language_code')}")
    print(f"\nKannada: {kannada_transcript}")
    print(f"\nRomanized: {romanized_transcript}")

    print("\n✓ Sarvam transcription successful!")
else:
    print(f"\n✗ Error: {response.status_code}")
    print(response.text)