# This notebook will serve as a testing notebook for Gemma Voice Commander

# The first feature will be sending an audio message to Gemma

In [27]:
import requests
import base64

# Base URL of the server under test, reached through a runpod.net proxy host.
# The value ends with "/", so endpoint paths must be joined without adding a
# second slash.
# NOTE(review): the host name looks pod-specific — presumably it has to be
# updated whenever the pod is recreated; confirm.
SERVER_URL = "https://3gdf7gz3vpdp0z-8000.proxy.runpod.net/"

def test_local_wav_audio():
    """Send a local WAV file to the server's ``ask`` endpoint with a custom prompt.

    Reads ``sample_audio.wav`` from the current working directory,
    base64-encodes its bytes, POSTs them as JSON together with the prompt,
    and prints the HTTP status plus the ``text`` / ``prompt_used`` fields of
    the reply.

    Returns:
        None. Every outcome is reported with ``print``: a missing file prints
        conversion hints, and any other exception is printed instead of being
        raised.
    """
    audio_path = "sample_audio.wav"  # Use WAV format instead of MP3
    prompt = "Transcribe this audio into Hindi"

    try:
        with open(audio_path, "rb") as f:
            audio_data = f.read()

        audio_base64 = base64.b64encode(audio_data).decode("utf-8")
        print(f"✅ Loaded and encoded {audio_path} to base64")

        data = {
            "data": audio_base64,
            "prompt": prompt
        }

        # SERVER_URL may end with "/"; strip it before appending the path so
        # the request targets ".../ask" and never ".../​/ask"-style double
        # slashes, whichever way the base URL is written.
        endpoint = f"{SERVER_URL.rstrip('/')}/ask"

        print(f"🚀 Sending to server with prompt: {prompt}")
        response = requests.post(
            endpoint,
            json=data,
            headers={"Content-Type": "application/json"},
            timeout=60
        )

        print(f"Status: {response.status_code}")
        if response.status_code == 200:
            result = response.json()
            print(f"✅ Response: {result['text']}")
            # 'prompt_used' is treated as optional in the reply.
            print(f"Prompt used: {result.get('prompt_used', 'N/A')}")
        else:
            print(f"❌ Server error: {response.text}")

    except FileNotFoundError:
        print(f"❌ {audio_path} not found. Please convert your MP3 to WAV format.")
        print("💡 You can convert MP3 to WAV using:")
        print("   - Online converters")
        print("   - FFmpeg: ffmpeg -i sample_audio.mp3 sample_audio.wav")
        print("   - Audacity (free audio editor)")

    except Exception as e:
        # Best-effort test helper: report the problem instead of crashing the cell.
        print(f"❌ Error: {e}")

def test_google_wav_audio():
    """Download Google's sample WAV file and send it to the server's ``ask`` endpoint.

    Fetches ``roses-are.wav`` from ai.google.dev, base64-encodes the
    downloaded bytes, POSTs them as JSON with a fixed prompt, and prints the
    HTTP status plus the ``text`` / ``prompt_used`` fields of the reply.

    Returns:
        None. A failed download prints the status code and returns early; any
        exception is printed instead of being raised.
    """
    print("🎵 Testing with Google's WAV file...")

    audio_url = "https://ai.google.dev/gemma/docs/audio/roses-are.wav"

    try:
        response = requests.get(audio_url, timeout=10)

        if response.status_code != 200:
            print(f"❌ Failed to download audio: {response.status_code}")
            return

        print("✅ Downloaded Google's WAV file")

        audio_base64 = base64.b64encode(response.content).decode('utf-8')
        print(f"✅ Converted to base64 ({len(audio_base64)} chars)")

        data = {
            "data": audio_base64,
            "prompt": "Translate this audio into English"
        }

        # SERVER_URL may end with "/"; strip it before appending the path so
        # the request never contains a doubled slash before "ask".
        endpoint = f"{SERVER_URL.rstrip('/')}/ask"

        print("🚀 Sending to server...")
        server_response = requests.post(
            endpoint,
            json=data,
            headers={"Content-Type": "application/json"},
            timeout=60
        )

        print(f"Status: {server_response.status_code}")

        if server_response.status_code == 200:
            result = server_response.json()
            print(f"✅ Response: {result['text']}")
            # Treat 'prompt_used' as optional, like test_local_wav_audio does,
            # so a valid reply without it is not reported as an error.
            print(f"✅ Prompt used: {result.get('prompt_used', 'N/A')}")
        else:
            print(f"❌ Server error: {server_response.text}")

    except Exception as e:
        # Best-effort test helper: report the problem instead of crashing the cell.
        print(f"❌ Error: {e}")

if __name__ == "__main__":
    # Only the answer "1" selects the local-file test; every other answer
    # (including "2" and empty input) runs the Google sample test.
    answer = input("Choose test:\n1. Local WAV file\n2. Google's WAV file\nEnter choice (1-2): ")
    selected_test = test_local_wav_audio if answer == "1" else test_google_wav_audio
    selected_test()

✅ Loaded and encoded sample_audio.wav to base64
🚀 Sending to server with prompt: Transcribe this audio into Hindi
Status: 200
✅ Response: माजी, ज़ोन बी की सिक्योरिटी अपडेट दीजिए प्लीज़।
Prompt used: Transcribe this audio into Hindi


In [33]:
import requests
import base64

# Base URL of the server under test (runpod.net proxy host). It ends with "/",
# which is why the functions in this cell append bare paths such as "ask".
SERVER_URL = "https://3gdf7gz3vpdp0z-8000.proxy.runpod.net/"

def test_improved_audio():
    """Post sample_audio.wav to the ``ask`` endpoint with preprocessing switched on.

    The file is read from the working directory, base64-encoded and sent as
    JSON with ``enable_preprocessing`` set to True. The HTTP status and the
    ``text`` / ``status`` fields of the reply are printed. Nothing is
    returned; a missing file or any other exception is printed, not raised.
    """
    wav_file = "sample_audio.wav"
    instruction = "Transcribe this audio into Hindi"

    try:
        with open(wav_file, "rb") as handle:
            raw_bytes = handle.read()
        encoded = base64.b64encode(raw_bytes).decode("utf-8")
        print(f"✅ Loaded and encoded {wav_file} to base64")

        # NEW: preprocessing is requested explicitly for this run.
        payload = dict(data=encoded, prompt=instruction, enable_preprocessing=True)

        print("🚀 Sending to server with IMPROVED processing...")
        reply = requests.post(
            "{}ask".format(SERVER_URL),
            json=payload,
            headers={"Content-Type": "application/json"},
            timeout=60,
        )

        print(f"Status: {reply.status_code}")
        if reply.status_code != 200:
            print(f"❌ Server error: {reply.text}")
            return

        body = reply.json()
        # Fields are printed one at a time, in this order, so a missing key
        # only interrupts the output from that field onwards.
        for label, field in (("✅ IMPROVED Response", "text"), ("Status", "status")):
            print(f"{label}: {body[field]}")

    except FileNotFoundError:
        print(f"❌ {wav_file} not found.")
    except Exception as err:
        print(f"❌ Error: {err}")
def test_old_audio():
    """Post sample_audio.wav to the ``ask`` endpoint with preprocessing switched off.

    The file is read from the working directory, base64-encoded and sent as
    JSON with ``enable_preprocessing`` set to False (the earlier behaviour).
    The HTTP status and the ``text`` / ``status`` fields of the reply are
    printed. Nothing is returned; a missing file or any other exception is
    printed, not raised.
    """
    wav_file = "sample_audio.wav"
    instruction = "Transcribe this audio into Hindi"

    try:
        with open(wav_file, "rb") as handle:
            raw_bytes = handle.read()
        encoded = base64.b64encode(raw_bytes).decode("utf-8")
        print(f"✅ Loaded and encoded {wav_file} to base64")

        # Preprocessing is explicitly disabled for this run.
        payload = dict(data=encoded, prompt=instruction, enable_preprocessing=False)

        print("🚀 Sending to server with OLD processing...")
        reply = requests.post(
            "{}ask".format(SERVER_URL),
            json=payload,
            headers={"Content-Type": "application/json"},
            timeout=60,
        )

        print(f"Status: {reply.status_code}")
        if reply.status_code != 200:
            print(f"❌ Server error: {reply.text}")
            return

        body = reply.json()
        # Fields are printed one at a time, in this order, so a missing key
        # only interrupts the output from that field onwards.
        for label, field in (("✅ OLD Response", "text"), ("Status", "status")):
            print(f"{label}: {body[field]}")

    except FileNotFoundError:
        print(f"❌ {wav_file} not found.")
    except Exception as err:
        print(f"❌ Error: {err}")
def compare_both():
    """Ask the server's ``compare_audio`` endpoint to transcribe one clip both ways.

    Reads sample_audio.wav, base64-encodes it and POSTs it with the prompt
    (120 s timeout). On HTTP 200 the ``text`` values under
    ``comparison_results.without_preprocessing`` and
    ``comparison_results.with_preprocessing`` are printed as the OLD and NEW
    results. Nothing is returned; a missing file or any other exception is
    printed, not raised.
    """
    wav_file = "sample_audio.wav"
    instruction = "Transcribe this audio into Hindi"

    try:
        with open(wav_file, "rb") as handle:
            raw_bytes = handle.read()
        encoded = base64.b64encode(raw_bytes).decode("utf-8")
        print(f"✅ Loaded {wav_file}")

        payload = dict(data=encoded, prompt=instruction)

        print("🚀 Comparing both methods...")
        reply = requests.post(
            "{}compare_audio".format(SERVER_URL),
            json=payload,
            headers={"Content-Type": "application/json"},
            timeout=120,
        )

        print(f"Status: {reply.status_code}")
        if reply.status_code != 200:
            print(f"❌ Server error: {reply.text}")
            return

        # Pull both transcripts out first so nothing is printed when either
        # one is missing from the reply.
        comparison = reply.json()['comparison_results']
        without_pre = comparison['without_preprocessing']['text']
        with_pre = comparison['with_preprocessing']['text']

        print("\n📊 COMPARISON RESULTS:")
        print(f"OLD method: {without_pre}")
        print(f"NEW method: {with_pre}")

    except FileNotFoundError:
        print(f"❌ {wav_file} not found.")
    except Exception as err:
        print(f"❌ Error: {err}")
if __name__ == "__main__":
    answer = input("Choose test:\n1. NEW improved audio processing\n2. OLD audio processing\n3. Compare both\nEnter choice (1-3): ")

    # "1" and "2" pick a single-mode test; any other answer (including "3"
    # and empty input) runs the side-by-side comparison.
    single_mode_tests = {"1": test_improved_audio, "2": test_old_audio}
    single_mode_tests.get(answer, compare_both)()

✅ Loaded sample_audio.wav
🚀 Comparing both methods...
Status: 200

📊 COMPARISON RESULTS:
OLD method: मुझे माफ़ कीजिए, मैं आपकी मदद नहीं कर सकता क्योंकि ऑडियो में कोई आवाज़ नहीं है। क्या आप कृपया ऑडियो फ़ाइल फिर से अपलोड कर सकते हैं?
NEW method: जय माँ जी, ज़ोन बी की सिक्योरिटी अपडेट दीजिए प्लीज़।


In [35]:
# Audio Data Benchmarking

import requests
import base64
import time

# Base URL of the server under test (runpod.net proxy host). It ends with "/",
# which is why the benchmark below appends the bare path "ask".
SERVER_URL = "https://3gdf7gz3vpdp0z-8000.proxy.runpod.net/"

def _collect_transcriptions(audio_base64, prompt, enable_preprocessing, n_calls, delay_s):
    """POST the same clip ``n_calls`` times and return the list of responses.

    Each entry is the reply's ``text`` field, or the string "ERROR" when the
    server answers with a non-200 status or the call itself fails.
    """
    # Accept SERVER_URL with or without a trailing "/".
    endpoint = f"{SERVER_URL.rstrip('/')}/ask"
    results = []
    for attempt in range(1, n_calls + 1):
        data = {"data": audio_base64, "prompt": prompt, "enable_preprocessing": enable_preprocessing}
        try:
            response = requests.post(endpoint, json=data, timeout=60)
            result = response.json()['text'] if response.status_code == 200 else "ERROR"
        except (requests.RequestException, ValueError, KeyError) as exc:
            # A timeout, connection error, non-JSON body or missing 'text' key
            # counts as one failed sample instead of aborting the benchmark.
            print(f"   (call {attempt} failed: {exc!r})")
            result = "ERROR"
        results.append(result)
        print(f"{attempt}: {result}")
        time.sleep(delay_s)  # small pause between consecutive calls
    return results


def run_10_tests(audio_path="sample_audio2.wav", prompt="Transcribe this audio into Hindi",
                 n_calls=10, delay_s=0.5):
    """Benchmark response consistency with preprocessing off vs. on.

    Sends the same audio clip ``n_calls`` times with
    ``enable_preprocessing=False`` and then ``n_calls`` times with
    ``enable_preprocessing=True``, printing every response and finally the
    number of distinct responses per mode.

    Args:
        audio_path: WAV file to send (default: "sample_audio2.wav").
        prompt: Instruction sent with the audio.
        n_calls: Number of requests per mode (default 10).
        delay_s: Seconds to sleep after each request (default 0.5).

    Returns:
        None. All results are printed.

    Raises:
        FileNotFoundError: if ``audio_path`` does not exist (not caught here).
    """
    # Load audio
    with open(audio_path, "rb") as f:
        audio_data = f.read()
    audio_base64 = base64.b64encode(audio_data).decode("utf-8")

    print(f"🔴 OLD METHOD ({n_calls} calls):")
    old_results = _collect_transcriptions(audio_base64, prompt, False, n_calls, delay_s)

    print(f"\n🟢 NEW METHOD ({n_calls} calls):")
    new_results = _collect_transcriptions(audio_base64, prompt, True, n_calls, delay_s)

    print("\n📊 SUMMARY:")
    print(f"OLD unique responses: {len(set(old_results))}")
    print(f"NEW unique responses: {len(set(new_results))}")

# Run the benchmark when this cell (or the file as a script) is executed directly.
if __name__ == "__main__":
    run_10_tests()










🔴 OLD METHOD (10 calls):
1: मुझे किसी सुरक्षा अपडेट की आवश्यकता है।
2: ज़ोन बी की सिक्योरिटी अपडेट दीजिए।
3: मुझे सिक्योरिटी अपडेट दीजिए।
4: ज़ोन बी की सिक्योरिटी अपडेट दीजिए।
5: माफ़ कीजिए, मैं आपकी बात नहीं समझ पाया। क्या आप कृपया अपनी बात को फिर से दोहरा सकते हैं?
6: जोन बी की सिक्योरिटी अपडेट दीजिये।
7: ज़ोन बी की सिक्योरिटी अपडेट दीजिए।
8: ज़ोन बी की सिक्योरिटी अपडेट दीजिये।
9: मुझे खेद है, लेकिन मैं इस ऑडियो को हिंदी में ट्रांसक्राइब नहीं कर सकता। मैं केवल टेक्स्ट ट्रांसक्राइब करने के लिए प्रशिक्षित हूं, ऑडियो नहीं। क्या आप टेक्स्ट प्रदान कर सकते हैं जिसे आप ट्रांसक्राइब करवाना चाहते हैं?
10: ज़ोन बी की सिक्योरिटी अपडेट दीजिये।

🟢 NEW METHOD (10 calls):
1: मुझे माफ़ करना, लेकिन मैं इस ऑडियो को हिंदी में ट्रांसक्राइब नहीं कर सकता क्योंकि यह एक दोहराव वाला शोर है जिसमें कोई वास्तविक शब्द या वाक्य नहीं है।

क्या आप कृपया ऑडियो को फिर से रिकॉर्ड कर सकते हैं या कोई अन्य ऑडियो फ़ाइल प्रदान कर सकते हैं जिसे मैं ट्रांसक्राइब कर सकूँ?
2: ज़ोन बी की सिक्योरिटी अपडेट दीजिए।
3: मुझे क्षमा कर

In [45]:
import requests
import base64

# Base URL of the server under test (runpod.net proxy host). It ends with "/",
# which is why the function below appends the bare path "ask".
SERVER_URL = "https://3gdf7gz3vpdp0z-8000.proxy.runpod.net/"

def test_no_processing():
    """Post sample_audio.wav to the ``ask`` endpoint with ``processing_mode`` set to "force_off".

    The file is read from the working directory, base64-encoded and sent as
    JSON. On HTTP 200 the ``text``, ``processing_applied`` and ``status``
    fields of the reply are printed. Nothing is returned; a missing file or
    any other exception is printed, not raised.
    """
    wav_file = "sample_audio.wav"
    instruction = "Transcribe this audio into Hindi"

    try:
        with open(wav_file, "rb") as handle:
            raw_bytes = handle.read()
        encoded = base64.b64encode(raw_bytes).decode("utf-8")
        print(f"✅ Loaded {wav_file}")

        # Force NO preprocessing for this request.
        payload = dict(data=encoded, prompt=instruction, processing_mode="force_off")

        print("⏭️ Testing with NO preprocessing...")
        reply = requests.post(
            "{}ask".format(SERVER_URL),
            json=payload,
            headers={"Content-Type": "application/json"},
            timeout=60,
        )

        print(f"Status: {reply.status_code}")
        if reply.status_code != 200:
            print(f"❌ Error: {reply.text}")
            return

        body = reply.json()
        # Fields are printed one at a time, in this order, so a missing key
        # only interrupts the output from that field onwards.
        report = (
            ("✅ Result", "text"),
            ("🔧 Processing Applied", "processing_applied"),
            ("📋 Status", "status"),
        )
        for label, field in report:
            print(f"{label}: {body[field]}")

    except FileNotFoundError:
        print(f"❌ {wav_file} not found.")
    except Exception as err:
        print(f"❌ Error: {err}")

# Run the forced-off test when this cell (or the file as a script) is executed directly.
if __name__ == "__main__":
    test_no_processing()

✅ Loaded sample_audio.wav
⏭️ Testing with NO preprocessing...
Status: 200
✅ Result: जमा जी ज़ोन बी की सिक्योरिटी अपडेट दीजिए प्लीज।
🔧 Processing Applied: False
📋 Status: ✅ Adaptive processing: No preprocessing needed (good quality audio)
