# This notebook will serve as a testing notebook for Gemma Voice Commander

# The first feature will be sending an audio message to Gemma

In [27]:
import requests
import base64

# Base URL of the server under test (a RunPod proxy address); note that it ends with "/".
SERVER_URL = "https://3gdf7gz3vpdp0z-8000.proxy.runpod.net/"

def test_local_wav_audio():
    """Send a local WAV file to the server's ``ask`` endpoint and print the reply.

    Reads ``sample_audio.wav`` from the working directory, base64-encodes it
    and POSTs it as JSON (``data`` and ``prompt`` keys). The HTTP status code
    is always printed. On a 200 response the ``text`` field of the JSON reply
    is printed, followed by ``prompt_used`` (or ``N/A`` when that key is
    absent); on any other status the raw response body is printed.

    Returns:
        None. Results and errors are reported with ``print``; a missing audio
        file and any other exception are caught and printed, not raised.
    """
    audio_path = "sample_audio.wav"  # Use WAV format instead of MP3
    prompt = "Transcribe this audio into Hindi"

    try:
        with open(audio_path, "rb") as f:
            audio_data = f.read()

        audio_base64 = base64.b64encode(audio_data).decode("utf-8")
        print(f"‚úÖ Loaded and encoded {audio_path} to base64")

        data = {
            "data": audio_base64,
            "prompt": prompt
        }

        # SERVER_URL may already end with "/"; strip it before joining so the
        # request targets ".../ask" instead of "...//ask".
        ask_url = SERVER_URL.rstrip("/") + "/ask"

        print(f"üöÄ Sending to server with prompt: {prompt}")
        response = requests.post(
            ask_url,
            json=data,
            headers={"Content-Type": "application/json"},
            timeout=60
        )

        print(f"Status: {response.status_code}")
        if response.status_code == 200:
            result = response.json()
            print(f"‚úÖ Response: {result['text']}")
            print(f"Prompt used: {result.get('prompt_used', 'N/A')}")
        else:
            print(f"‚ùå Server error: {response.text}")

    except FileNotFoundError:
        print(f"‚ùå {audio_path} not found. Please convert your MP3 to WAV format.")
        print("üí° You can convert MP3 to WAV using:")
        print("   - Online converters")
        print("   - FFmpeg: ffmpeg -i sample_audio.mp3 sample_audio.wav")
        print("   - Audacity (free audio editor)")

    except Exception as e:
        # Catch-all so a failed request or an unexpected reply shape is
        # reported in the cell output instead of raising.
        print(f"‚ùå Error: {e}")

def test_google_wav_audio():
    """Download Google's sample WAV file and send it to the server's ``ask`` endpoint.

    Fetches ``roses-are.wav`` from ai.google.dev, base64-encodes the bytes and
    POSTs them as JSON (``data`` and ``prompt`` keys). If the download does
    not return status 200 the function prints the status and stops. Otherwise
    the server's HTTP status is printed; on a 200 response the ``text`` field
    and ``prompt_used`` (or ``N/A`` when absent) are printed, and on any other
    status the raw response body is printed.

    Returns:
        None. Results and errors are reported with ``print``; exceptions are
        caught and printed, not raised.
    """
    print("üéµ Testing with Google's WAV file...")

    audio_url = "https://ai.google.dev/gemma/docs/audio/roses-are.wav"

    try:
        response = requests.get(audio_url, timeout=10)

        if response.status_code != 200:
            print(f"‚ùå Failed to download audio: {response.status_code}")
            return

        print("‚úÖ Downloaded Google's WAV file")

        audio_base64 = base64.b64encode(response.content).decode('utf-8')
        print(f"‚úÖ Converted to base64 ({len(audio_base64)} chars)")

        data = {
            "data": audio_base64,
            "prompt": "Translate this audio into English"
        }

        # SERVER_URL may already end with "/"; strip it before joining so the
        # request targets ".../ask" instead of "...//ask".
        ask_url = SERVER_URL.rstrip("/") + "/ask"

        print("üöÄ Sending to server...")
        server_response = requests.post(
            ask_url,
            json=data,
            headers={"Content-Type": "application/json"},
            timeout=60
        )

        print(f"Status: {server_response.status_code}")

        if server_response.status_code == 200:
            result = server_response.json()
            print(f"‚úÖ Response: {result['text']}")
            # Use .get so a reply without 'prompt_used' does not turn a
            # successful transcription into an error message.
            print(f"‚úÖ Prompt used: {result.get('prompt_used', 'N/A')}")
        else:
            print(f"‚ùå Server error: {server_response.text}")

    except Exception as e:
        # Catch-all so network failures or unexpected reply shapes are
        # reported in the cell output instead of raising.
        print(f"‚ùå Error: {e}")

if __name__ == "__main__":
    # Ask which test to run: "1" selects the local-file test, and any other
    # answer runs the Google sample test.
    choice = input("Choose test:\n1. Local WAV file\n2. Google's WAV file\nEnter choice (1-2): ")

    (test_local_wav_audio if choice == "1" else test_google_wav_audio)()

‚úÖ Loaded and encoded sample_audio.wav to base64
üöÄ Sending to server with prompt: Transcribe this audio into Hindi
Status: 200
‚úÖ Response: ‡§Æ‡§æ‡§ú‡•Ä, ‡•õ‡•ã‡§® ‡§¨‡•Ä ‡§ï‡•Ä ‡§∏‡§ø‡§ï‡•ç‡§Ø‡•ã‡§∞‡§ø‡§ü‡•Ä ‡§Ö‡§™‡§°‡•á‡§ü ‡§¶‡•Ä‡§ú‡§ø‡§è ‡§™‡•ç‡§≤‡•Ä‡•õ‡•§
Prompt used: Transcribe this audio into Hindi


In [33]:
import requests
import base64

# Base URL of the server under test (a RunPod proxy address); note that it ends with "/".
SERVER_URL = "https://3gdf7gz3vpdp0z-8000.proxy.runpod.net/"

def _ask_with_preprocessing(enable_preprocessing, label):
    """Send ``sample_audio.wav`` to the ``ask`` endpoint with the given preprocessing flag.

    Shared implementation for ``test_improved_audio`` and ``test_old_audio``,
    which differ only in the flag value and the label used in the output.

    Args:
        enable_preprocessing: Value sent as the ``enable_preprocessing`` field
            of the JSON request body.
        label: Word inserted into the progress and result messages
            (``"IMPROVED"`` or ``"OLD"``).

    Returns:
        None. The HTTP status is printed; on a 200 response the ``text`` and
        ``status`` fields of the JSON reply are printed, otherwise the raw
        response body. A missing audio file and any other exception are
        caught and printed, not raised.
    """
    audio_path = "sample_audio.wav"
    prompt = "Transcribe this audio into Hindi"

    try:
        with open(audio_path, "rb") as f:
            audio_data = f.read()

        audio_base64 = base64.b64encode(audio_data).decode("utf-8")
        print(f"‚úÖ Loaded and encoded {audio_path} to base64")

        data = {
            "data": audio_base64,
            "prompt": prompt,
            "enable_preprocessing": enable_preprocessing
        }

        # Join the endpoint so the result is ".../ask" whether or not
        # SERVER_URL ends with "/".
        ask_url = SERVER_URL.rstrip("/") + "/ask"

        print(f"üöÄ Sending to server with {label} processing...")
        response = requests.post(
            ask_url,
            json=data,
            headers={"Content-Type": "application/json"},
            timeout=60
        )

        print(f"Status: {response.status_code}")
        if response.status_code == 200:
            result = response.json()
            print(f"‚úÖ {label} Response: {result['text']}")
            print(f"Status: {result['status']}")
        else:
            print(f"‚ùå Server error: {response.text}")

    except FileNotFoundError:
        print(f"‚ùå {audio_path} not found.")
    except Exception as e:
        # Catch-all so a failed request or an unexpected reply shape is
        # reported in the cell output instead of raising.
        print(f"‚ùå Error: {e}")


def test_improved_audio():
    """Test audio with NEW preprocessing for better accuracy.

    Sends ``sample_audio.wav`` with ``enable_preprocessing`` set to ``True``
    and prints the server's reply.
    """
    _ask_with_preprocessing(True, "IMPROVED")


def test_old_audio():
    """Test audio without preprocessing (the old method).

    Sends ``sample_audio.wav`` with ``enable_preprocessing`` set to ``False``
    and prints the server's reply.
    """
    _ask_with_preprocessing(False, "OLD")

def compare_both():
    """Ask the server's ``compare_audio`` endpoint to run both methods and print each result.

    Encodes ``sample_audio.wav`` as base64, POSTs it with the prompt, prints
    the HTTP status and, on a 200 response, prints the ``text`` entries found
    under ``comparison_results`` for ``without_preprocessing`` (OLD) and
    ``with_preprocessing`` (NEW). Any other status prints the response body.
    A missing file and any other exception are printed, not raised.
    """
    wav_file = "sample_audio.wav"
    instruction = "Transcribe this audio into Hindi"

    try:
        with open(wav_file, "rb") as handle:
            raw_bytes = handle.read()
        encoded_audio = base64.b64encode(raw_bytes).decode("utf-8")
        print(f"‚úÖ Loaded {wav_file}")

        print("üöÄ Comparing both methods...")
        reply = requests.post(
            f"{SERVER_URL}compare_audio",
            json={"data": encoded_audio, "prompt": instruction},
            headers={"Content-Type": "application/json"},
            timeout=120,
        )

        print(f"Status: {reply.status_code}")
        if reply.status_code != 200:
            print(f"‚ùå Server error: {reply.text}")
            return

        payload = reply.json()
        without_text = payload['comparison_results']['without_preprocessing']['text']
        with_text = payload['comparison_results']['with_preprocessing']['text']

        print("\nüìä COMPARISON RESULTS:")
        print(f"OLD method: {without_text}")
        print(f"NEW method: {with_text}")

    except FileNotFoundError:
        print(f"‚ùå {wav_file} not found.")
    except Exception as err:
        print(f"‚ùå Error: {err}")

if __name__ == "__main__":
    # Ask which test to run: "1" and "2" map to the single-method tests, and
    # any other answer runs the side-by-side comparison.
    choice = input("Choose test:\n1. NEW improved audio processing\n2. OLD audio processing\n3. Compare both\nEnter choice (1-3): ")

    {"1": test_improved_audio, "2": test_old_audio}.get(choice, compare_both)()

‚úÖ Loaded sample_audio.wav
üöÄ Comparing both methods...
Status: 200

üìä COMPARISON RESULTS:
OLD method: ‡§Æ‡•Å‡§ù‡•á ‡§Æ‡§æ‡§´‡§º ‡§ï‡•Ä‡§ú‡§ø‡§è, ‡§Æ‡•à‡§Ç ‡§Ü‡§™‡§ï‡•Ä ‡§Æ‡§¶‡§¶ ‡§®‡§π‡•Ä‡§Ç ‡§ï‡§∞ ‡§∏‡§ï‡§§‡§æ ‡§ï‡•ç‡§Ø‡•ã‡§Ç‡§ï‡§ø ‡§ë‡§°‡§ø‡§Ø‡•ã ‡§Æ‡•á‡§Ç ‡§ï‡•ã‡§à ‡§Ü‡§µ‡§æ‡§ú‡§º ‡§®‡§π‡•Ä‡§Ç ‡§π‡•à‡•§ ‡§ï‡•ç‡§Ø‡§æ ‡§Ü‡§™ ‡§ï‡•É‡§™‡§Ø‡§æ ‡§ë‡§°‡§ø‡§Ø‡•ã ‡§´‡§º‡§æ‡§á‡§≤ ‡§´‡§ø‡§∞ ‡§∏‡•á ‡§Ö‡§™‡§≤‡•ã‡§° ‡§ï‡§∞ ‡§∏‡§ï‡§§‡•á ‡§π‡•à‡§Ç?
NEW method: ‡§ú‡§Ø ‡§Æ‡§æ‡§Å ‡§ú‡•Ä, ‡•õ‡•ã‡§® ‡§¨‡•Ä ‡§ï‡•Ä ‡§∏‡§ø‡§ï‡•ç‡§Ø‡•ã‡§∞‡§ø‡§ü‡•Ä ‡§Ö‡§™‡§°‡•á‡§ü ‡§¶‡•Ä‡§ú‡§ø‡§è ‡§™‡•ç‡§≤‡•Ä‡•õ‡•§


In [35]:
# Audio Data Benchmarking

import requests
import base64
import time

# Base URL of the server under test (a RunPod proxy address); note that it ends with "/".
SERVER_URL = "https://3gdf7gz3vpdp0z-8000.proxy.runpod.net/"

def _collect_transcriptions(audio_base64, prompt, enable_preprocessing, num_calls, pause_seconds):
    """Call the ``ask`` endpoint ``num_calls`` times and return the list of results.

    Each entry is the ``text`` field of a 200 JSON reply, or the string
    ``"ERROR"`` when the status is not 200, the request fails, or the reply
    cannot be parsed. Every result is printed with its 1-based call number.
    """
    # Join the endpoint so the result is ".../ask" whether or not SERVER_URL
    # ends with "/".
    ask_url = SERVER_URL.rstrip("/") + "/ask"

    results = []
    for i in range(num_calls):
        data = {"data": audio_base64, "prompt": prompt, "enable_preprocessing": enable_preprocessing}
        try:
            response = requests.post(ask_url, json=data, timeout=60)
            result = response.json()['text'] if response.status_code == 200 else "ERROR"
        except (requests.RequestException, ValueError, KeyError) as exc:
            # One failed call (timeout, connection error, malformed reply)
            # should not abort the whole benchmark; record it as an error.
            result = "ERROR"
            print(f"   ({type(exc).__name__}: {exc})")
        results.append(result)
        print(f"{i+1}: {result}")
        time.sleep(pause_seconds)
    return results


def run_10_tests(num_calls=10, pause_seconds=0.5):
    """Benchmark response consistency of the old and new processing methods.

    Loads ``sample_audio2.wav``, sends it ``num_calls`` times with
    ``enable_preprocessing`` set to ``False`` and then ``num_calls`` times
    with it set to ``True``, printing every result, and finally prints how
    many distinct responses each method produced.

    Args:
        num_calls: Number of requests per method (default 10).
        pause_seconds: Delay after each request, in seconds (default 0.5).

    Raises:
        FileNotFoundError: If ``sample_audio2.wav`` does not exist (reading
            the file is not guarded).
    """
    audio_path = "sample_audio2.wav"
    prompt = "Transcribe this audio into Hindi"

    # Load audio
    with open(audio_path, "rb") as f:
        audio_data = f.read()
    audio_base64 = base64.b64encode(audio_data).decode("utf-8")

    print(f"üî¥ OLD METHOD ({num_calls} calls):")
    old_results = _collect_transcriptions(audio_base64, prompt, False, num_calls, pause_seconds)

    print(f"\nüü¢ NEW METHOD ({num_calls} calls):")
    new_results = _collect_transcriptions(audio_base64, prompt, True, num_calls, pause_seconds)

    print("\nüìä SUMMARY:")
    print(f"OLD unique responses: {len(set(old_results))}")
    print(f"NEW unique responses: {len(set(new_results))}")

# Run the benchmark when this cell (or script) is executed directly.
if __name__ == "__main__":
    run_10_tests()










üî¥ OLD METHOD (10 calls):
1: ‡§Æ‡•Å‡§ù‡•á ‡§ï‡§ø‡§∏‡•Ä ‡§∏‡•Å‡§∞‡§ï‡•ç‡§∑‡§æ ‡§Ö‡§™‡§°‡•á‡§ü ‡§ï‡•Ä ‡§Ü‡§µ‡§∂‡•ç‡§Ø‡§ï‡§§‡§æ ‡§π‡•à‡•§
2: ‡§ú‡§º‡•ã‡§® ‡§¨‡•Ä ‡§ï‡•Ä ‡§∏‡§ø‡§ï‡•ç‡§Ø‡•ã‡§∞‡§ø‡§ü‡•Ä ‡§Ö‡§™‡§°‡•á‡§ü ‡§¶‡•Ä‡§ú‡§ø‡§è‡•§
3: ‡§Æ‡•Å‡§ù‡•á ‡§∏‡§ø‡§ï‡•ç‡§Ø‡•ã‡§∞‡§ø‡§ü‡•Ä ‡§Ö‡§™‡§°‡•á‡§ü ‡§¶‡•Ä‡§ú‡§ø‡§è‡•§
4: ‡§ú‡§º‡•ã‡§® ‡§¨‡•Ä ‡§ï‡•Ä ‡§∏‡§ø‡§ï‡•ç‡§Ø‡•ã‡§∞‡§ø‡§ü‡•Ä ‡§Ö‡§™‡§°‡•á‡§ü ‡§¶‡•Ä‡§ú‡§ø‡§è‡•§
5: ‡§Æ‡§æ‡§´‡§º ‡§ï‡•Ä‡§ú‡§ø‡§è, ‡§Æ‡•à‡§Ç ‡§Ü‡§™‡§ï‡•Ä ‡§¨‡§æ‡§§ ‡§®‡§π‡•Ä‡§Ç ‡§∏‡§Æ‡§ù ‡§™‡§æ‡§Ø‡§æ‡•§ ‡§ï‡•ç‡§Ø‡§æ ‡§Ü‡§™ ‡§ï‡•É‡§™‡§Ø‡§æ ‡§Ö‡§™‡§®‡•Ä ‡§¨‡§æ‡§§ ‡§ï‡•ã ‡§´‡§ø‡§∞ ‡§∏‡•á ‡§¶‡•ã‡§π‡§∞‡§æ ‡§∏‡§ï‡§§‡•á ‡§π‡•à‡§Ç?
6: ‡§ú‡•ã‡§® ‡§¨‡•Ä ‡§ï‡•Ä ‡§∏‡§ø‡§ï‡•ç‡§Ø‡•ã‡§∞‡§ø‡§ü‡•Ä ‡§Ö‡§™‡§°‡•á‡§ü ‡§¶‡•Ä‡§ú‡§ø‡§Ø‡•á‡•§
7: ‡§ú‡§º‡•ã‡§® ‡§¨‡•Ä ‡§ï‡•Ä ‡§∏‡§ø‡§ï‡•ç‡§Ø‡•ã‡§∞‡§ø‡§ü‡•Ä ‡§Ö‡§™‡§°‡•á‡§ü ‡§¶‡•Ä‡§ú‡§ø‡§è‡•§
8: ‡§ú‡§º‡•ã‡§® ‡§¨‡•Ä ‡§ï‡•Ä ‡§∏‡§ø‡§ï‡•ç‡§Ø‡•ã‡§∞‡§ø‡§ü‡•Ä ‡§Ö‡§™‡§°‡•á‡§ü ‡§¶‡•Ä‡§ú‡§ø‡§Ø‡•á‡•§
9: ‡§Æ‡•Å‡§ù‡•á ‡§ñ‡•á‡§¶ ‡§π‡•à, ‡§≤‡•á‡§ï‡§ø‡§® ‡§Æ‡•à‡§Ç ‡§á‡§∏ ‡§ë

In [45]:
import requests
import base64

# Base URL of the server under test (a RunPod proxy address); note that it ends with "/".
SERVER_URL = "https://3gdf7gz3vpdp0z-8000.proxy.runpod.net/"

def test_no_processing():
    """Send the sample audio with ``processing_mode`` set to ``"force_off"``.

    Encodes ``sample_audio.wav`` as base64, POSTs it to the ``ask`` endpoint,
    prints the HTTP status and, on a 200 response, prints the ``text``,
    ``processing_applied`` and ``status`` fields of the JSON reply. Any other
    status prints the response body. A missing file and any other exception
    are printed, not raised.
    """
    wav_file = "sample_audio.wav"
    instruction = "Transcribe this audio into Hindi"

    try:
        with open(wav_file, "rb") as handle:
            raw_bytes = handle.read()
        encoded_audio = base64.b64encode(raw_bytes).decode("utf-8")
        print(f"‚úÖ Loaded {wav_file}")

        request_body = {
            "data": encoded_audio,
            "prompt": instruction,
            "processing_mode": "force_off",  # Force NO preprocessing
        }

        print("‚è≠Ô∏è Testing with NO preprocessing...")
        reply = requests.post(
            f"{SERVER_URL}ask",
            json=request_body,
            headers={"Content-Type": "application/json"},
            timeout=60,
        )

        print(f"Status: {reply.status_code}")
        if reply.status_code != 200:
            print(f"‚ùå Error: {reply.text}")
            return

        payload = reply.json()
        print(f"‚úÖ Result: {payload['text']}")
        print(f"üîß Processing Applied: {payload['processing_applied']}")
        print(f"üìã Status: {payload['status']}")

    except FileNotFoundError:
        print(f"‚ùå {wav_file} not found.")
    except Exception as err:
        print(f"‚ùå Error: {err}")

# Run the forced-off preprocessing test when this cell (or script) is executed directly.
if __name__ == "__main__":
    test_no_processing()

‚úÖ Loaded sample_audio.wav
‚è≠Ô∏è Testing with NO preprocessing...
Status: 200
‚úÖ Result: ‡§ú‡§Æ‡§æ ‡§ú‡•Ä ‡•õ‡•ã‡§® ‡§¨‡•Ä ‡§ï‡•Ä ‡§∏‡§ø‡§ï‡•ç‡§Ø‡•ã‡§∞‡§ø‡§ü‡•Ä ‡§Ö‡§™‡§°‡•á‡§ü ‡§¶‡•Ä‡§ú‡§ø‡§è ‡§™‡•ç‡§≤‡•Ä‡§ú‡•§
üîß Processing Applied: False
üìã Status: ‚úÖ Adaptive processing: No preprocessing needed (good quality audio)
