## Test Nova Sonic via WebSocket


In [None]:
!pip install websockets pydub

In [3]:
# helper function to read all json files in a directory into pandas dataframe
import pandas as pd
import os
import json
from typing import List, Dict, Any, Optional


def read_json_to_dataframe(directory_path: str = "./responses/model_sonic") -> pd.DataFrame:
    """
    Read all session-summary JSON files in a directory into a pandas DataFrame.

    Files are processed in sorted filename order so the row order is
    reproducible. A file that cannot be read or parsed, or whose name does
    not contain "_session_summary", is reported on stdout and skipped.

    Args:
        directory_path: Path to the directory containing JSON files

    Returns:
        pandas DataFrame with the following columns (always present, even
        when no file could be loaded):
        - input_file_path: from "input_file" in JSON ('' when absent)
        - duration_seconds: from "duration_seconds" inside the "audio" object
          of the JSON (None when absent)
        - text_responses: "raw_text" of every entry of the "text_responses"
          array, joined with newlines
        - output_file_path: path of "<prefix>_response<postfix>.wav" in the
          same directory, where the JSON file is named
          "<prefix>_session_summary<postfix>.json"; None when that file does
          not exist

    Raises:
        OSError: if directory_path cannot be listed
    """
    columns = ['input_file_path', 'duration_seconds', 'text_responses', 'output_file_path']
    marker = '_session_summary'

    # One dictionary per successfully processed JSON file
    data_list = []

    # os.listdir returns entries in arbitrary order; sort for stable output
    json_files = sorted(f for f in os.listdir(directory_path) if f.endswith('.json'))

    for json_file in json_files:
        # Split "<prefix>_session_summary<postfix>.json" around the marker.
        # partition never raises, so a missing marker is detected explicitly.
        file_prefix, found, remainder = json_file.partition(marker)
        if not found:
            print(f"Error processing {json_file}: filename does not contain '{marker}'")
            continue
        file_postfix = remainder.replace('.json', '')

        json_path = os.path.join(directory_path, json_file)

        try:
            with open(json_path, 'r', encoding='utf-8') as f:
                json_data = json.load(f)

            input_file_path = json_data.get('input_file', '')

            # "or" also covers an explicit JSON null, not only a missing key
            audio_data = json_data.get('audio') or {}
            duration_seconds = audio_data.get('duration_seconds')

            text_responses_array = json_data.get('text_responses') or []
            text_responses = '\n'.join(
                resp.get('raw_text', '') for resp in text_responses_array
            )
        except (OSError, ValueError, TypeError, AttributeError) as e:
            # Best effort: unreadable file, invalid JSON (JSONDecodeError is a
            # ValueError) or unexpected structure -> report and move on.
            print(f"Error processing {json_file}: {e}")
            continue

        # Identify the corresponding audio output file; None if it is missing
        output_file_name = f"{file_prefix}_response{file_postfix}.wav"
        output_file_path = os.path.join(directory_path, output_file_name)
        if not os.path.exists(output_file_path):
            output_file_path = None

        data_list.append({
            'input_file_path': input_file_path,
            'duration_seconds': duration_seconds,
            'text_responses': text_responses,
            'output_file_path': output_file_path,
        })

    # Passing columns explicitly keeps the schema even when data_list is empty
    return pd.DataFrame(data_list, columns=columns)

In [None]:
import asyncio
from s2s_test_harness import run_test_loop
import os
import uuid
       
WS_URL = f'http://localhost:8000/ws'
user_id = os.getenv("EMAIL")
password = os.getenv("COGNITO_PASSWORD")


audio_files = [f for f in os.listdir("./audio_samples") if f.endswith(".raw")]
# loop over all audio files in the audio_samples directory
results = await run_test_loop(
    audio_files=[f"./audio_samples/{f}" for f in audio_files],
    server_url=WS_URL,
    user_id=user_id,
    password=password,
    delay_between_tests=15.0  # Increase to 3 seconds for safety
)

In [5]:
# Move the response files into a model-specific subdirectory

# Test case with Sonic and function calling

import os

source_dir = "./responses/"
target_dir = "./responses/model_sonic"
os.makedirs(target_dir, exist_ok=True)
for entry_name in os.listdir("./responses"):
    # Only .json and .wav entries are moved; everything else (including the
    # target subdirectory itself) is left where it is.
    if entry_name.endswith((".json", ".wav")):
        os.rename(
            os.path.join(source_dir, entry_name),
            os.path.join(target_dir, entry_name),
        )

In [None]:
# Read the JSON files in ./responses/model_sonic into a DataFrame
# (one row per JSON file that could be processed)

df_1 = read_json_to_dataframe("./responses/model_sonic")
# Bare expression as the last statement so the notebook renders the DataFrame
df_1


In [7]:
# Save the DataFrame as a CSV file in the same directory as the response files;
# index=False leaves the row index out of the CSV
df_1.to_csv("./responses/model_sonic/s2s_sonic_function_calling_responses.csv", index=False)

## Next steps

- create recordings with noise in the background
- test multi-turn conversations