In [4]:
import numpy as np
import gradio as gr
from agents import Agent
from agents.voice import SingleAgentVoiceWorkflow, VoicePipeline, AudioInput

# Text agent that produces the assistant's replies.
agent = Agent(
    model="gpt-4o-mini",
    name="Assistant",
    instructions="You are a helpful assistant",
)

# Voice pipeline built around a single-agent workflow wrapping `agent`.
pipeline = VoicePipeline(
    workflow=SingleAgentVoiceWorkflow(agent),
)

async def process_audio(audio_data):
    """Run one recorded utterance through the voice pipeline and return the reply.

    Args:
        audio_data: ``(sample_rate, samples)`` tuple as delivered by the
            ``gr.Audio(type="numpy")`` input, or ``None`` when nothing was
            recorded.

    Returns:
        ``(24000, samples)`` holding every streamed reply chunk concatenated
        in arrival order, or ``None`` when there was no usable input or the
        pipeline emitted no audio.
    """
    if audio_data is None:
        return None

    sr, audio_array = audio_data
    audio_array = np.asarray(audio_array)
    if audio_array.size == 0:
        # Nothing was captured; skip the pipeline round-trip entirely.
        return None

    # Gradio hands multi-channel recordings over as (samples, channels), while
    # AudioInput is left at its single-channel default here, so downmix first.
    if audio_array.ndim > 1:
        audio_array = audio_array.mean(axis=1).astype(audio_array.dtype)

    # Forward the real capture rate: browsers commonly record at 44.1/48 kHz,
    # and without it AudioInput falls back to its own default frame rate,
    # which makes the speech sound sped up or slowed down to the transcriber.
    audio_input = AudioInput(buffer=audio_array, frame_rate=sr)
    result = await pipeline.run(audio_input)

    # Keep only audio events that actually carry samples; lifecycle/error
    # events and empty payloads cannot be concatenated.
    audio_chunks = [
        event.data
        async for event in result.stream()
        if event.type == "voice_stream_event_audio" and event.data is not None
    ]

    if not audio_chunks:
        return None

    # NOTE(review): 24000 is kept from the original code as the pipeline's
    # output sample rate — confirm against the TTS settings in use.
    return (24000, np.concatenate(audio_chunks))  # (sample_rate, audio_data)

# Gradio UI: microphone recording in, synthesized reply audio out.
demo = gr.Interface(
    title="Voice Assistant",
    description="Speak into the microphone to chat with the AI assistant",
    fn=process_audio,
    # Both components exchange (sample_rate, ndarray) tuples with `fn`.
    inputs=gr.Audio(type="numpy", sources=["microphone"]),
    outputs=gr.Audio(type="numpy"),
)

# Start the local web server for the interface.
demo.launch()

* Running on local URL:  http://127.0.0.1:7863
* To create a public link, set `share=True` in `launch()`.


