In [None]:
python main.py

In [None]:
import os
import io
import json
import asyncio
import base64
import websockets
from pydub import AudioSegment
import soundfile as sf
import gradio as gr
from dotenv import load_dotenv

load_dotenv()

async def connect_to_openai_websocket(audio_event):
    url = "wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01"
    headers = {
        "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}",
        "OpenAI-Beta": "realtime=v1",
    }

    async with websockets.connect(url, extra_headers=headers) as ws:
        print("Connected to server.")

        # Send audio event to the server
        await ws.send(audio_event)
        print("Audio event sent.")

        async for message in ws:
            event = json.loads(message)

            # Check if the message is an audio response
            if event.get('type') == 'conversation.item.created':

                # Send a command to create a response
                response_message = {
                    "type": "response.create"
                }
                await ws.send(json.dumps(response_message))
                print("Response create command sent.")

                audio_data_list = []

                # Listen for messages from the server
                async for message in ws:
                    event = json.loads(message)

                    # Check if the message is an audio response
                    if event.get('type') == 'response.audio.delta':
                        audio_data_list.append(event['delta'])

                    # Check if the message is an audio response
                    if event.get('type') == 'response.audio.done':
                        full_audio_base64 = ''.join(audio_data_list)  

                        audio_data = base64.b64decode(full_audio_base64)
                        return audio_data

def numpy_to_audio_bytes(audio_np, sample_rate):
    with io.BytesIO() as buffer:
        # Write the audio data to the buffer in WAV format
        sf.write(buffer, audio_np, samplerate=sample_rate, format='WAV')
        buffer.seek(0)  # Move to the beginning of the buffer
        wav_bytes = buffer.read()
    return wav_bytes

def audio_to_item_create_event(audio_data: tuple) -> str:
    sample_rate, audio_np = audio_data
    audio_bytes = numpy_to_audio_bytes(audio_np, sample_rate)
    
    pcm_base64 = base64.b64encode(audio_bytes).decode('utf-8')
    
    event = {
        "type": "conversation.item.create",
        "item": {
            "type": "message",
            "role": "user",
            "content": [{
                "type": "input_audio",
                "audio": pcm_base64
            }]
        }
    }
    return json.dumps(event)

def voice_chat_response(audio_data, history):
    audio_event = audio_to_item_create_event(audio_data)
    audio_response = asyncio.run(connect_to_openai_websocket(audio_event))

    if isinstance(audio_response, bytes):
        audio_io = io.BytesIO(audio_response)
        audio_segment = AudioSegment.from_raw(
            audio_io, 
            sample_width=2, 
            frame_rate=24000, 
            channels=1
        )
        
        # Output audio as file-compatible stream for Gradio playback
        with io.BytesIO() as buffered:
            audio_segment.export(buffered, format="wav")
            return buffered.getvalue(), history  #

    return None, history

# Gradio Interface Setup
with gr.Blocks(title="OpenAI Realtime API") as demo:
    gr.Markdown("<h1 style='text-align: center;'>OpenAI Realtime API</h1>")

    with gr.Tab("VoiceChat"):
        gr.Markdown("Speak to interact with the OpenAI model in real-time and hear its responses.")

        audio_input = gr.Audio(
            label="Record your voice",
            sources="microphone",
            type="numpy",
            render=True
        )
        
        audio_output = gr.Audio(
            autoplay=True,
            render=True
        )
        
        history_state = gr.State([])

        gr.Interface(
            fn=voice_chat_response,
            inputs=[audio_input, history_state],
            outputs=[audio_output, history_state]
        )

if __name__ == "__main__":
    demo.launch()

In [13]:
from openai import OpenAI
import openai
import requests
import json

# Define the function to get weather data
def get_weather_data(latitude: float, longitude: float) -> dict:
    url = f"https://api.open-meteo.com/v1/forecast?latitude={latitude}&longitude={longitude}&current_weather=true&hourly=temperature_2m,relative_humidity_2m,wind_speed_10m"
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    else:
        return {"error": f"Failed to retrieve data, status code {response.status_code}"}

# Define the tools list for the Assistant
tools_list = [{
    "type": "function",
    "function": {
        "name": "get_weather_data",
        "description": "Retrieve the current weather data for a specified location",
        "parameters": {
            "type": "object",
            "properties": {
                "latitude": {
                    "type": "number",
                    "description": "The latitude of the location"
                },
                "longitude": {
                    "type": "number",
                    "description": "The longitude of the location"
                }
            },
            "required": ["latitude", "longitude"]
        }
    }
}]

# Initialize the OpenAI client
client = OpenAI(api_key="XXX")

# Step 1: Create an Assistant
assistant = client.beta.assistants.create(
    name="English Speaking Assistant",
    instructions="""
    You are an English-speaking practice assistant. 
    Your goal is to help users improve their English by providing feedback, 
    practicing real-life scenarios, 
    and correcting grammar and pronunciation. 
    Be encouraging and constructive in your responses.
    You can also provide weather-related conversations when asked.
    """,
    tools=tools_list,
    model="gpt-4-1106-preview",
)

# Step 2: Create a Thread
thread = client.beta.threads.create()

# Step 3: Add a Message to a Thread
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="How is the weather in seoul??"
)


In [14]:

# Step 4: Run the Assistant and get immediate results
run = client.beta.threads.runs.create_and_poll(
    thread_id=thread.id,
    assistant_id=assistant.id,
    instructions="Answer the user's questions"
)

# Check if the Assistant requires a tool function
# Check if the Assistant requires a tool function
if run.status == "requires_action":
    required_actions = run.required_action.submit_tool_outputs.model_dump()
    tool_outputs = []
    
    for action in required_actions["tool_calls"]:
        func_name = action['function']['name']
        arguments = json.loads(action['function']['arguments'])
        
        if func_name == "get_weather_data":
            latitude = arguments["latitude"]
            longitude = arguments["longitude"]
            # Fetch the weather data
            output = get_weather_data(latitude=latitude, longitude=longitude)
            
            # Convert the output to a JSON string
            tool_outputs.append({
                "tool_call_id": action['id'],
                "output": json.dumps(output)  # Serialize to JSON string
            })
        else:
            raise ValueError(f"Unknown function: {func_name}")
    
    # Submit the tool outputs back to the Assistant
    client.beta.threads.runs.submit_tool_outputs(
        thread_id=thread.id,
        run_id=run.id,
        tool_outputs=tool_outputs
    )


In [16]:
messages = client.beta.threads.messages.list(thread_id=thread.id)

In [18]:
messages

SyncCursorPage[Message](data=[Message(id='msg_SUPPsRf2YFHvu8tC50ctpo13', assistant_id='asst_s82RQ16Xqx0jW98Q1tveUB2H', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value="The current weather in Seoul is as follows:\n\n- Temperature: 5.2°C\n- Wind Speed: 5.4 km/h\n- Wind Direction: 352° (North)\n- Weather Condition: Clear weather (WMO code: 0)\n- Time of Weather Report: November 18, 2024, at 07:00 GMT\n\nPlease note that this data may not be up-to-the-minute accurate, as it's reflecting a weather report from November 18, 2024. For the latest weather updates, always check with a reliable real-time weather service."), type='text')], created_at=1731913880, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='assistant', run_id='run_DhEQeMY1DtLXduEgCUENXjWI', status=None, thread_id='thread_ttV3esQD37hxGj5B5dtt0Axx'), Message(id='msg_XAIM2OLOh8YLDkMquqPYAhlA', assistant_id=None, attachments=[], completed_at=None, c

In [25]:
messages.data[0].content[0].text.value

"The current weather in Seoul is as follows:\n\n- Temperature: 5.2°C\n- Wind Speed: 5.4 km/h\n- Wind Direction: 352° (North)\n- Weather Condition: Clear weather (WMO code: 0)\n- Time of Weather Report: November 18, 2024, at 07:00 GMT\n\nPlease note that this data may not be up-to-the-minute accurate, as it's reflecting a weather report from November 18, 2024. For the latest weather updates, always check with a reliable real-time weather service."

In [3]:

# Retrieve the Assistant's response
messages = client.beta.threads.messages.list(thread_id=thread.id)

# Print the final response
for msg in messages.data:
    role = msg.role
    content = msg.content[0].text.value
    print(f"{role.capitalize()}: {content}")

IndexError: list index out of range

In [None]:

# Retrieve the Assistant's response
messages = client.beta.threads.messages.list(thread_id=thread.id)

# Print the final response
for msg in messages.data:
    role = msg.role
    content = msg.content[0].text.value
    print(f"{role.capitalize()}: {content}")

In [None]:
from openai import OpenAI

client = OpenAI(api_key="")

In [None]:
assistant = client.beta.assistants.create(
    name="English Speaking Assistant",
    instructions="""
    You are an English-speaking practice assistant. 
    Your goal is to help users improve their English by providing feedback, 
    practicing real-life scenarios, 
    and correcting grammar and pronunciation. 
    Be encouraging and constructive in your responses.
    """,
    tools=[{"type":"file_search"}],
    model="gpt-4o"
)

In [None]:
vector_store = client.beta.vector_stores.create(
    name="conversation logs"
)

In [None]:
file_paths = [
    "내가_가장_가보고_싶은_여행지.txt", 
    "내가_감명_깊게_본_영화나_드라마.txt",
    "스트레스_해소를_위한_나만의_방법.txt"
]

In [None]:
file_streams = [open(path, "rb") for path in file_paths]

In [None]:
file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
    vector_store_id=vector_store.id,
    files=file_streams
)

In [None]:
print(file_batch.status)

In [None]:
print(file_batch.file_counts)

In [None]:
assistant = client.beta.assistants.update(
    assistant_id=assistant.id,
    tool_resources={"file_search":{"vector_store_ids":[vector_store.id]}}
        
)

In [None]:
assistant

In [None]:
vector_store

In [None]:
thread = client.beta.threads.create()

In [None]:
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Help me find a document containing a conversation about Norway."
)

In [None]:
run = client.beta.threads.runs.create_and_poll(
    thread_id=thread.id,
    assistant_id=assistant.id
)

In [None]:
messages = client.beta.threads.messages.list(
    thread_id=thread.id
)


In [None]:
messages.data[0].content[0].text.value

In [None]:
thread

In [None]:
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="I want to practice speaking English. Can you help me?"
)

In [None]:
run = client.beta.threads.runs.create_and_poll(
    thread_id=thread.id,
    assistant_id=assistant.id
)

In [None]:
messages = client.beta.threads.messages.list(
    thread_id=thread.id
)

In [None]:
messages.data[0]