In [20]:
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import FileResponse

# FastAPI application instance; the /chat/ route is registered on it via @app.post.
app = FastAPI()


In [21]:
import whisper

# Load the Whisper "small" model once when this cell runs; transcribe_audio reuses this instance.
asr_model = whisper.load_model("small")

def transcribe_audio(audio_bytes):
    """Transcribe an encoded audio clip to text with the module-level Whisper model.

    The bytes are written to a uniquely named temporary file, that path is
    passed to ``asr_model.transcribe``, and the file is removed afterwards.
    Using a unique name (rather than a fixed ``temp.wav``) keeps overlapping
    calls from overwriting each other's input and avoids leaving files behind.

    Args:
        audio_bytes: Raw bytes of an audio file (e.g. an uploaded recording).

    Returns:
        The ``"text"`` entry of the transcription result.

    Raises:
        ValueError: If ``audio_bytes`` is empty.
    """
    import os
    import tempfile

    if not audio_bytes:
        raise ValueError("audio_bytes is empty; nothing to transcribe")

    # delete=False and closing before transcription: on Windows an open
    # NamedTemporaryFile cannot be opened a second time by another reader.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        tmp_file.write(audio_bytes)
        tmp_path = tmp_file.name

    try:
        result = asr_model.transcribe(tmp_path)
    finally:
        # Best-effort cleanup; a failure to delete must not mask the real outcome.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
    return result["text"]


In [22]:
import os
from openai import OpenAI

# OpenAI client; the key is looked up in the OPENAI_API_KEY environment variable (None is passed if it is unset).
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Module-level chat transcript: generate_response appends {"role", "content"} dicts here, so every caller shares it.
conversation_history = []

def generate_response(user_text):
    """Ask the chat model for a reply to ``user_text`` in the context of prior turns.

    The request contains the shared ``conversation_history`` plus the new user
    message. Both the user message and the assistant reply are recorded in
    ``conversation_history`` only after the API call succeeds, so a failed
    request does not leave an unanswered user turn in the history.

    Args:
        user_text: The user's message text.

    Returns:
        The assistant's reply text (empty string if the API returned no text).

    Raises:
        Whatever ``client.chat.completions.create`` raises; the history is left
        unchanged in that case.
    """
    user_message = {"role": "user", "content": user_text}

    # Send a new list (history + this turn) instead of mutating the shared
    # history up front.
    # NOTE(review): max_tokens=150 cuts long replies off mid-sentence, as the
    # captured notebook output shows; raise it or prompt for brevity if needed.
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=conversation_history + [user_message],
        max_tokens=150
    )

    # The SDK types message.content as optional; normalise None to "" so the
    # history and downstream callers always receive a string.
    bot_response = response.choices[0].message.content or ""

    # Commit both turns together now that the call has succeeded.
    conversation_history.extend(
        [user_message, {"role": "assistant", "content": bot_response}]
    )

    return bot_response


In [None]:
# COSYVOICE TTS (disabled): CosyVoice does not work properly in either this agent or its own demo, possibly because of a Windows issue; will retry under WSL in the future.
# import sys
# import os
# cosyvoice_path = r'C:\Users\Kevin\Desktop\ai_class\KevinYuan_Homework\CosyVoice'
# sys.path.append(cosyvoice_path)
# sys.path.append(os.path.join(cosyvoice_path, 'third_party', 'Matcha-TTS'))
# from cosyvoice.cli.cosyvoice import CosyVoice2
# from cosyvoice.utils.file_utils import load_wav
# import torchaudio
# model_path = os.path.join(cosyvoice_path, 'pretrained_models', 'CosyVoice2-0.5B')
# cosyvoice = CosyVoice2(model_path, load_jit=False, load_trt=False, load_vllm=False, fp16=False)
# reference_audio_path = os.path.join(cosyvoice_path, 'asset', 'ENG_US_M_DAVEL.wav')
# # Transcription of the reference audio
# prompt_text = "Being able to communicate positions within a room is critical to our ability to focus light on a certain area and place objects in their proper location on stage. But it goes even deeper than that, this proficiency provides the basic vocabulary in a common language that is spoken by production and staging professionals around the world."
# prompt_speech_16k = load_wav(reference_audio_path, 16000)
# def synthesize_speech(text, output_path="response.wav"):
#     # Add text_frontend=False for better English synthesis (per CosyVoice docs)
#     for i, result in enumerate(cosyvoice.inference_zero_shot(
#         text,              # Text to synthesize
#         prompt_text,       # Transcript of reference audio
#         prompt_speech_16k, # Reference audio
#         stream=False,
#         text_frontend=False  # Important for English text
#     )):
#         torchaudio.save(output_path, result['tts_speech'], cosyvoice.sample_rate)
#         break
#     return output_path

def synthesize_speech(text, output_path="response.wav"):
    """Synthesize ``text`` with OpenAI TTS-1 and write the audio to ``output_path``.

    The requested audio format is taken from the extension of ``output_path``
    so that the file contents match its name (e.g. ``.wav`` really contains
    WAV data). Without an explicit ``response_format`` the API returns MP3,
    which previously ended up inside files named ``*.wav``.

    Args:
        text: The text to speak.
        output_path: Destination file path. Extensions ``mp3``, ``opus``,
            ``aac``, ``flac``, ``wav`` and ``pcm`` select that format; any
            other extension falls back to the API default, ``mp3``.

    Returns:
        ``output_path``, unchanged.
    """
    supported_formats = ("mp3", "opus", "aac", "flac", "wav", "pcm")
    extension = os.path.splitext(output_path)[1].lstrip(".").lower()
    audio_format = extension if extension in supported_formats else "mp3"

    # Stream the body straight to disk via the SDK's streaming-response helper
    # instead of calling stream_to_file on an already-buffered response.
    with client.audio.speech.with_streaming_response.create(
        model="tts-1",
        voice="alloy",
        input=text,
        response_format=audio_format,
    ) as response:
        response.stream_to_file(output_path)
    return output_path



Using OpenAI TTS-1 for speech synthesis


In [24]:
@app.post("/chat/")
async def chat_endpoint(file: UploadFile = File(...)):
    """Voice-chat endpoint: uploaded audio in, spoken reply out.

    Pipeline: read the upload, transcribe it, generate a text reply, synthesize
    the reply to audio, and return that audio file.

    Each request writes its reply to its own temporary file, which is deleted
    once the response has been sent. A single shared ``response.wav`` could be
    overwritten by a later request while an earlier response was still being
    streamed to its client.

    Args:
        file: The uploaded audio recording (multipart form field ``file``).

    Returns:
        A ``FileResponse`` with media type ``audio/wav`` for the synthesized reply.

    Raises:
        HTTPException: 400 if the uploaded file is empty.
    """
    import os
    import tempfile

    from fastapi import BackgroundTasks, HTTPException

    audio_bytes = await file.read()
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Uploaded audio file is empty.")

    # Reserve a unique output path for this request; close the descriptor so
    # the synthesizer can open the path itself.
    fd, reply_path = tempfile.mkstemp(prefix="chat_reply_", suffix=".wav")
    os.close(fd)

    try:
        # NOTE(review): these three calls are synchronous and run directly in
        # this coroutine, so the event loop is blocked while they execute.
        user_text = transcribe_audio(audio_bytes)
        bot_text = generate_response(user_text)
        audio_path = synthesize_speech(bot_text, reply_path)
    except Exception:
        # No response will be sent, so remove the reserved file right away.
        os.remove(reply_path)
        raise

    # Delete the per-request file only after the response has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(os.remove, reply_path)
    return FileResponse(audio_path, media_type="audio/wav", background=cleanup)

In [None]:
import gradio as gr

def voice_chat(audio):
    """Gradio callback: turn a recorded clip into a spoken and written reply.

    Args:
        audio: Path to the recorded audio file, or ``None`` when nothing was
            recorded.

    Returns:
        A ``(reply_audio_path, reply_text)`` tuple, or ``(None, "")`` when
        ``audio`` is ``None``.
    """
    if audio is not None:
        # Load the recording from disk as raw bytes for the transcriber.
        with open(audio, "rb") as recording:
            raw_audio = recording.read()

        transcript = transcribe_audio(raw_audio)
        print(f"User said: {transcript}")

        reply_text = generate_response(transcript)
        print(f"Bot responds: {reply_text}")

        reply_audio = synthesize_speech(reply_text, "gradio_response.wav")
        return reply_audio, reply_text

    # Nothing was recorded: no audio and an empty text box.
    return None, ""

# Build the Gradio UI around voice_chat and launch it.
gr.Interface(
    fn=voice_chat,
    # Microphone recording; type="filepath" hands voice_chat a path to the recorded file.
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    # Output order matches voice_chat's return tuple: (audio path, reply text).
    outputs=[
        gr.Audio(label="Bot Voice Response"),
        gr.Textbox(label="Bot Text Response", lines=5)
    ]
).launch()


2025-11-11 21:26:06,068 DEBUG Starting new HTTPS connection (1): huggingface.co:443
2025-11-11 21:26:06,167 DEBUG Using proactor: IocpProactor
2025-11-11 21:26:06,284 DEBUG connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None


* Running on local URL:  http://127.0.0.1:7865


2025-11-11 21:26:06,437 DEBUG https://huggingface.co:443 "HEAD /api/telemetry/https%3A/api.gradio.app/gradio-initiated-analytics HTTP/1.1" 200 0
2025-11-11 21:26:06,443 DEBUG connect_tcp.started host='127.0.0.1' port=7865 local_address=None timeout=None socket_options=None
2025-11-11 21:26:06,443 DEBUG connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x000001E70CF90BF0>
2025-11-11 21:26:06,444 DEBUG send_request_headers.started request=<Request [b'GET']>
2025-11-11 21:26:06,445 DEBUG connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x000001E6CF1A85C0>
2025-11-11 21:26:06,445 DEBUG send_request_headers.complete
2025-11-11 21:26:06,445 DEBUG start_tls.started ssl_context=<ssl.SSLContext object at 0x000001E6CEFF0DD0> server_hostname='api.gradio.app' timeout=3
2025-11-11 21:26:06,446 DEBUG send_request_body.started request=<Request [b'GET']>
2025-11-11 21:26:06,447 DEBUG send_request_body.complete
2025-11-11 21:26:06,447 DEBUG r

* To create a public link, set `share=True` in `launch()`.




2025-11-11 21:26:06,463 DEBUG Starting new HTTPS connection (1): huggingface.co:443


2025-11-11 21:26:06,566 DEBUG start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x000001E70CFD8590>
2025-11-11 21:26:06,567 DEBUG send_request_headers.started request=<Request [b'GET']>
2025-11-11 21:26:06,567 DEBUG send_request_headers.complete
2025-11-11 21:26:06,569 DEBUG send_request_body.started request=<Request [b'GET']>
2025-11-11 21:26:06,569 DEBUG send_request_body.complete
2025-11-11 21:26:06,569 DEBUG receive_response_headers.started request=<Request [b'GET']>
2025-11-11 21:26:06,628 DEBUG receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Wed, 12 Nov 2025 02:26:07 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
2025-11-11 21:26:06,629 INFO HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
2025-11-11 21:26:06,629 DEBUG receive_response_body.started request=<Request

User said:  Can you please tell me the MBA MVP candidates for the 2021-2022 season?


2025-11-11 21:26:32,430 DEBUG receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Wed, 12 Nov 2025 02:26:33 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-expose-headers', b'X-Request-ID'), (b'openai-organization', b'user-exboj2k3mtfkhfyzzdpjqomu'), (b'openai-processing-ms', b'1864'), (b'openai-project', b'proj_01dqWjCBFviU338E5P9LRXGd'), (b'openai-version', b'2020-10-01'), (b'x-envoy-upstream-service-time', b'2135'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'200000'), (b'x-ratelimit-remaining-requests', b'9999'), (b'x-ratelimit-remaining-tokens', b'199792'), (b'x-ratelimit-reset-requests', b'8.64s'), (b'x-ratelimit-reset-tokens', b'62ms'), (b'x-request-id', b'req_0494db97fb31453aaf8c264e6d67e2ea'), (b'x-openai-proxy-wasm', b'v0.1'), (b'cf-cache-status', b'DYNAMIC'), (b'Strict-Transport-Security', b'max-age=31536000; includeSubDomains;

Bot responds: Sure! For the 2021-2022 NBA season, several players were prominent candidates for the MVP award based on their outstanding performances. Here are some of the top candidates:

1. **Nikola Jokić** (Denver Nuggets) - The reigning MVP had an exceptional season, providing scoring, rebounding, and playmaking, and he was the focal point of the Nuggets' offense.

2. **Giannis Antetokounmpo** (Milwaukee Bucks) - Giannis continued to dominate on both ends of the court, leading the Bucks as they aimed to defend their championship title.

3. **Joel Embiid** (Philadelphia 76ers) - Embiid had a remarkable season, showcasing his scoring, rebounding,


2025-11-11 21:26:35,135 DEBUG receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Wed, 12 Nov 2025 02:26:36 GMT'), (b'Content-Type', b'audio/mpeg'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-expose-headers', b'X-Request-ID'), (b'openai-organization', b'user-exboj2k3mtfkhfyzzdpjqomu'), (b'openai-processing-ms', b'983'), (b'openai-project', b'proj_01dqWjCBFviU338E5P9LRXGd'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=31536000; includeSubDomains; preload'), (b'via', b'envoy-router-648b9546d7-fq7v4'), (b'x-envoy-upstream-service-time', b'1576'), (b'x-ratelimit-limit-requests', b'500'), (b'x-ratelimit-remaining-requests', b'499'), (b'x-ratelimit-reset-requests', b'120ms'), (b'x-request-id', b'req_e86573d62e4540c683239d6b6a0ab4fe'), (b'x-openai-proxy-wasm', b'v0.1'), (b'cf-cache-status', b'DYNAMIC'), (b'X-Content-Type-Options', b'nosniff'), (b'Server', b'cloudflare'), (b'CF-RAY', b

Created dataset file at: .gradio\flagged\dataset2.csv


2025-11-11 21:28:12,965 DEBUG Calling on_part_begin with no data
2025-11-11 21:28:12,966 DEBUG Calling on_header_field with data[42:61]
2025-11-11 21:28:12,966 DEBUG Calling on_header_value with data[63:108]
2025-11-11 21:28:12,967 DEBUG Calling on_header_end with no data
2025-11-11 21:28:12,967 DEBUG Calling on_header_field with data[110:122]
2025-11-11 21:28:12,967 DEBUG Calling on_header_value with data[124:148]
2025-11-11 21:28:12,968 DEBUG Calling on_header_end with no data
2025-11-11 21:28:12,968 DEBUG Calling on_headers_finished with no data
2025-11-11 21:28:12,969 DEBUG Calling on_part_data with data[152:81920]
2025-11-11 21:28:12,970 DEBUG Calling on_part_data with data[0:131072]
2025-11-11 21:28:12,971 DEBUG Calling on_part_data with data[0:9466]
2025-11-11 21:28:12,971 DEBUG Calling on_part_end with no data
2025-11-11 21:28:12,971 DEBUG Calling on_end with no data
2025-11-11 21:28:12,986 DEBUG Calling on_part_begin with no data
2025-11-11 21:28:12,987 DEBUG Calling on_header

User said:  Then who are the fourth and fifth runner-ups for the MVP candidates?


2025-11-11 21:28:26,527 DEBUG receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Wed, 12 Nov 2025 02:28:27 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-expose-headers', b'X-Request-ID'), (b'openai-organization', b'user-exboj2k3mtfkhfyzzdpjqomu'), (b'openai-processing-ms', b'2676'), (b'openai-project', b'proj_01dqWjCBFviU338E5P9LRXGd'), (b'openai-version', b'2020-10-01'), (b'x-envoy-upstream-service-time', b'3010'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'200000'), (b'x-ratelimit-remaining-requests', b'9999'), (b'x-ratelimit-remaining-tokens', b'199612'), (b'x-ratelimit-reset-requests', b'8.64s'), (b'x-ratelimit-reset-tokens', b'116ms'), (b'x-request-id', b'req_a401767621e34cb2b86b800c6c142efa'), (b'x-openai-proxy-wasm', b'v0.1'), (b'cf-cache-status', b'DYNAMIC'), (b'Strict-Transport-Security', b'max-age=31536000; includeSubDomains

Bot responds: For the 2021-2022 NBA season, in addition to Nikola Jokić, Giannis Antetokounmpo, and Joel Embiid as the top three MVP candidates, the fourth and fifth candidates often mentioned in MVP discussions were:

4. **Luka Dončić** (Dallas Mavericks) - Dončić had another impressive season, showcasing his scoring, playmaking, and ability to lead the Mavericks, significantly contributing to their success.

5. **Devin Booker** (Phoenix Suns) - As a key player for the Suns, who had one of the best records in the league, Booker demonstrated his scoring ability and playmaking skills and was crucial in helping Phoenix secure a top playoff seed.

These players were recognized for


2025-11-11 21:28:28,408 DEBUG receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Wed, 12 Nov 2025 02:28:29 GMT'), (b'Content-Type', b'audio/mpeg'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'access-control-expose-headers', b'X-Request-ID'), (b'openai-organization', b'user-exboj2k3mtfkhfyzzdpjqomu'), (b'openai-processing-ms', b'854'), (b'openai-project', b'proj_01dqWjCBFviU338E5P9LRXGd'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=31536000; includeSubDomains; preload'), (b'via', b'envoy-router-756db46b57-skd92'), (b'x-envoy-upstream-service-time', b'1275'), (b'x-ratelimit-limit-requests', b'500'), (b'x-ratelimit-remaining-requests', b'499'), (b'x-ratelimit-reset-requests', b'120ms'), (b'x-request-id', b'req_3e2639ac19284679922eb4460d741c55'), (b'x-openai-proxy-wasm', b'v0.1'), (b'cf-cache-status', b'DYNAMIC'), (b'X-Content-Type-Options', b'nosniff'), (b'Server', b'cloudflare'), (b'CF-RAY', b