# code-stream

Gradio-based streaming interface for AI-powered code assistance.
```bash
pip install gradio openai
```
```python
import gradio as gr
from openai import OpenAI

client = OpenAI()

def stream_code(prompt, history):
    messages = [{"role": "system", "content": "You are a code assistant."}]
    # with type="messages", history arrives as OpenAI-style role/content dicts
    for m in history:
        messages.append({"role": m["role"], "content": m["content"]})
    messages.append({"role": "user", "content": prompt})
    response = ""
    for chunk in client.chat.completions.create(
        model="gpt-4",
        messages=messages,
        stream=True
    ):
        if chunk.choices[0].delta.content:
            response += chunk.choices[0].delta.content
            yield response

demo = gr.ChatInterface(
    stream_code,
    type="messages",
    title="code-stream"
)

demo.launch()
```

streams tokens as they arrive instead of waiting for the complete response. works with any openai-compatible endpoint.
set OPENAI_API_KEY before running. add share=True to .launch() for a public link.
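for example, replacing the launch call above:

```python
# temporary public *.gradio.live URL in addition to the local server
demo.launch(share=True)
```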
customize system prompt, add code execution, syntax highlighting, whatever. it's just gradio and an llm client.
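as one example, a sketch of a user-editable system prompt using ChatInterface's additional_inputs (the extra system_prompt argument and the default prompt text are our choices, not part of the snippet above):

```python
# sketch: user-editable system prompt via additional_inputs
import gradio as gr
from openai import OpenAI

client = OpenAI()

def stream_code(prompt, history, system_prompt):
    messages = [{"role": "system", "content": system_prompt}]
    for m in history:
        messages.append({"role": m["role"], "content": m["content"]})
    messages.append({"role": "user", "content": prompt})
    response = ""
    for chunk in client.chat.completions.create(
        model="gpt-4", messages=messages, stream=True
    ):
        if chunk.choices[0].delta.content:
            response += chunk.choices[0].delta.content
            yield response

demo = gr.ChatInterface(
    stream_code,
    type="messages",
    # extra components are passed to the fn after (message, history)
    additional_inputs=[gr.Textbox("You are a code assistant.", label="system prompt")],
)
demo.launch()
```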
## custom endpoint, streaming with context
```python
import gradio as gr
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="none"
)

def stream_with_context(prompt, history, temperature, max_tokens):
    messages = [{"role": "system", "content": "Expert in Python, Rust, Go."}]
    # chatbot is type="messages": history is a list of role/content dicts
    for m in history:
        messages.append({"role": m["role"], "content": m["content"]})
    messages.append({"role": "user", "content": prompt})
    # the output component is the chatbot itself, so yield the full message
    # list, with the assistant turn growing as chunks arrive
    history = history + [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": ""},
    ]
    for chunk in client.chat.completions.create(
        model="codellama-34b",
        messages=messages,
        stream=True,
        temperature=temperature,
        max_tokens=max_tokens
    ):
        delta = chunk.choices[0].delta.content
        if delta:
            history[-1]["content"] += delta
            yield history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(placeholder="describe what you need...")
    temp = gr.Slider(0, 2, value=0.7, label="temperature")
    tokens = gr.Slider(128, 4096, value=2048, step=128, label="max_tokens")
    msg.submit(
        stream_with_context,
        [msg, chatbot, temp, tokens],
        chatbot
    )

demo.launch(server_port=7860)
```
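optional: clear the textbox after each send by chaining .then onto the submit event (gradio event listeners return a chainable object), e.g. replacing the msg.submit call above with:

```python
# same wiring, plus resetting the textbox once the message is sent
msg.submit(
    stream_with_context,
    [msg, chatbot, temp, tokens],
    chatbot
).then(lambda: "", None, msg)
```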
MIT