Skip to content

jmerelnyc/code-stream

Repository files navigation

code-stream

Gradio-based streaming interface for AI-powered code assistance

pip install gradio openai
import gradio as gr
from openai import OpenAI

# Reads the OPENAI_API_KEY environment variable (see notes below).
client = OpenAI()

def stream_code(prompt, history):
    """Stream an assistant reply for *prompt*, growing token by token.

    Args:
        prompt: The user's latest message.
        history: Prior conversation turns. With ``type="messages"`` (as
            configured on the ChatInterface below) Gradio supplies a list of
            ``{"role", "content"}`` dicts; older Gradio versions supplied
            ``(user, assistant)`` tuples — both shapes are accepted.

    Yields:
        str: The accumulated assistant response after each streamed chunk.
    """
    messages = [{"role": "system", "content": "You are a code assistant."}]
    for item in history:
        if isinstance(item, dict):
            # messages-format history (matches type="messages" below)
            messages.append({"role": item["role"], "content": item["content"]})
        else:
            # legacy tuple-format history: (user_message, assistant_message)
            user_msg, assistant_msg = item
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": prompt})

    response = ""
    for chunk in client.chat.completions.create(
        model="gpt-4",
        messages=messages,
        stream=True
    ):
        delta = chunk.choices[0].delta.content
        if delta:
            response += delta
            yield response

# type="messages" makes Gradio pass history as a list of
# {"role", "content"} dicts rather than (user, assistant) tuples.
demo = gr.ChatInterface(
    stream_code,
    type="messages",
    title="code-stream"
)

demo.launch()

Notes

Streams tokens as they arrive instead of waiting for the complete response. Works with any OpenAI-compatible endpoint.

Set the OPENAI_API_KEY environment variable before running. Add share=True to .launch() for a public link.

Customize the system prompt, add code execution or syntax highlighting, or extend it however you like — it's just Gradio and an LLM client.

# custom endpoint, streaming with context
import gradio as gr
from openai import OpenAI

# Point the OpenAI client at a local OpenAI-compatible server
# (e.g. vLLM / llama.cpp); the key is unused by such servers.
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="none"
)

def stream_with_context(prompt, history, temperature, max_tokens):
    """Stream a reply and yield the updated chat history in messages format.

    Args:
        prompt: The user's latest message.
        history: Prior turns. ``gr.Chatbot(type="messages")`` supplies a
            list of ``{"role", "content"}`` dicts; legacy
            ``(user, assistant)`` tuples are also accepted.
        temperature: Sampling temperature passed through to the model.
        max_tokens: Completion-length cap passed through to the model.

    Yields:
        list[dict]: The full conversation (history + new user turn +
        partial assistant turn) so the Chatbot component can render it
        directly — a bare string would not display in a Chatbot output.
    """
    messages = [{"role": "system", "content": "Expert in Python, Rust, Go."}]

    for item in history:
        if isinstance(item, dict):
            # messages-format history (matches type="messages" above)
            messages.append({"role": item["role"], "content": item["content"]})
        else:
            # legacy tuple-format history: (user_message, assistant_message)
            user_msg, assistant_msg = item
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": prompt})

    full_response = ""
    for chunk in client.chat.completions.create(
        model="codellama-34b",
        messages=messages,
        stream=True,
        temperature=temperature,
        max_tokens=max_tokens
    ):
        delta = chunk.choices[0].delta.content
        if delta:
            full_response += delta
            # messages[1:] is the normalized history plus the new user turn
            # (system prompt excluded); append the partial assistant reply.
            yield messages[1:] + [
                {"role": "assistant", "content": full_response}
            ]

with gr.Blocks() as demo:
    # type="messages": the chatbot holds {"role", "content"} dicts, and that
    # same value is passed to stream_with_context as its history argument.
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(placeholder="describe what you need...")
    temp = gr.Slider(0, 2, value=0.7, label="temperature")
    tokens = gr.Slider(128, 4096, value=2048, step=128, label="max_tokens")
    
    # Each yielded value from the generator replaces the chatbot contents,
    # producing the token-by-token streaming effect.
    msg.submit(
        stream_with_context,
        [msg, chatbot, temp, tokens],
        chatbot
    )

demo.launch(server_port=7860)

MIT

About

Gradio-based streaming interface for AI-powered code assistance

Topics

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

 
 
 

Contributors