# Chatbot - Gradio X llama-cpp-python

In [None]:
import gradio as gr
from IPython.display import IFrame, display
import os
from llama_cpp import Llama
import json

In [None]:
# Close any Gradio apps still running in this kernel before starting new ones.
gr.close_all()


## Running a Gradio App on JupyterHub

This code demonstrates how to run a **Gradio app** (a simple web-based interface for Python functions) inside a **JupyterHub environment** such as DataHub or CloudBank.

Gradio normally launches on `localhost`, but on JupyterHub the server runs behind a proxy — so we use the environment variable `JUPYTERHUB_SERVICE_PREFIX` to route the app correctly through the proxy system.



In [None]:
# Resolve the JupyterHub proxy prefix; falls back to "/" when the variable is unset.
base_url = os.environ.get('JUPYTERHUB_SERVICE_PREFIX', '/')

# Minimal "hello" interface: one text input, one text output.
demo1 = gr.Interface(
    fn=lambda x: f"Hello {x}!",
    inputs="text",
    outputs="text",
)

# Serve on port 7860 and tell Gradio the proxied path it is reachable under.
demo1.launch(
    server_port=7860,
    root_path=f"{base_url}proxy/7860",
    share=False,
    inline=False,
    prevent_thread_lock=True,
)

# Embed the proxied app in the notebook output.
proxy_url = f"{base_url}proxy/7860/"
display(IFrame(src=proxy_url, width=1000, height=600))

In [None]:
# Stop the demo1 server; the later demos in this notebook also use port 7860.
demo1.close()

In [None]:
# Show the proxy prefix resolved above ("/" when JUPYTERHUB_SERVICE_PREFIX is unset).
print(base_url)


## Set up the llama-cpp-python framework

Next, we load a local model with llama-cpp-python and run it behind the chatbot.

In [None]:
# Directory that holds the GGUF model file loaded below.
path="/home/jovyan/shared/"

In [None]:
# Load the GGUF model (a q4_0 Qwen2 1.5B instruct build, per the file name)
# from the shared directory.
model = Llama(
    model_path=os.path.join(path, "qwen2-1_5b-instruct-q4_0.gguf"),
    chat_format="chatml",  # chat template used by create_chat_completion
    n_ctx=2048,            # context window size, in tokens
    n_threads=None,        # None defers to the library's default thread count
    verbose=True,          # print llama.cpp loading/diagnostic output
)

In [None]:
# Single-turn helper used by the Gradio interface below
def chat_with_model(prompt):
    """Send one user prompt to the model and return the reply text.

    No conversation state is kept: every call sends only ``prompt`` as a
    single user message and allows up to 256 generated tokens.
    """
    completion = model.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=256,
    )
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]

# Proxy prefix assigned by JupyterHub ("/" when the variable is not set)
base_url = os.environ.get('JUPYTERHUB_SERVICE_PREFIX', '/')

# Wrap chat_with_model in a text-in / text-out Gradio interface
demo2 = gr.Interface(
    title="Small Model Chat",
    fn=chat_with_model,
    inputs="text",
    outputs="text",
)

# Launch on port 7860 behind the JupyterHub proxy
demo2.launch(
    server_port=7860,
    root_path=f"{base_url}proxy/7860",
    share=False,
    inline=False,
    prevent_thread_lock=True,
)

# Show the proxied app inline in the notebook
proxy_url = f"{base_url}proxy/7860/"
display(IFrame(src=proxy_url, width=1000, height=600))

In [None]:
# Stop the demo2 server; the next demo in this notebook also uses port 7860.
demo2.close()

## Let's Build the History of the Chat

This will be a JSON file that stores the chat history.

In [None]:
# Make sure demo2 is stopped before continuing (repeats the close() call from the previous code cell).
demo2.close()

In [None]:
# ===== Persistent History File =====
HISTORY_FILE = "chat_history.json"

# Printed whenever the file exists but its content cannot be used.
_HISTORY_WARNING = "⚠️ Warning: history file is corrupted or empty, resetting it."


def load_history():
    """Return the saved conversation as a list of turns.

    Turns are stored by ``save_history``; the chat function writes them as
    ``{"user": <str>, "model": <str>}`` dicts.

    Returns:
        The list stored in ``HISTORY_FILE``, or an empty list when the file
        is missing, blank, not valid UTF-8/JSON, or does not contain a list.
    """
    # Open directly instead of checking os.path.exists() first, so a file that
    # disappears between the check and the open cannot raise.
    try:
        with open(HISTORY_FILE, "r", encoding="utf-8") as f:
            content = f.read().strip()
    except FileNotFoundError:
        return []
    except UnicodeDecodeError:
        print(_HISTORY_WARNING)
        return []

    if not content:
        return []  # empty file

    try:
        data = json.loads(content)
    except json.JSONDecodeError:
        print(_HISTORY_WARNING)
        return []

    # Callers append to and iterate over the result, so anything other than a
    # list (e.g. a dict or null written by hand) is treated as corrupted.
    if not isinstance(data, list):
        print(_HISTORY_WARNING)
        return []
    return data


def save_history(history):
    """Write *history* to ``HISTORY_FILE`` as indented JSON.

    The data is first written to a temporary sibling file and then moved over
    the real file with ``os.replace``, so a failed or interrupted dump never
    leaves ``HISTORY_FILE`` half-written.

    Raises:
        TypeError: if *history* contains values that are not JSON-serializable.
        OSError: if the file cannot be written or replaced.
    """
    tmp_path = f"{HISTORY_FILE}.tmp"
    try:
        # ensure_ascii is left at its default so the file stays plain ASCII
        # and can be read back regardless of the reader's default encoding.
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(history, f, indent=2)
    except Exception:
        # Drop the partial temp file; the existing history file is untouched.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
    os.replace(tmp_path, HISTORY_FILE)


history = load_history()



In [None]:
# ===== Chat Function =====
# Number of most recent exchanges replayed to the model on each request.
# The model is created with a fixed context window (n_ctx=2048), so replaying
# the entire, ever-growing history would eventually exceed it. This is a
# heuristic cap, not a token count: lower it if individual messages are long.
MAX_CONTEXT_TURNS = 8


def chat_with_model(user_input):
    """Generate a reply to *user_input* using recent history, then persist the turn.

    The full conversation is kept in the history file; only the last
    ``MAX_CONTEXT_TURNS`` exchanges are sent to the model as context.

    Args:
        user_input: The text the user just typed.

    Returns:
        The model's reply text (at most 100 generated tokens).
    """
    # Reload history each time in case the file changed externally
    global history
    history = load_history()

    # Build the message list from the most recent turns only
    messages = []
    for turn in history[-MAX_CONTEXT_TURNS:]:
        # The file may have been edited by hand; skip entries that do not have
        # the expected shape instead of failing the whole request.
        if not isinstance(turn, dict) or "user" not in turn or "model" not in turn:
            continue
        messages.append({"role": "user", "content": turn["user"]})
        messages.append({"role": "assistant", "content": turn["model"]})
    messages.append({"role": "user", "content": user_input})

    # Generate model response
    response = model.create_chat_completion(messages=messages, max_tokens=100)
    response_text = response["choices"][0]["message"]["content"]

    # Update and persist the complete (untruncated) history
    history.append({"user": user_input, "model": response_text})
    save_history(history)

    return response_text


In [None]:
# ===== Gradio Interface =====
# Proxy prefix assigned by JupyterHub ("/" when the variable is not set)
base_url = os.environ.get('JUPYTERHUB_SERVICE_PREFIX', '/')

# Text-in / text-out interface around the history-aware chat function
demo3 = gr.Interface(
    title="Persistent Small Model Chat",
    description="Chat with a local llama-cpp-python model that remembers previous conversations.",
    fn=chat_with_model,
    inputs="text",
    outputs="text",
)

# Launch on port 7860 behind the JupyterHub proxy
demo3.launch(
    server_port=7860,
    root_path=f"{base_url}proxy/7860",
    share=False,
    inline=False,
    prevent_thread_lock=True,
)

# Embed the proxied app in the notebook output
proxy_url = f"{base_url}proxy/7860/"
display(IFrame(src=proxy_url, width=1000, height=600))

## Now we can check the saved chat history

In [None]:
# Read the saved history; the context manager closes the file once it is parsed.
try:
    with open("chat_history.json", encoding="utf-8") as f:
        saved_turns = json.load(f)
except FileNotFoundError:
    # Nothing has been saved yet (no chat message sent so far).
    saved_turns = []
    print("No chat history saved yet.")

# Print every exchange as a user line followed by the model's reply.
for turn in saved_turns:
    print(f"🧑 User: {turn['user']}\n🤖 Model: {turn['model']}\n")

In [None]:
# If you want to clear the history, uncomment and run the line below:
#open("chat_history.json", "w").write("[]")

In [None]:
# Stop the demo3 server once you are done chatting.
demo3.close()

In [None]:
# Final cleanup: close any Gradio apps still running in this kernel.
gr.close_all()
