# Additional End of week Exercise - week 2

Now use everything you've learned from Week 2 to build a full prototype for the technical question/answerer you built in Week 1 Exercise.

This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!

If you feel bold, see if you can add audio input so you can talk to it, and have it respond with audio. ChatGPT or Claude can help you, or email me if you have questions.

I will publish a full solution here soon - unless someone beats me to it...

There are so many commercial applications for this, from a language tutor, to a company onboarding solution, to a companion AI to a course (like this one!) I can't wait to see your results.

# NOTE: Tool call to course notebooks 

This ended up being a bit more complex than I expected, so I only implemented tool calling for ChatGPT (not for Claude and Gemini, as I had originally planned).

I ran into some problems getting streaming to work with tool calling. 

Also, the current implementation is not pretty :)

In [None]:
# base imports

import json
from dotenv import load_dotenv
from openai import OpenAI
import anthropic
import gradio as gr

load_dotenv(override=True)


In [8]:
# Instantiate the API clients and define the system prompt shared by every model

openai = OpenAI()
claude = anthropic.Anthropic()

# One instruction per line; adjacent literals are concatenated into a single string
SYSTEM_PROMPT = (
    "You are a helpful technical tutor who answers questions about python code, software engineering, data science and LLMs\n"
    "You have access to a notebook_search tool that can search the course notebooks for relevant information to the user's question\n"
    "You always keep your answers concise and to the point"
)


## This is the tool
An index of embeddings for the course material - in this case just Week 2. But we could expand it to cover all the course material, so we can ask questions about it, and find references to things we forgot :)

We give the `Notebooks` class the path to the notebook directory that we want to be able to query.

We opt out of the community contributions

In [None]:
from pathlib import Path
from sentence_transformers import SentenceTransformer
import faiss

# Directory that is searched recursively for course notebooks
# ("~" is expanded to the current user's home directory)
NOTEBOOK_DIR = Path('~/code/llm_engineering/week2').expanduser()

# Name of the SentenceTransformer model used to embed both the notebooks and
# the search queries (we could also use openai's embedding model)
EMBED_MODEL = "all-MiniLM-L6-v2"


class Notebooks:
    """Semantic search over the text of a directory of Jupyter notebooks.

    Every notebook is flattened into a single text document (markdown cells,
    fenced code cells and textual cell outputs), embedded with a
    SentenceTransformer model and stored in a flat L2 FAISS index.
    """

    def __init__(self, notebook_dir: Path = None):
        """Load the embedding model and, if a directory is given, index it.

        Args:
            notebook_dir: Directory searched recursively for ``*.ipynb``
                files. When omitted the instance starts with an empty corpus
                and ``load_notebooks`` can be called later.
        """
        self.embed_model = SentenceTransformer(EMBED_MODEL)
        # Start with an empty corpus so search() is safe before anything is loaded
        self.docs = []
        self.doc_index = None
        if notebook_dir:
            self.load_notebooks(notebook_dir)

    # Load all notebooks to memory
    def load_notebooks(self, notebook_dir: Path):
        """Read every notebook below ``notebook_dir`` and rebuild the index.

        Notebooks whose path contains ``community-contributions`` are skipped,
        as are notebooks that yield no text. Replaces any previously loaded
        documents.

        Args:
            notebook_dir: Directory searched recursively for ``*.ipynb`` files.
        """
        print('Reading from', notebook_dir)
        self.docs = []
        for notebook_path in notebook_dir.rglob("*.ipynb"):
            if 'community-contributions' in str(notebook_path):
                continue
            print(notebook_path)

            # Notebook files are UTF-8 JSON; do not depend on the platform's
            # default encoding
            data = json.loads(notebook_path.read_text(encoding="utf-8"))

            # Include both markdown and code if available
            cells = []
            for cell in data.get("cells", []):
                cell_type = cell.get("cell_type")
                if cell_type == "markdown":
                    cells.append("".join(cell["source"]))
                elif cell_type == "code":
                    code = "".join(cell["source"])
                    cells.append(f"```python\n{code}\n```")
                    # Only outputs that carry a "text" field are indexed
                    cells.extend(
                        "".join(output["text"])
                        for output in cell.get("outputs", [])
                        if "text" in output
                    )

            text = "\n\n".join(cells).strip()

            if text:
                self.docs.append({
                    "path": str(notebook_path.relative_to(notebook_dir)),
                    "text": text
                })

        self._build_notebook_retriever()

    # Build FAISS index for retrieval
    def _build_notebook_retriever(self):
        """Embed all loaded documents and store them in a new FAISS index.

        With no documents there is nothing to embed (and no embedding width
        to size the index with), so the index is reset to ``None`` instead.
        """
        print('Building search index')
        if not self.docs:
            self.doc_index = None
            return

        texts = [d["text"] for d in self.docs]

        # Transform notebook text into embeddings
        embeddings = self.embed_model.encode(texts, convert_to_numpy=True, show_progress_bar=True)

        self.doc_index = faiss.IndexFlatL2(embeddings.shape[1])
        self.doc_index.add(embeddings)

    # Returns top n most similar notebook snippets
    def search(self, query: str, top_n: int = 3, max_distance: float = None):
        """Return the notebooks most similar to ``query``.

        Args:
            query: Free-text search query.
            top_n: Maximum number of results to return.
            max_distance: When given, results with a larger distance are
                dropped.

        Returns:
            A list of dicts with the keys ``source`` (notebook path relative
            to the loaded directory), ``excerpt`` (at most the first 500
            characters of the notebook text, cut back to the last line break
            and suffixed with "…" when truncated) and ``score`` (distance
            reported by the index; lower is more similar). The list is empty
            when nothing has been indexed or ``top_n`` is not positive.
        """
        print('Looking for', query)

        # Never ask the index for more neighbours than there are documents
        k = min(top_n, len(self.docs))
        if self.doc_index is None or k <= 0:
            return []

        # compute embeddings for the query
        embeddings = self.embed_model.encode([query], convert_to_numpy=True)

        # search the index
        distances, indices = self.doc_index.search(embeddings, k)

        # compile results
        results = []
        for dist, idx in zip(distances[0], indices[0]):
            # FAISS pads missing neighbours with index -1; without this guard
            # docs[-1] would silently return the last notebook as a hit
            if idx < 0:
                continue
            if max_distance is not None and dist > max_distance:
                continue

            doc = self.docs[idx]
            excerpt = doc["text"]
            if len(excerpt) > 500:
                excerpt = excerpt[:500].rsplit("\n", 1)[0] + "…"

            results.append({
                "source": doc["path"],
                "excerpt": excerpt,
                "score": float(dist)  # lower score is more similar in L2 space
            })

        return results

    def as_tool(self):
        """Return the function-tool schema that advertises ``notebook_search``.

        The schema declares a required string ``query`` and an optional
        integer ``top_n`` (default 3).
        """
        return {
            "type": "function",
            "function": {
                "name": "notebook_search",
                "description": "Searches the course notebooks and returns relevant excerpts with paths.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "What to look for in the course notebooks"
                        },
                        "top_n": {
                            "type":"integer",
                            "description":"How many course notebook passages to return",
                            "default": 3
                        }
                    },
                    "required": ["query"],
                    "additionalProperties": False
                }
            }
        }
        
    
# Shared index over NOTEBOOK_DIR, built once when this cell runs
notebooks = Notebooks(NOTEBOOK_DIR)


def notebook_search(query, top_n=3):
    """Run the notebook_search tool against the shared ``notebooks`` index.

    Args:
        query: What to look for in the course notebooks.
        top_n: How many results to return at most.

    Returns:
        The list of result dicts produced by ``notebooks.search``.
    """
    return notebooks.search(query=query, top_n=top_n)


In [None]:
# Test tool here: manual check that the shared index answers a query

notebooks.search("Gradio")


In [None]:
# Maps the label shown in the model dropdown to the provider's model id
MODELS = {
    "gpt": "gpt-4o-mini",
    "claude": "claude-3-haiku-20240307",
}

def get_interactions(message, history):
    """Convert chat history plus the new message into role/content dicts.

    Args:
        message: The user's new message.
        history: Iterable of ``(user_message, bot_message)`` pairs.

    Returns:
        A list of ``{"role", "content"}`` dicts: alternating user/assistant
        entries for every history pair, followed by the new user message.
    """
    past_turns = [
        {"role": role, "content": text}
        for user_msg, bot_msg in history
        for role, text in (("user", user_msg), ("assistant", bot_msg))
    ]
    return past_turns + [{"role": "user", "content": message}]


def get_chatgpt_stream(model, message, history):
    """Stream a ChatGPT reply, executing notebook_search tool calls if requested.

    The first request offers the notebook_search tool. If the model answers
    with one or more notebook_search calls, each is executed locally, the
    results are appended to the conversation, and a second (tool-free)
    request streams the final answer.

    Args:
        model: OpenAI model id to use.
        message: The user's new message.
        history: Iterable of ``(user_message, bot_message)`` pairs.

    Yields:
        The full reply text accumulated so far (not just the newest piece).
    """
    print(f"Getting OpenAi stream, using {model}")
    interactions = get_interactions(message, history)
    messages = [{"role": "system", "content": SYSTEM_PROMPT}] + interactions

    stream = openai.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.5,
        stream=True,
        tools=[
            notebooks.as_tool()
        ]
    )

    buffer = ""
    # A tool call arrives as fragments spread over many chunks. Collect them
    # per call index so that parallel tool calls are not merged into a single
    # (invalid) argument string.
    pending_calls = {}

    for chunk in stream:
        # Some chunks carry no choices at all; skip them instead of indexing
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        if not delta:
            continue

        # Handle normal content
        if delta.content:
            buffer += delta.content
            yield buffer

        # Handle tool call fragments
        if delta.tool_calls:
            for fragment in delta.tool_calls:
                call = pending_calls.setdefault(
                    fragment.index, {"id": None, "name": None, "arguments": ""}
                )
                if fragment.id:
                    call["id"] = fragment.id
                if fragment.function and fragment.function.name:
                    call["name"] = fragment.function.name
                if fragment.function and fragment.function.arguments:
                    call["arguments"] += fragment.function.arguments
            yield buffer  # Yield to keep Gradio updated

    # Only fully received notebook_search calls are executed
    tool_calls = [
        pending_calls[index]
        for index in sorted(pending_calls)
        if pending_calls[index]["name"] == "notebook_search"
        and pending_calls[index]["arguments"]
        and pending_calls[index]["id"]
    ]
    if not tool_calls:
        return

    # Append assistant message with the tool calls; keep any text the model
    # already streamed so the follow-up request sees it
    messages.append({
        "role": "assistant",
        "content": buffer or None,
        "tool_calls": [
            {
                "id": call["id"],
                "type": "function",
                "function": {
                    "name": call["name"],
                    "arguments": call["arguments"]
                }
            }
            for call in tool_calls
        ]
    })

    # Every tool call needs its own tool message carrying the matching id
    for call in tool_calls:
        print('Tool call to ', call["name"])
        try:
            args = json.loads(call["arguments"])
            result = notebook_search(**args)  # Returns list of dicts
            result_str = json.dumps(result, indent=2)
        except (json.JSONDecodeError, TypeError) as err:
            # Malformed or unexpected arguments: report back to the model
            # instead of crashing the stream
            result_str = json.dumps({"error": f"Invalid notebook_search arguments: {err}"})
        print("Tool result:", result_str)

        messages.append({
            "role": "tool",
            "content": result_str,
            "tool_call_id": call["id"]
        })

    # This is an instruction to the model, so it is sent as a system message;
    # as an assistant message it would read as something the model itself said
    messages.append({
        "role": "system",
        "content": "Make sure you reference the source notebook in your answer.",
    })

    # Follow-up chat call
    followup_stream = openai.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.5,
        stream=True
    )

    # Stream follow-up response
    for chunk in followup_stream:
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        if delta and delta.content:
            buffer += delta.content
            yield buffer


def get_claude_stream(model, message, history):
    """Stream a Claude reply for the conversation.

    Args:
        model: Anthropic model id to use.
        message: The user's new message.
        history: Iterable of ``(user_message, bot_message)`` pairs.

    Yields:
        The full reply text accumulated so far (not just the newest piece).
    """
    print(f"Getting Claude stream, using {model}")
    request = dict(
        model=model,
        messages=get_interactions(message, history),
        max_tokens=500,
        system=SYSTEM_PROMPT,
    )

    accumulated = ""
    with claude.messages.stream(**request) as stream:
        for piece in stream.text_stream:
            accumulated += piece
            yield accumulated


def chat(model_selector, message, history):
    """Stream the chatbot history for one user turn with the selected model.

    Args:
        model_selector: Key into ``MODELS`` chosen in the UI.
        message: The user's new message.
        history: Existing list of ``(user_message, bot_message)`` pairs.

    Yields:
        ``history`` extended by a ``(message, partial_reply)`` pair, once per
        streamed update.

    Raises:
        ValueError: If ``model_selector`` is not a key of ``MODELS``.
    """
    model = MODELS.get(model_selector)
    if not model:
        raise ValueError(f"Invalid model: {model_selector}")

    # Pick the provider-specific streaming generator
    if model_selector == 'gpt':
        streamer = get_chatgpt_stream
    elif model_selector == 'claude':
        streamer = get_claude_stream
    else:
        # Known model key without a streaming backend: nothing to yield
        return

    for partial in streamer(model, message, history):
        yield history + [(message, partial)]
    

# Chat UI: model dropdown, chat window, input box and a clear button
with gr.Blocks() as demo:
    model_selector = gr.Dropdown(choices=MODELS.keys(), value="gpt", label="Pick Model")
    chatbot = gr.Chatbot()
    txt = gr.Textbox(placeholder="Ask about python", show_label=False)

    # On submit, stream the reply into the chat window, then empty the textbox
    reply_event = txt.submit(
        fn=chat,
        inputs=[model_selector, txt, chatbot],
        outputs=[chatbot],
    )
    reply_event.then(fn=lambda: "", inputs=None, outputs=txt)

    # The clear button resets the chat window
    clear = gr.Button("Clear")
    clear.click(fn=lambda: None, inputs=None, outputs=chatbot, queue=False)

demo.launch()
