# End of week 1 exercise

To demonstrate your familiarity with the OpenAI API, and also Ollama, build a tool that takes a technical question,  
and responds with an explanation. This is a tool that you will be able to use yourself during the course!

In [None]:
# imports
import os
from dotenv import load_dotenv
from openai import OpenAI
from IPython.display import Markdown, display, update_display

In [None]:
# Environment loading and model constants
load_dotenv(override=True)
MODEL_GPT = 'gpt-4o-mini'
MODEL_LLAMA = 'llama3.2'

# Connection settings for each supported model, keyed by model name.
# Both entries are consumed by an OpenAI-compatible client (base_url + api_key).
MODEL_CONFIG = {
    MODEL_LLAMA: dict(base_url="http://localhost:11434/v1", api_key="ollama"),
    MODEL_GPT: dict(
        base_url="https://openrouter.ai/api/v1",
        api_key=os.getenv("OPENROUTER_API_KEY"),
    ),
}

# One-off sanity check of the OpenRouter key (the Ollama entry uses a fixed placeholder key).
# Only warns; it does not stop execution.
_key = MODEL_CONFIG[MODEL_GPT]["api_key"]
if not _key or len(_key) <= 10:
    print("OpenRouter API key may be missing. Check .env and troubleshooting notebook.")

In [None]:
def get_client(model):
    """Build an OpenAI client configured for the given model name.

    Looks the model up in MODEL_CONFIG and passes its ``base_url`` and
    ``api_key`` to the ``OpenAI`` constructor.

    Raises:
        ValueError: if the model has no (non-empty) entry in MODEL_CONFIG.
    """
    settings = MODEL_CONFIG.get(model)
    if settings:
        return OpenAI(base_url=settings["base_url"], api_key=settings["api_key"])
    raise ValueError(f"Unknown model: {model}. Use {MODEL_GPT} or {MODEL_LLAMA}")

In [None]:
# Prompts & main helper below

In [None]:
# prompts — System Design Interview Expert
#
# SYSTEM_PROMPT is sent as the "system" message of every chat request made by
# ask_system_design. It is a markdown-formatted instruction block with three
# sections: the interview approach (five numbered phases), the expected output
# style, and the tone. The text is passed to the model verbatim.

SYSTEM_PROMPT = """
You are a senior staff engineer conducting a system design interview at a top tech company (FAANG-level). Your role is to guide candidates through a structured, realistic system design discussion.

## Your Approach

1. **Requirements Clarification** — Start by clarifying functional and non-functional requirements. Ask about scale (DAU, QPS, storage), consistency needs, latency targets, and key use cases. Make reasonable assumptions when the user doesn't specify.

2. **High-Level Design** — Propose a top-level architecture: clients, load balancers, API servers, core services, databases, caches, message queues. Draw ASCII diagrams when helpful. Identify the main components and data flow.

3. **Deep Dive** — Zoom into 2-3 critical components: data models, sharding strategy, caching layers, replication, or consistency mechanisms. Discuss trade-offs (e.g., consistency vs availability, read vs write optimization).

4. **Scale & Bottlenecks** — Address scalability: horizontal vs vertical scaling, back-of-envelope capacity estimates (storage, bandwidth, QPS). Identify potential bottlenecks and mitigation strategies.

5. **Fault Tolerance & Operations** — Briefly cover failure modes, replication, failover, monitoring, and operational concerns.

## Output Style

- Use clear markdown: headers, bullet points, code blocks for schemas or configs.
- Include simple ASCII diagrams for architecture (e.g., Client → LB → API → DB).
- Be concise but thorough. Prioritize clarity over length.
- When making assumptions, state them explicitly (e.g., "Assuming 10M DAU...").
- Reference real-world patterns: consistent hashing, write-ahead logs, leader election, etc.

## Tone

- Professional and interview-like. Assume the "candidate" (user) is competent and engaged.
- Don't over-explain basics; focus on the non-obvious and trade-off discussions.
"""

# Template for the user message. Note that it already supplies the leading
# "Design " verb, so {question} should be the thing to design.
USER_PROMPT_TEMPLATE = """
Design {question}

{optional_context}
"""

# Example system design questions you can plug in:
EXAMPLE_QUESTIONS = [
    "Design a URL shortener like bit.ly",
    "Design a rate limiter for an API",
    "Design a distributed cache (like Redis)",
    "Design a chat system (like Slack or WhatsApp)",
    "Design YouTube or Netflix (video streaming)",
    "Design a search autocomplete system",
    "Design a notification system",
]


def build_user_prompt(question: str, context: str = "") -> str:
    """Render the user message for a system design question.

    Args:
        question: What to design. May be given with or without a leading
            "Design " (any casing); a leading one is removed because
            USER_PROMPT_TEMPLATE already adds it, which would otherwise
            produce "Design Design ...".
        context: Optional extra constraints. When non-blank it is included
            under a "Context/Constraints:" heading; otherwise it is omitted.

    Returns:
        USER_PROMPT_TEMPLATE filled in with the question and context section.
    """
    verb = "design "
    topic = question.strip()
    # Avoid duplicating the verb that the template itself provides.
    if topic.lower().startswith(verb):
        topic = topic[len(verb):].lstrip()
    ctx = f"Context/Constraints:\n{context}\n\n" if context.strip() else ""
    return USER_PROMPT_TEMPLATE.format(question=topic, optional_context=ctx)

In [None]:
def ask_system_design(question: str, model: str = MODEL_GPT, context: str = "", stream: bool = True):
    """Ask a system design question and return the model's answer as text.

    Sends SYSTEM_PROMPT plus the rendered user prompt to the chat completions
    endpoint of the client returned by ``get_client(model)``.

    Args:
        question: The system design question.
        model: Model name; must be one that ``get_client`` accepts.
        context: Optional extra constraints, forwarded to ``build_user_prompt``.
        stream: When True (default), the answer is rendered incrementally as
            Markdown in a live display while it arrives. When False, nothing
            is displayed and the full answer is simply returned.

    Returns:
        The complete answer text.

    Raises:
        ValueError: propagated from ``get_client`` for an unknown model.
    """
    client = get_client(model)
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": build_user_prompt(question, context)},
    ]
    response = client.chat.completions.create(model=model, messages=messages, stream=stream)
    if not stream:
        return response.choices[0].message.content

    # Create the live display only on the streaming path, so a non-streaming
    # call does not leave an empty Markdown output behind.
    display_handle = display(Markdown(""), display_id=True)
    full = ""
    for chunk in response:
        # A chunk may carry no choices (e.g. a trailing usage-only chunk);
        # indexing [0] on it would raise IndexError.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content or ""
        if not content:
            # Nothing new to show; skip the redundant re-render.
            continue
        full += content
        update_display(Markdown(full), display_id=display_handle.display_id)
    return full

In [None]:
# Demo: streamed answer from the MODEL_GPT model
ask_system_design(question="Design a URL shortener like bit.ly", model=MODEL_GPT)

In [None]:
# Demo: streamed answer from the MODEL_LLAMA model (served through the Ollama endpoint)
ask_system_design(question="Design a rate limiter for an API", model=MODEL_LLAMA)

In [None]:
# Demo: same call shape, with extra context (scale and constraints) supplied
ask_system_design(
    question="Design a chat system like Slack",
    context="Scale: 50M DAU, 10B messages/day. Focus on real-time delivery and message ordering.",
    model=MODEL_GPT,
)