# Week 3: AI Tutor with Synthetic Data Generator

Uses Hugging Face Transformers (`pipeline`, `AutoTokenizer`, `AutoModelForCausalLM`, `BitsAndBytesConfig`) with a model pulled from the Hugging Face Hub to generate synthetic teaching scenarios. The tutor weaves these into its responses via the system prompt.


In [None]:
!pip install -q transformers accelerate bitsandbytes gradio python-dotenv openai

In [None]:
import os
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# Environment: Colab Secrets vs local .env.
# Colab detection depends only on whether google.colab can be imported. A
# failed secret lookup must not flip IN_COLAB to False, so the import and the
# lookup are guarded separately.
try:
    from google.colab import userdata
except ImportError:
    # Not running in Colab: read the token from .env / the process environment.
    IN_COLAB = False
    load_dotenv(override=True)
    HF_TOKEN = os.getenv("HF_TOKEN")
else:
    IN_COLAB = True
    try:
        HF_TOKEN = userdata.get("HF_TOKEN")
    except Exception:
        # The lookup failed (e.g. the secret is not defined or notebook access
        # was not granted). Fall back to the environment as before, but keep
        # IN_COLAB = True.
        load_dotenv(override=True)
        HF_TOKEN = os.getenv("HF_TOKEN")

if HF_TOKEN:
    # Imported lazily so the hub client is only needed when a token is available.
    from huggingface_hub import login
    login(token=HF_TOKEN)
    print("Hugging Face login OK")
else:
    print("Set HF_TOKEN in Colab Secrets or .env to use the synthetic generator.")

In [None]:
# Synthetic generator: small model + 4-bit quantization for Colab/local runs.
SYNTHETIC_GEN_MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
MAX_SYNTHETIC_TOKENS = 256  # passed to the pipeline as max_new_tokens

# NF4 4-bit weights, with bfloat16 as the compute dtype.
quantization_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    load_in_4bit=True,
)

synthetic_tokenizer = AutoTokenizer.from_pretrained(SYNTHETIC_GEN_MODEL_ID)
# Reuse the EOS token for padding when the tokenizer defines no pad token.
if synthetic_tokenizer.pad_token is None:
    synthetic_tokenizer.pad_token = synthetic_tokenizer.eos_token

synthetic_model = AutoModelForCausalLM.from_pretrained(
    SYNTHETIC_GEN_MODEL_ID,
    quantization_config=quantization_config,
    device_map="auto",
)

# Text-generation pipeline shared by generate_synthetic_examples().
synthetic_text_pipeline = pipeline(
    task="text-generation",
    tokenizer=synthetic_tokenizer,
    model=synthetic_model,
    max_new_tokens=MAX_SYNTHETIC_TOKENS,
)

In [None]:
def generate_synthetic_examples(topic: str) -> str:
    """Generate 1-2 short teaching scenarios or code examples for the given topic.

    Prompts the module-level ``synthetic_text_pipeline`` with sampling enabled
    (temperature 0.7), removes the echoed prompt from the output and caps the
    result at 500 characters.

    Args:
        topic: Subject to illustrate, e.g. (part of) the learner's question.

    Returns:
        The generated text, or an empty string when ``topic`` is blank or the
        pipeline returned no text.
    """
    if not topic or not topic.strip():
        # Nothing to illustrate: skip the model call instead of prompting
        # with an empty subject.
        return ""

    scenario_prompt = (
        "Generate two very short teaching scenarios or concrete code examples "
        f"for explaining: {topic}. Each in 1-2 sentences. No preamble."
    )
    pipe_output = synthetic_text_pipeline(scenario_prompt, do_sample=True, temperature=0.7)
    generated_text = pipe_output[0]["generated_text"] if pipe_output else ""
    if not generated_text:
        return ""
    # Strip the prompt from the model output and cap length
    return generated_text.replace(scenario_prompt, "").strip()[:500]

In [None]:
# Chat backends: OpenRouter (Colab or local) and Ollama (local only)
# IN_COLAB must reflect only whether google.colab is importable: it decides
# whether the local-only Ollama model is offered. A failed secret lookup is
# therefore handled separately from the import.
try:
    from google.colab import userdata
except ImportError:
    # Not running in Colab: read the key from .env / the process environment.
    IN_COLAB = False
    load_dotenv(override=True)
    openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
else:
    IN_COLAB = True
    try:
        openrouter_api_key = userdata.get("OPENROUTER_API_KEY")
    except Exception:
        # The lookup failed (e.g. the secret is not defined or notebook access
        # was not granted). Fall back to the environment as before, but keep
        # IN_COLAB = True.
        load_dotenv(override=True)
        openrouter_api_key = os.getenv("OPENROUTER_API_KEY")

# OpenAI-compatible endpoints for the two chat backends.
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
OLLAMA_BASE_URL = "http://localhost:11434/v1"

# Without an OpenRouter key the client stays None.
if openrouter_api_key:
    openrouter_client = OpenAI(base_url=OPENROUTER_BASE_URL, api_key=openrouter_api_key)
else:
    openrouter_client = None
# The Ollama client is always constructed, with the placeholder key "ollama".
ollama_client = OpenAI(base_url=OLLAMA_BASE_URL, api_key="ollama")

# Each entry is (display_label, model_id, backend_name).
AVAILABLE_MODELS = [("GPT-4o-mini (OpenRouter)", "openai/gpt-4o-mini", "openrouter")]
if not IN_COLAB:
    # The Ollama model is only offered outside Colab.
    AVAILABLE_MODELS += [("Llama 3.2 (Ollama)", "llama3.2", "ollama")]

# System prompt; the {synthetic} placeholder is filled in per request.
TUTOR_SYSTEM_PROMPT_TEMPLATE = """You are a professional AI coding tutor. Give clear, step-by-step explanations with code examples. Use the following synthetic teaching scenarios or examples to enrich your answer when relevant. Weave them into your explanation.

Synthetic scenarios/examples to use when helpful:
{synthetic}

Keep a friendly, expert tone. Respond in markdown (e.g. code blocks)."""

In [None]:
def _get_client_for_backend(backend: str):
    """Return the chat client for ``backend``.

    ``"openrouter"`` selects ``openrouter_client``; any other value selects
    ``ollama_client``.
    """
    return openrouter_client if backend == "openrouter" else ollama_client


def _message_content_to_text(content) -> str:
    """Flatten the ``content`` of one history message into plain text."""
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        # Typed part such as {"type": "text", "text": "..."}; parts without a
        # "text" entry contribute nothing.
        return str(content.get("text") or "")
    if isinstance(content, list):
        # NOTE(review): some Gradio releases appear to deliver content as a
        # list of typed parts - confirm against the installed version.
        parts = (_message_content_to_text(part) for part in content)
        return "\n".join(part for part in parts if part)
    return str(content)


def _history_to_messages(conversation_history) -> list:
    """Convert chat history into OpenAI-style ``{"role", "content"}`` messages.

    Accepts both ``(user, assistant)`` pairs and ``{"role": ..., "content": ...}``
    dicts, so the result does not depend on which history format the UI uses.
    """
    messages = []
    for turn in conversation_history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            # Only user/assistant turns are replayed; the system message is
            # added by the caller.
            if role in ("user", "assistant"):
                messages.append(
                    {"role": role, "content": _message_content_to_text(turn.get("content"))}
                )
            continue
        prev_user, prev_assistant = turn
        messages.append({"role": "user", "content": prev_user})
        messages.append({"role": "assistant", "content": prev_assistant or ""})
    return messages


def stream_reply(conversation_history, user_message: str, selected_model_label: str, inject_synthetic: bool):
    """Stream a chat completion for ``user_message``, yielding content chunks.

    Args:
        conversation_history: Earlier turns, either as ``(user, assistant)``
            pairs or as ``{"role": ..., "content": ...}`` dicts.
        user_message: The new user question.
        selected_model_label: Display label from ``AVAILABLE_MODELS``; an
            unknown label falls back to the first configured model.
        inject_synthetic: When true, scenarios generated from the first 200
            characters of ``user_message`` are placed in the system prompt;
            otherwise the placeholder text ``(none)`` is used.

    Yields:
        Non-empty text fragments of the reply, or a single markdown error
        message when no client is available for the selected backend.
    """
    label_to_model_and_backend = {
        label: (mid, backend) for label, mid, backend in AVAILABLE_MODELS
    }
    selected_model_id, backend_name = label_to_model_and_backend.get(
        selected_model_label, AVAILABLE_MODELS[0][1:]
    )
    llm_client = _get_client_for_backend(backend_name)

    if llm_client is None:
        if backend_name == "openrouter":
            error_msg = (
                "**OpenRouter** key not found. In Colab: open the **Secrets** panel (key icon in left sidebar). "
                "Locally: add `OPENROUTER_API_KEY` to .env."
            )
        else:
            error_msg = "Ollama is for local runs only. In Colab use OpenRouter."
        yield error_msg
        return

    synthetic_scenarios = generate_synthetic_examples(user_message[:200]) if inject_synthetic else "(none)"
    system_message_content = TUTOR_SYSTEM_PROMPT_TEMPLATE.format(synthetic=synthetic_scenarios)
    chat_messages = [{"role": "system", "content": system_message_content}]
    chat_messages.extend(_history_to_messages(conversation_history))
    chat_messages.append({"role": "user", "content": user_message})

    response_stream = llm_client.chat.completions.create(
        model=selected_model_id, messages=chat_messages, stream=True
    )
    for stream_chunk in response_stream:
        if not stream_chunk.choices:
            # OpenAI-compatible streams may contain chunks with an empty
            # choices list (for instance a usage-only chunk); skip them.
            continue
        content_chunk = stream_chunk.choices[0].delta.content
        if content_chunk:
            yield content_chunk


def chat(user_input: str, conversation_history, selected_model_label: str, inject_synthetic: bool):
    """Gradio chat handler that streams the tutor's reply.

    After each chunk from ``stream_reply`` it yields the whole reply text
    accumulated so far. Blank or missing input yields nothing.
    """
    if user_input and user_input.strip():
        reply_so_far = ""
        chunk_stream = stream_reply(
            conversation_history, user_input, selected_model_label, inject_synthetic
        )
        for piece in chunk_stream:
            reply_so_far = reply_so_far + piece
            yield reply_so_far

In [None]:
# Extra controls passed to the chat handler after (message, history).
model_selector = gr.Dropdown(
    label="Model",
    value=AVAILABLE_MODELS[0][0],
    choices=[entry[0] for entry in AVAILABLE_MODELS],
)
inject_synthetic_checkbox = gr.Checkbox(
    label="Inject synthetic teaching scenarios",
    value=True,
)

# Build the chat UI around chat() and start it.
tutor_demo = gr.ChatInterface(
    fn=chat,
    title="Technical Q&A Tutor + Synthetic Data",
    description="Ask a coding question. Toggle to inject HuggingFace-generated scenarios into the answer.",
    additional_inputs=[model_selector, inject_synthetic_checkbox],
)
tutor_demo.launch()