# Week 3 Exercise – Llama 3.2 Instruct (Hugging Face) quizzes you and scores you (abdussamadbello)

You **select topics** (edit the `TOPICS` list) → **Llama 3.2 Instruct** (Hugging Face) **generates a quiz** → **you** answer → **Llama scores** your answers and gives feedback. No OpenAI API is used; the model runs locally with `transformers`.

**Auth (required for gated model):** Accept the [Llama 3.2 license](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) on the Hub first, then add `HF_TOKEN=your_token` to your `.env` file (never commit it). The notebook calls `login(token=hf_token)` before loading the model; if `HF_TOKEN` is not set, it falls back to calling `login()` with no token.

In [None]:
import os
import re
import json
import torch
from dotenv import load_dotenv
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM

# Pull HF_TOKEN (if present) from a local .env file into the environment.
load_dotenv()
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    # No token configured: call login() without arguments.
    login()
else:
    login(token=hf_token)

MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"

# Probe CUDA once; dtype, device map and final placement all follow from it.
_cuda_ready = torch.cuda.is_available()

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    # Half precision when CUDA is available, full precision otherwise.
    torch_dtype=torch.float16 if _cuda_ready else torch.float32,
    device_map="auto" if _cuda_ready else None,
    trust_remote_code=True,
)
if not _cuda_ready:
    model = model.to("cpu")

# Topics the quiz is generated for.
TOPICS = ["RAG", "prompts", "evaluation"]


def llm_generate(user_prompt: str, max_new_tokens: int = 400) -> str:
    """Send a single-turn user prompt to the model and return only its reply.

    Args:
        user_prompt: Text placed in a single ``user`` chat message.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The newly generated text (prompt excluded), stripped of surrounding
        whitespace.
    """
    messages = [{"role": "user", "content": user_prompt}]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # The rendered chat template already carries its special tokens, so do not
    # let the tokenizer add them a second time.
    inputs = tokenizer(text, return_tensors="pt", add_special_tokens=False)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    prompt_token_count = inputs["input_ids"].shape[-1]
    out = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.6,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Drop the prompt by token count. Slicing the decoded string by len(text)
    # is wrong: `text` contains special tokens that skip_special_tokens=True
    # removes from the decoded output, so the offsets do not line up and the
    # start of the reply gets cut off.
    reply_tokens = out[0][prompt_token_count:]
    return tokenizer.decode(reply_tokens, skip_special_tokens=True).strip()


def generate_quiz(topics: list, questions_per_topic: int = 1) -> list:
    """Ask the model for a quiz and return it as a list of question dicts.

    Args:
        topics: Topic names to cover.
        questions_per_topic: Number of questions requested per topic.

    Returns:
        A list of dicts, each with at least ``"topic"`` and ``"question"``
        keys. If the model output cannot be parsed into such a list, a generic
        ``"Explain <topic>."`` question is returned for every topic instead.
    """
    prompt = f"""Generate a short quiz. Topics: {', '.join(topics)}. Create exactly {questions_per_topic} question(s) per topic. Output only a JSON array. Each item: {{\"topic\": \"<topic>\", \"question\": \"<question>\"}}"""
    raw = llm_generate(prompt, max_new_tokens=500).strip()
    if raw.startswith("```"):
        # Strip a surrounding Markdown code fence.
        raw = re.sub(r"^```\w*\n?", "", raw).rstrip("`").strip()

    # Try the whole reply first, then the outermost [...] span inside it.
    candidates = [raw]
    bracketed = re.search(r"\[[\s\S]*\]", raw)
    if bracketed:
        candidates.append(bracketed.group(0))

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            # Invalid JSON must fall through to the next candidate / fallback
            # rather than escape as an exception.
            continue
        if not isinstance(parsed, list):
            continue
        # Keep only entries callers can index by "topic" and "question".
        questions = [
            entry
            for entry in parsed
            if isinstance(entry, dict) and "topic" in entry and "question" in entry
        ]
        if questions:
            return questions

    return [{"topic": t, "question": f"Explain {t}."} for t in topics]


def score_answers(questions_with_answers: list) -> list:
    """Have the model grade each answer and attach a score and feedback.

    Args:
        questions_with_answers: Dicts providing ``"question"`` and
            ``"user_answer"`` keys; any other keys are carried through.

    Returns:
        A new list with one dict per input item: the original keys plus
        ``"score"`` (int clamped to 1..5, defaulting to 3 when no score is
        found) and ``"feedback"`` (the text after ``Feedback:``, or the whole
        model reply when no feedback line is found).
    """
    # Anchor on the labels so unrelated digits on the line (e.g. "1. Score: 4")
    # or an earlier colon are not mistaken for the score / feedback.
    score_re = re.compile(r"\bscore\s*:\W*(\d+)", re.IGNORECASE)
    feedback_re = re.compile(r"\bfeedback\s*:(.*)", re.IGNORECASE)

    scored = []
    for item in questions_with_answers:
        prompt = f"""Score from 1 to 5. Reply with two lines: Score: <n> and Feedback: <sentence>. Question: {item['question']} Student's answer: {item['user_answer']}"""
        text = llm_generate(prompt, max_new_tokens=120)
        score, feedback = 3, text
        for line in text.split("\n"):
            # Check both labels independently: the model may put them on the
            # same line even though two lines were requested.
            score_match = score_re.search(line)
            if score_match:
                score = max(1, min(5, int(score_match.group(1))))
            feedback_match = feedback_re.search(line)
            if feedback_match:
                # Trim whitespace and Markdown emphasis markers around the text.
                feedback = feedback_match.group(1).strip().strip("*").strip() or text
        scored.append({**item, "score": score, "feedback": feedback})
    return scored


def print_scores(scored: list):
    """Print one line per scored answer, followed by the overall total.

    Args:
        scored: Dicts with ``"topic"``, ``"score"`` and ``"feedback"`` keys.
    """
    running_total = 0
    for position, row in enumerate(scored, 1):
        print(f"Q{position} [{row['topic']}] Score: {row['score']}/5 - {row['feedback']}")
        running_total += row["score"]
    # Each question is worth at most 5 points.
    max_points = len(scored) * 5
    print(f"Total: {running_total}/{max_points}")


In [None]:
# Generate one question per configured topic and list them, numbered from 1.
questions = generate_quiz(TOPICS, questions_per_topic=1)
for number, entry in enumerate(questions, start=1):
    topic_label = entry['topic']
    question_text = entry['question']
    print(f"Q{number} [{topic_label}]: {question_text}")


In [None]:
# Canned answers, matched to the generated questions by position.
sample_answers = [
    "RAG retrieves docs and feeds them to the model to reduce hallucination.",
    "Use clear tool names and descriptions in the system prompt.",
    "Use a judge model to score answers.",
]
# Pair every question with its sample answer; questions beyond the end of
# sample_answers get the "(no answer)" placeholder.
answers = [
    {
        "topic": q["topic"],
        "question": q["question"],
        "user_answer": "(no answer)" if idx >= len(sample_answers) else sample_answers[idx],
    }
    for idx, q in enumerate(questions)
]
scored = score_answers(answers)
print_scores(scored)
