# Module 6 Assessment — TEMPLATE WITH HIDDEN TESTS (Instructor/Grading)

This template grades the student notebook deterministically (no LLMs).

**Structure:**
- 5 Coding Tasks (80 points): Unit tests with assertions
- 1 Written Task (20 points): Keyword groups + minimum length

Scores and feedback are written to `assessment_result.json`.

In [None]:
# Setup - standard library only
import json
import time
from typing import Optional, Dict, Any, List, Callable, TypeVar

print("Setup complete!")

In [None]:
# Grading state shared by all task cells, keyed by task name:
#   __assessment_scores[task]   -> (points earned, points available)
#   __assessment_feedback[task] -> feedback for that task
__assessment_scores = dict()
__assessment_feedback = dict()

def record_score(task, points, max_points, feedback):
    """Store the outcome of one task, replacing any earlier entry for it.

    Args:
        task: Task name used as the key in both tables.
        points: Points earned.
        max_points: Points available.
        feedback: Feedback to keep alongside the score.
    """
    __assessment_feedback[task] = feedback
    __assessment_scores[task] = (points, max_points)

def validate_answer(
    answer,
    required_groups=None,
    forbidden_strings=None,
    forbidden_characters=None,
    min_length=0,
    max_length=None,
):
    """
    Validate a written answer using string-level rules.

    Args:
        answer: The student's text. A value that is not a ``str`` fails
            validation with a single reason instead of raising.
        required_groups: Lists of keywords. The lowercased answer must
            contain at least one keyword (substring match) from every list.
        forbidden_strings: Phrases looked up as substrings of the lowercased
            answer, so they should be lowercase themselves. The answer is
            flagged once two or more of them occur.
        forbidden_characters: Character sequences looked up in the answer as
            written; one occurrence is enough to flag it.
        min_length: Minimum length of the stripped answer, in characters.
        max_length: Maximum length of the stripped answer, or None for no
            upper bound.

    Returns (passed: bool, reasons: list[str])
    """
    import re

    # A placeholder such as None cannot be stripped; report it as a failed
    # answer so the calling cell still records a score.
    if not isinstance(answer, str):
        return False, [f"Answer must be a string (got {type(answer).__name__})"]

    reasons = []
    text = answer.strip()
    t_lower = text.lower()

    # Length checks
    if len(text) < min_length:
        reasons.append(f"Too short (min {min_length} chars, got {len(text)})")

    if max_length is not None and len(text) > max_length:
        reasons.append(f"Too long (max {max_length} chars)")

    # Required keyword groups (AND logic - must have at least one from each group)
    if required_groups:
        for group in required_groups:
            if not any(kw in t_lower for kw in group):
                reasons.append(f"Missing concept from: {group[:2]}...")

    # Forbidden substrings (AI detection); a single hit is tolerated
    if forbidden_strings:
        matched = [s for s in forbidden_strings if s in t_lower]
        if len(matched) >= 2:
            reasons.append(f"Appears AI-generated. Detected: {matched[:3]}")

    # Forbidden characters (markdown formatting from copy-paste)
    if forbidden_characters:
        found = [ch for ch in forbidden_characters if ch in text]
        if found:
            reasons.append(f"Contains formatting characters (copy-paste?): {found}")

    # Check for suspiciously perfect structure (numbered lists with consistent formatting)
    numbered_pattern = re.findall(r'^\d+\.\s', text, re.MULTILINE)
    if len(numbered_pattern) >= 4:
        reasons.append("Suspiciously structured (numbered list format typical of AI)")

    # Check for excessive use of transitional phrases. Match whole words only:
    # a plain substring test counts "thus" inside "enthusiastic" and "hence"
    # inside "henceforth".
    transitions = ["furthermore", "moreover", "additionally", "consequently",
                   "therefore", "thus", "hence", "accordingly", "subsequently"]
    transition_count = sum(
        1 for t in transitions if re.search(r"\b" + re.escape(t) + r"\b", t_lower)
    )
    if transition_count >= 3:
        reasons.append(f"Excessive formal transitions ({transition_count} found) - likely AI")

    passed = len(reasons) == 0
    return passed, reasons

# Common AI phrases that indicate copy-paste from ChatGPT/Claude.
# The Task 6 cell passes this list to validate_answer() as forbidden_strings.
# There each entry is substring-matched against the lowercased answer, and the
# answer is flagged only when two or more entries match, so entries must be
# written in lowercase.
AI_PHRASES = [
    # Self-identification
    "as an ai",
    "as a large language model",
    "i'm happy to help",
    "i'd be happy to",
    "i cannot",
    "i can't provide",
    
    # Structural phrases
    "let me explain",
    "let me break this down",
    "let's dive into",
    "let's explore",
    "here's a comprehensive",
    "here's an overview",
    "here are the key",
    
    # Emphasis phrases
    "it's important to note that",
    "it's worth noting that",
    "it is important to understand",
    "it's crucial to",
    "it's essential to",
    "it bears mentioning",
    
    # Summary phrases
    "in summary,",
    "in conclusion,",
    "to summarize,",
    "to sum up,",
    "overall,",
    "in short,",
    
    # Filler phrases
    "first and foremost",
    "last but not least",
    "needless to say",
    "it goes without saying",
    
    # Overused AI words
    "delve into",
    "delve deeper",
    "crucial to understand",
    "landscape of",
    "realm of",
    "paradigm",
    "leverage the power",
    "harness the capabilities",
    "utilize",
    "facilitate",
    "comprehensive understanding",
    "nuanced",
    "robust",
    "seamless",
    "streamline",
    
    # Position phrases
    "at its core,",
    "fundamentally,",
    "essentially,",
    "in essence,",
    "inherently,",
    "intrinsically,",
    
    # Emphasis adjectives
    "pivotal role",
    "multifaceted",
    "myriad of",
    "plethora of",
    "wide array of",
    "diverse range of",
    
    # AI hedging
    "it's worth mentioning",
    "one could argue",
    "it can be said that",
    "generally speaking",
    
    # ChatGPT specific
    "i hope this helps",
    "feel free to ask",
    "happy to clarify",
    "let me know if",
    
    # Claude specific  
    "i appreciate",
    "great question",
    "that's a thoughtful",
]

# Markdown formatting characters that suggest copy-paste.
# Passed to validate_answer() as forbidden_characters by the Task 6 cell; each
# entry is searched for in the answer as written, and one hit fails the answer.
# NOTE(review): any text containing "###" also contains "##", and "* " matches
# ordinary prose such as "a * b" - confirm both are intended.
FORBIDDEN_CHARS = ["##", "**", "```", "* ", "- [ ]", "###", ">>>", "==="]

# Written task rules, keyed by the task label the grading cell looks up.
WRITTEN_RULES = {
  "Task 6": {
      # Name of the notebook global that holds the student's answer
      "var": "production_explanation",
      # Minimum answer length, handed to validate_answer() as min_length
      "min_len": 400,
      # Points awarded when validation passes; otherwise the task scores 0
      "max_points": 20,
      # Handed to validate_answer() as required_groups: the answer must
      # contain at least one keyword (substring match) from every list
      "groups": [
          ["service", "api", "remote", "external"],
          ["retry", "backoff", "fail", "error"],
          ["valid", "json", "parse", "check"],
          ["test", "mock", "unit", "cost"]
      ]
  }
}

print("Scoring infrastructure ready.")

---
## Task Tests (1-6)

In [None]:
# Task 1: LLM Client Class (20 points) [Coding]
# Four checks worth 5 points each run in order. The first failed assertion or
# unexpected exception ends the cell; points earned before it are kept.
max_points = 20
points = 0
fb = []

try:
    assert "LLMClient" in globals(), "LLMClient class not defined"
    LLMClient_class = globals()["LLMClient"]

    # Test 1: the constructor accepts a URL plus api_key/model keywords
    client = LLMClient_class("http://localhost:11434/", api_key="test-key", model="test-model")
    fb.append("\u2713 Test 1 passed (instantiation works)")
    points += 5

    # Tests 2-4: each constructor argument is stored on the instance; the
    # base URL must come back without its trailing slash.
    attribute_checks = [
        # (attribute, expected value, failure prefix, success line)
        ("base_url", "http://localhost:11434", "base_url not cleaned: ",
         "\u2713 Test 2 passed (base_url cleaned)"),
        ("api_key", "test-key", "api_key not stored: ",
         "\u2713 Test 3 passed (api_key stored)"),
        ("model", "test-model", "model not stored: ",
         "\u2713 Test 4 passed (model stored)"),
    ]
    for attr_name, expected_value, failure_prefix, success_line in attribute_checks:
        assert hasattr(client, attr_name), f"Client missing {attr_name} attribute"
        stored_value = getattr(client, attr_name)
        assert stored_value == expected_value, f"{failure_prefix}{stored_value}"
        fb.append(success_line)
        points += 5

except AssertionError as e:
    fb.append(f"\u2717 {e}")
except Exception as e:
    fb.append(f"\u2717 Runtime error: {e}")

record_score("Task 1 - LLM Client Class", points, max_points, fb)

In [None]:
# Task 2: JSON Response Parser (15 points) [Coding]
# Three checks worth 5 points each. A failed assertion or unexpected exception
# ends the cell; points earned before it are kept.
max_points = 15
points = 0
fb = []

try:
    assert "parse_json_response" in globals(), "parse_json_response function not defined"
    parse_fn = globals()["parse_json_response"]

    # Tests 1-2: the same object must come back from bare JSON and from JSON
    # wrapped in a markdown code fence.
    parse_cases = [
        # (raw input, failure prefix, success line)
        ('{"status": "ok"}', "Plain JSON parse failed: ",
         "\u2713 Test 1 passed (plain JSON)"),
        ('```json\n{"status": "ok"}\n```', "Markdown JSON parse failed: ",
         "\u2713 Test 2 passed (markdown code block)"),
    ]
    for raw_text, failure_prefix, success_line in parse_cases:
        result = parse_fn(raw_text)
        assert result == {"status": "ok"}, f"{failure_prefix}{result}"
        fb.append(success_line)
        points += 5

    # Test 3: invalid input must raise ValueError whose message mentions
    # "invalid json" (case-insensitive)
    try:
        parse_fn("not valid json")
    except ValueError as e:
        assert "invalid json" in str(e).lower(), f"Wrong error message: {e}"
        fb.append("\u2713 Test 3 passed (raises ValueError)")
        points += 5
    else:
        # Returning normally earns no points but is not a fatal error
        fb.append("\u2717 Test 3 failed (should raise ValueError)")

except AssertionError as e:
    fb.append(f"\u2717 {e}")
except Exception as e:
    fb.append(f"\u2717 Runtime error: {e}")

record_score("Task 2 - JSON Parser", points, max_points, fb)

In [None]:
# Task 3: Retry with Exponential Backoff (20 points) [Coding]
# Four checks worth 5 points each run in order. A failed assertion or
# unexpected exception ends the cell; points earned before it are kept.
points = 0
fb = []
max_points = 20

try:
    assert "retry_with_backoff" in globals(), "retry_with_backoff function not defined"
    retry_fn = globals()["retry_with_backoff"]
    
    # Test 1: Successful call returns result
    result = retry_fn(lambda: "success", max_retries=3, base_delay=0.001)
    assert result == "success", f"Successful call should return result: {result}"
    fb.append("\u2713 Test 1 passed (returns result on success)")
    points += 5
    
    # Test 2: Retries on failure then succeeds
    attempt_count = [0]  # one-element list so the closure can mutate the counter
    def fail_twice():
        attempt_count[0] += 1
        if attempt_count[0] < 3:
            raise RuntimeError("fail")
        return "worked"
    
    result = retry_fn(fail_twice, max_retries=3, base_delay=0.001)
    assert result == "worked", f"Should succeed after retries: {result}"
    assert attempt_count[0] == 3, f"Should have tried 3 times: {attempt_count[0]}"
    fb.append("\u2713 Test 2 passed (retries then succeeds)")
    points += 5
    
    # Test 3: Raises exception after max retries
    def always_fail():
        raise RuntimeError("permanent failure")
    
    try:
        retry_fn(always_fail, max_retries=2, base_delay=0.001)
        fb.append("\u2717 Test 3 failed (should raise after max retries)")
    except RuntimeError as e:
        # Give the assertion a message; without one the feedback line is blank
        assert "permanent failure" in str(e), f"Should re-raise the original error, got: {e}"
        fb.append("\u2713 Test 3 passed (raises after max retries)")
        points += 5
    
    # Test 4: Check exponential backoff timing (approximate)
    timing_attempts = [0]
    
    def timed_fail():
        timing_attempts[0] += 1
        if timing_attempts[0] < 3:
            raise RuntimeError("fail")
        return "done"
    
    # Two retries with a 50 ms base should wait 0.05 + 0.10 = 0.15 s in total.
    # The cutoff sits just below that: a constant 50 ms delay totals only
    # 0.10 s and must not be accepted as exponential.
    min_elapsed = 0.14
    # perf_counter() cannot jump the way the adjustable wall clock can
    timing_start = time.perf_counter()
    retry_fn(timed_fail, max_retries=3, base_delay=0.05)  # 50ms base
    elapsed = time.perf_counter() - timing_start
    if elapsed >= min_elapsed:
        fb.append("\u2713 Test 4 passed (exponential delay)")
        points += 5
    else:
        fb.append(f"\u2717 Test 4 failed (elapsed {elapsed:.3f}s, expected >= {min_elapsed}s)")
    
except AssertionError as e:
    fb.append(f"\u2717 {e}")
except Exception as e:
    fb.append(f"\u2717 Runtime error: {e}")

record_score("Task 3 - Retry with Backoff", points, max_points, fb)

In [None]:
# Task 4: Schema Validation (15 points) [Coding]
# Three checks worth 5 points each. A failed assertion or unexpected exception
# ends the cell; points earned before it are kept.
max_points = 15
points = 0
fb = []

try:
    assert "validate_schema" in globals(), "validate_schema function not defined"
    validate_fn = globals()["validate_schema"]

    # Test 1: data containing every required field is returned unchanged
    result = validate_fn({"name": "Alice", "age": 30}, ["name", "age"])
    assert result == {"name": "Alice", "age": 30}, f"Valid data should be returned: {result}"
    fb.append("\u2713 Test 1 passed (valid data passes)")
    points += 5

    # Test 2: one missing field must raise ValueError that says "missing"
    # (any case) or names the field
    try:
        validate_fn({"name": "Alice"}, ["name", "age"])
    except ValueError as e:
        message = str(e)
        assert "missing" in message.lower() or "age" in message, f"Error should mention missing fields: {e}"
        fb.append("\u2713 Test 2 passed (raises on missing field)")
        points += 5
    else:
        fb.append("\u2717 Test 2 failed (should raise ValueError)")

    # Test 3: several missing fields must raise ValueError whose message
    # contains "missing" or "field"; a vague message loses the points without
    # ending the cell
    try:
        validate_fn({}, ["field1", "field2", "field3"])
    except ValueError as e:
        error_str = str(e).lower()
        mentions_fields = any(word in error_str for word in ("missing", "field"))
        if not mentions_fields:
            fb.append(f"\u2717 Test 3 failed (should list missing fields): {e}")
        else:
            fb.append("\u2713 Test 3 passed (lists missing fields)")
            points += 5
    else:
        fb.append("\u2717 Test 3 failed (should raise ValueError)")

except AssertionError as e:
    fb.append(f"\u2717 {e}")
except Exception as e:
    fb.append(f"\u2717 Runtime error: {e}")

record_score("Task 4 - Schema Validation", points, max_points, fb)

In [None]:
# Task 5: Build JSON Prompt (10 points) [Coding]
# Three independent keyword checks (3 + 3 + 4 points) on the generated prompt.
points = 0
fb = []
max_points = 10

try:
    assert "build_json_prompt" in globals(), "build_json_prompt function not defined"
    build_fn = globals()["build_json_prompt"]
    
    test_schema = {"sentiment": "positive|negative|neutral"}
    # The task text must not contain "only", "json" or the schema key.
    # Otherwise a prompt that merely echoes the task would pass a check
    # without containing the instruction or the schema.
    prompt = build_fn("Classify the tone of this review", test_schema)
    
    assert prompt is not None, "Function returned None"
    assert isinstance(prompt, str), f"Should return string, got {type(prompt)}"
    
    prompt_lower = prompt.lower()
    
    # Test 1: Contains "only" instruction
    if "only" in prompt_lower:
        fb.append("\u2713 Test 1 passed (contains ONLY instruction)")
        points += 3
    else:
        fb.append("\u2717 Test 1 failed (should contain 'ONLY')")
    
    # Test 2: Contains "json" keyword
    if "json" in prompt_lower:
        fb.append("\u2713 Test 2 passed (mentions JSON)")
        points += 3
    else:
        fb.append("\u2717 Test 2 failed (should mention JSON)")
    
    # Test 3: Contains the schema fields (the key can only come from the schema)
    if "sentiment" in prompt_lower:
        fb.append("\u2713 Test 3 passed (includes schema)")
        points += 4
    else:
        fb.append("\u2717 Test 3 failed (should include schema)")
    
except AssertionError as e:
    fb.append(f"\u2717 {e}")
except Exception as e:
    fb.append(f"\u2717 Runtime error: {e}")

record_score("Task 5 - Build JSON Prompt", points, max_points, fb)

In [None]:
# Task 6: Production LLM Integration (20 points) [Written]
# All-or-nothing: the answer earns full points only when validate_answer()
# reports no problems; otherwise every reason becomes a feedback line.
points = 0
fb = []
try:
    r = WRITTEN_RULES["Task 6"]
    assert r["var"] in globals(), f"{r['var']} variable missing"
    text = globals()[r["var"]]
    # A placeholder such as None must produce feedback rather than a crash
    assert isinstance(text, str), f"{r['var']} must be a string, got {type(text).__name__}"
    
    passed, reasons = validate_answer(
        text,
        required_groups=r["groups"],
        forbidden_strings=AI_PHRASES,
        forbidden_characters=FORBIDDEN_CHARS,
        min_length=r["min_len"]
    )
    
    if passed:
        points = r["max_points"]
        fb.append("\u2713 Passed")
    else:
        for reason in reasons:
            fb.append(f"\u2717 {reason}")
            
except AssertionError as e:
    fb.append(f"\u2717 {e}")
except Exception as e:
    # Without this handler an unexpected error would skip record_score below
    # and Task 6 would be missing from the totals.
    fb.append(f"\u2717 Runtime error: {e}")
record_score("Task 6 - Production Explanation", points, WRITTEN_RULES["Task 6"]["max_points"], fb)

---
## Generate Results

In [None]:
import json, datetime, re

# Sort key for (task name, value) pairs: the number in "Task N - ..."
def task_sort_key(item):
    """Return the task number found in item[0], or 99 when there is none."""
    task_name = item[0]
    found = re.search(r'Task (\d+)', task_name)
    if found is None:
        return 99
    return int(found.group(1))

# Put both tables in task-number order
sorted_scores = {
    name: score
    for name, score in sorted(__assessment_scores.items(), key=task_sort_key)
}
sorted_feedback = {name: __assessment_feedback[name] for name in sorted_scores}

# Add up earned and available points over every recorded task
total_points = sum(earned for earned, _ in sorted_scores.values())
max_possible = sum(available for _, available in sorted_scores.values())

# Report 0 when nothing was recorded, to avoid dividing by zero
if max_possible > 0:
    percentage = round(100 * total_points / max_possible, 1)
else:
    percentage = 0

result = {
  "scores": sorted_scores,
  "feedback": sorted_feedback,
  "total": f"{total_points}/{max_possible}",
  "percentage": percentage,
  "timestamp": datetime.datetime.now().isoformat()
}

# Save the report next to the notebook
with open("assessment_result.json", "w") as out_file:
    json.dump(result, out_file, indent=2)

# Console summary: banner, then one status line per task with its feedback
print(f"\n{'='*50}")
print(f"ASSESSMENT RESULTS: {total_points}/{max_possible} ({result['percentage']}%)")
print(f"{'='*50}\n")

for task_name, (earned, available) in sorted_scores.items():
    # Full marks -> check mark, zero -> cross, anything in between -> "~"
    if earned == available:
        marker = "\u2713"
    elif earned == 0:
        marker = "\u2717"
    else:
        marker = "~"
    print(f"{marker} {task_name}: {earned}/{available}")
    for feedback_line in sorted_feedback[task_name]:
        print(f"    {feedback_line}")

result