# Module 5 Assessment — TEMPLATE WITH HIDDEN TESTS (Instructor/Grading)

This template grades the student notebook deterministically (no LLMs).

**Structure:**
- 5 Coding Tasks (80 points): Unit tests with assertions
- 1 Written Task (20 points): Keyword groups + minimum length

Feedback is written to `assessment_result.json`.

In [None]:
# Setup - packages are pre-installed in grading environment
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import faiss

# Load the embedding model (cached after first download).
# NOTE: the Task 1 test in this notebook expects corpus embeddings of shape (8, 384),
# so changing the model name here may require updating that test as well.
model = SentenceTransformer("all-MiniLM-L6-v2")
print("Setup complete!")

In [None]:
# Corpus of documents (do not modify)
# The order matters: the Task 3 and Task 4 relevance tests treat indices 0-4 as the
# finance documents and indices 5-7 as the sports documents.
corpus = [
    "Interest rates were increased by the central bank to control inflation.",
    "The central bank raised borrowing costs to fight rising prices.",
    "Quarterly earnings improved as net interest margin widened.",
    "The Federal Reserve announced a 25 basis point rate hike.",
    "Mortgage rates have reached their highest level in 20 years.",
    "Football is a popular sport played across Europe.",
    "The team won the championship after a dramatic penalty shootout.",
    "Basketball players competed in the international tournament."
]

# The single query used by the retrieval and prompt-building tasks.
query = "Why did the central bank increase rates?"

print(f"Corpus loaded: {len(corpus)} documents")

In [None]:
# Per-task results, keyed by task name and filled in by record_score().
__assessment_scores = {}
__assessment_feedback = {}


def record_score(task, points, max_points, feedback):
    """Store the outcome of one task.

    Args:
        task: Task name used as the key in both result dictionaries.
        points: Points earned for the task.
        max_points: Maximum points available for the task.
        feedback: Feedback lines for the task, stored as given.

    Recording the same task name again overwrites the earlier entry.
    """
    __assessment_scores.update({task: (points, max_points)})
    __assessment_feedback.update({task: feedback})

def validate_answer(
    answer,
    required_groups=None,
    forbidden_strings=None,
    forbidden_characters=None,
    min_length=0,
    max_length=None,
):
    """
    Validate a written answer using deterministic, string-level rules.

    Args:
        answer: The student's answer. Anything that is not a ``str`` (for
            example ``None`` left over from the template) fails immediately
            with a single explanatory reason instead of raising.
        required_groups: Keyword groups. The answer must contain, as a
            lowercase substring, at least one keyword from every group.
        forbidden_strings: Lowercase phrases; the answer is flagged when two
            or more of them occur in it.
        forbidden_characters: Substrings (matched case-sensitively) that fail
            the answer when any of them occurs in it.
        min_length: Minimum length of the stripped answer, in characters.
        max_length: Optional maximum length of the stripped answer.

    Returns:
        (passed: bool, reasons: list[str]) - ``passed`` is True only when
        ``reasons`` is empty.
    """
    import re

    # A missing or non-text answer cannot be inspected further; report it as
    # a normal failure so the caller can still record a score.
    if not isinstance(answer, str):
        return False, [f"Answer must be a string, got {type(answer).__name__}"]

    reasons = []
    text = answer.strip()
    t_lower = text.lower()

    # Length checks
    if len(text) < min_length:
        reasons.append(f"Too short (min {min_length} chars, got {len(text)})")

    if max_length is not None and len(text) > max_length:
        reasons.append(f"Too long (max {max_length} chars)")

    # Required keyword groups (AND logic - must have at least one from each group)
    if required_groups:
        for group in required_groups:
            if not any(kw in t_lower for kw in group):
                reasons.append(f"Missing concept from: {group[:2]}...")

    # Forbidden substrings (AI detection); a single hit is tolerated
    if forbidden_strings:
        matched = [s for s in forbidden_strings if s in t_lower]
        if len(matched) >= 2:
            reasons.append(f"Appears AI-generated. Detected: {matched[:3]}")

    # Forbidden characters (markdown formatting from copy-paste)
    if forbidden_characters:
        found = [ch for ch in forbidden_characters if ch in text]
        if found:
            reasons.append(f"Contains formatting characters (copy-paste?): {found}")

    # Check for suspiciously perfect structure (numbered lists with consistent formatting)
    numbered_pattern = re.findall(r'^\d+\.\s', text, re.MULTILINE)
    if len(numbered_pattern) >= 4:
        reasons.append("Suspiciously structured (numbered list format typical of AI)")

    # Check for excessive use of transitional phrases.
    # Whole-word matching: a plain substring test would count "thus" inside
    # "enthusiasm" or "hence" inside "whence".
    transitions = ["furthermore", "moreover", "additionally", "consequently",
                   "therefore", "thus", "hence", "accordingly", "subsequently"]
    transition_count = sum(
        1 for t in transitions if re.search(rf"\b{re.escape(t)}\b", t_lower)
    )
    if transition_count >= 3:
        reasons.append(f"Excessive formal transitions ({transition_count} found) - likely AI")

    passed = len(reasons) == 0
    return passed, reasons

# Common AI phrases that indicate copy-paste from ChatGPT/Claude.
# Passed to validate_answer() as `forbidden_strings`: each entry is tested as a
# substring of the lowercased answer, so entries must be lowercase, and an answer is
# only flagged when two or more entries match.
AI_PHRASES = [
    # Self-identification
    "as an ai",
    "as a large language model",
    "i'm happy to help",
    "i'd be happy to",
    "i cannot",
    "i can't provide",

    # Structural phrases
    "let me explain",
    "let me break this down",
    "let's dive into",
    "let's explore",
    "here's a comprehensive",
    "here's an overview",
    "here are the key",

    # Emphasis phrases
    "it's important to note that",
    "it's worth noting that",
    "it is important to understand",
    "it's crucial to",
    "it's essential to",
    "it bears mentioning",

    # Summary phrases (trailing comma is part of the match)
    "in summary,",
    "in conclusion,",
    "to summarize,",
    "to sum up,",
    "overall,",
    "in short,",

    # Filler phrases
    "first and foremost",
    "last but not least",
    "needless to say",
    "it goes without saying",

    # Overused AI words
    "delve into",
    "delve deeper",
    "crucial to understand",
    "landscape of",
    "realm of",
    "paradigm",
    "leverage the power",
    "harness the capabilities",
    "utilize",
    "facilitate",
    "comprehensive understanding",
    "nuanced",
    "robust",
    "seamless",
    "streamline",

    # Position phrases (trailing comma is part of the match)
    "at its core,",
    "fundamentally,",
    "essentially,",
    "in essence,",
    "inherently,",
    "intrinsically,",

    # Emphasis adjectives
    "pivotal role",
    "multifaceted",
    "myriad of",
    "plethora of",
    "wide array of",
    "diverse range of",

    # AI hedging
    "it's worth mentioning",
    "one could argue",
    "it can be said that",
    "generally speaking",

    # ChatGPT specific
    "i hope this helps",
    "feel free to ask",
    "happy to clarify",
    "let me know if",

    # Claude specific
    "i appreciate",
    "great question",
    "that's a thoughtful",
]

# Markdown formatting characters that suggest copy-paste.
# Passed to validate_answer() as `forbidden_characters`: each entry is tested as a
# case-sensitive substring of the stripped answer, and a single match fails the answer.
FORBIDDEN_CHARS = ["##", "**", "```", "* ", "- [ ]", "###", ">>>", "==="]

# Written task rules, keyed by task label.
#   var        - name of the student variable that holds the written answer
#   min_len    - minimum answer length, passed to validate_answer() as min_length
#   max_points - points awarded when the answer passes every rule
#   groups     - keyword groups; the answer must contain at least one keyword from
#                each group (lowercase substring match)
# NOTE(review): matching is by substring, so short keywords also match inside longer
# words (e.g. "rag" inside "paragraph") - confirm this leniency is intended.
WRITTEN_RULES = {
  "Task 6": {
      "var": "rag_explanation",
      "min_len": 400,
      "max_points": 20,
      "groups": [
          ["embed", "vector", "represent"],
          ["similar", "close", "distance", "cosine"],
          ["rag", "retrieval", "retrieve", "ground"],
          ["hallucin", "fabricat", "made up", "invent"]
      ]
  }
}

print("Scoring infrastructure ready.")

---
## Task Tests (1-6)

In [None]:
# Task 1: Generate Embeddings (15 points) [Coding]
# Three 5-point checks run in order; the first failing check ends the task and
# its message becomes the final feedback line.
max_points = 15
points = 0
fb = []

try:
    # Look the variable up by name so a missing definition is reported as
    # feedback rather than raising NameError.
    if "corpus_embeddings" not in globals():
        raise AssertionError("corpus_embeddings variable not defined")
    embeddings = globals()["corpus_embeddings"]
    if embeddings is None:
        raise AssertionError("corpus_embeddings is None")

    # Test 1: the object exposes a .shape attribute (array-like)
    if not hasattr(embeddings, 'shape'):
        raise AssertionError("corpus_embeddings should be a numpy array")
    fb.append("\u2713 Test 1 passed (numpy array)")
    points += 5

    # Test 2: one 384-dimensional row per corpus document
    if embeddings.shape != (8, 384):
        raise AssertionError(f"Expected shape (8, 384), got {embeddings.shape}")
    fb.append("\u2713 Test 2 passed (correct shape)")
    points += 5

    # Test 3: every row has unit length (norm within 0.01 of 1.0)
    norms = np.linalg.norm(embeddings, axis=1)
    if not np.allclose(norms, 1.0, atol=0.01):
        raise AssertionError(f"Vectors not normalized. Norms: {norms[:3]}...")
    fb.append("\u2713 Test 3 passed (normalized vectors)")
    points += 5

except AssertionError as e:
    fb.append(f"\u2717 {e}")
except Exception as e:
    # Anything unexpected is still reported so the task always gets recorded.
    fb.append(f"\u2717 Runtime error: {e}")

record_score("Task 1 - Generate Embeddings", points, max_points, fb)

In [None]:
# Task 2: Calculate Similarity Scores (15 points) [Coding]
# Three 5-point checks run in order; the first failing check ends the task.
max_points = 15
points = 0
fb = []

try:
    # Look the variable up by name so a missing definition is reported as
    # feedback rather than raising NameError.
    if "similarity_scores" not in globals():
        raise AssertionError("similarity_scores variable not defined")
    scores = globals()["similarity_scores"]
    if scores is None:
        raise AssertionError("similarity_scores is None")

    # Test 1: the object exposes a .shape attribute (array-like)
    if not hasattr(scores, 'shape'):
        raise AssertionError("similarity_scores should be a numpy array")
    fb.append("\u2713 Test 1 passed (numpy array)")
    points += 5

    # Test 2: eight scores, either as a flat (8,) array or a single (1, 8) row
    if scores.shape not in ((8,), (1, 8)):
        raise AssertionError(f"Expected shape (8,), got {scores.shape}")
    if len(scores.shape) > 1:
        scores_flat = scores.flatten()
    else:
        scores_flat = scores
    fb.append("\u2713 Test 2 passed (correct shape)")
    points += 5

    # Test 3: cosine similarities must lie in [-1, 1] (0.01 slack for rounding)
    low = scores_flat.min()
    high = scores_flat.max()
    if not (low >= -1.01 and high <= 1.01):
        raise AssertionError(f"Scores out of range: [{low:.3f}, {high:.3f}]")
    fb.append("\u2713 Test 3 passed (valid range)")
    points += 5

except AssertionError as e:
    fb.append(f"\u2717 {e}")
except Exception as e:
    # Anything unexpected is still reported so the task always gets recorded.
    fb.append(f"\u2717 Runtime error: {e}")

record_score("Task 2 - Similarity Scores", points, max_points, fb)

In [None]:
# Task 3: Top-K Retrieval (15 points) [Coding]
# Three 5-point checks run in order; the first failing check ends the task.
points = 0
fb = []
max_points = 15

try:
    assert "top_3_indices" in globals(), "top_3_indices variable not defined"
    indices = globals()["top_3_indices"]
    assert indices is not None, "top_3_indices is None"

    # Convert to list/array for checking
    indices_list = list(indices) if hasattr(indices, '__iter__') else [indices]

    # Test 1: Check length is 3
    assert len(indices_list) == 3, f"Expected 3 indices, got {len(indices_list)}"
    fb.append("\u2713 Test 1 passed (3 indices)")
    points += 5

    # Test 2: Check indices are valid (0-7), whole numbers, and all different.
    # Without the last two checks an answer such as [0, 0, 0] or [0.5, 1.5, 2.5]
    # would be accepted as a valid top-3 ranking.
    assert all(0 <= idx <= 7 for idx in indices_list), f"Indices out of range: {indices_list}"
    assert all(int(idx) == idx for idx in indices_list), f"Indices must be integers: {indices_list}"
    assert len({int(idx) for idx in indices_list}) == len(indices_list), f"Indices must be distinct: {indices_list}"
    fb.append("\u2713 Test 2 passed (valid indices)")
    points += 5

    # Test 3: Check that finance-related docs are ranked higher than sports
    # Docs 0-4 are finance, docs 5-7 are sports
    # For query about central bank rates, finance docs should dominate top 3
    finance_count = sum(1 for idx in indices_list if idx <= 4)
    assert finance_count >= 2, f"Expected mostly finance docs in top 3 for rate query, got {finance_count}"
    fb.append("\u2713 Test 3 passed (relevant docs retrieved)")
    points += 5

except AssertionError as e:
    fb.append(f"\u2717 {e}")
except Exception as e:
    # Anything unexpected is still reported so the task always gets recorded.
    fb.append(f"\u2717 Runtime error: {e}")

record_score("Task 3 - Top-K Retrieval", points, max_points, fb)

In [None]:
# Task 4: FAISS Index and Search (20 points) [Coding]
# Four 5-point checks run in order; the first failing check ends the task.
points = 0
fb = []
max_points = 20

try:
    # Test 1: Check faiss_index exists and is correct type
    assert "faiss_index" in globals(), "faiss_index variable not defined"
    idx = globals()["faiss_index"]
    assert idx is not None, "faiss_index is None"
    assert hasattr(idx, 'ntotal'), "faiss_index doesn't look like a FAISS index"
    assert idx.ntotal == 8, f"Expected 8 vectors in index, got {idx.ntotal}"
    fb.append("\u2713 Test 1 passed (FAISS index created with 8 vectors)")
    points += 5

    # Test 2: Check faiss_distances exists and has correct shape
    assert "faiss_distances" in globals(), "faiss_distances variable not defined"
    distances = globals()["faiss_distances"]
    assert distances is not None, "faiss_distances is None"
    assert hasattr(distances, 'shape'), "faiss_distances should be a numpy array"
    assert distances.shape == (1, 3), f"Expected faiss_distances shape (1, 3), got {distances.shape}"
    fb.append("\u2713 Test 2 passed (distances shape correct)")
    points += 5

    # Test 3: Check faiss_indices exists and has correct shape
    assert "faiss_indices" in globals(), "faiss_indices variable not defined"
    f_indices = globals()["faiss_indices"]
    assert f_indices is not None, "faiss_indices is None"
    assert hasattr(f_indices, 'shape'), "faiss_indices should be a numpy array"
    assert f_indices.shape == (1, 3), f"Expected faiss_indices shape (1, 3), got {f_indices.shape}"
    fb.append("\u2713 Test 3 passed (indices shape correct)")
    points += 5

    # Test 4: Check that FAISS returns relevant results (finance docs for rate query)
    # Docs 0-4 are finance. The lower bound matters: a negative id (FAISS uses -1
    # for "no result") must not be counted as a finance document.
    indices_list = list(f_indices[0])
    finance_count = sum(1 for doc_id in indices_list if 0 <= doc_id <= 4)
    assert finance_count >= 2, f"FAISS should return finance docs for rate query, got {indices_list}"
    fb.append("\u2713 Test 4 passed (FAISS returns relevant results)")
    points += 5

except AssertionError as e:
    fb.append(f"\u2717 {e}")
except Exception as e:
    # Anything unexpected is still reported so the task always gets recorded.
    fb.append(f"\u2717 Runtime error: {e}")

record_score("Task 4 - FAISS Index", points, max_points, fb)

In [None]:
# Task 5: Build RAG Prompt (15 points) [Coding]
# Three 5-point checks run in order; the first failing check ends the task.
points = 0
fb = []
max_points = 15

try:
    assert "rag_prompt" in globals(), "rag_prompt variable not defined"
    prompt = globals()["rag_prompt"]
    assert prompt is not None, "rag_prompt is None"
    assert isinstance(prompt, str), f"rag_prompt should be a string, got {type(prompt)}"

    prompt_lower = prompt.lower()

    # Test 1: Check prompt contains the query
    # The query text itself is required: a phrase such as "central bank" also occurs
    # in corpus documents 0 and 1, so it cannot prove the query was included.
    # Whitespace is collapsed and trailing punctuation dropped so that formatting
    # differences do not matter.
    query_text = " ".join(query.lower().split()).rstrip("?!. ")
    prompt_text = " ".join(prompt_lower.split())
    assert query_text and query_text in prompt_text, "Prompt should include the query"
    fb.append("\u2713 Test 1 passed (contains query)")
    points += 5

    # Test 2: Check prompt has context instruction
    has_context = any(word in prompt_lower for word in ["context", "document", "based on", "following"])
    assert has_context, "Prompt should reference context/documents"
    fb.append("\u2713 Test 2 passed (has context instruction)")
    points += 5

    # Test 3: Check prompt includes actual corpus documents
    # Only the first 20 characters of each finance document (indices 0-4) are compared.
    has_docs = any(doc[:20].lower() in prompt_lower for doc in corpus[:5])
    assert has_docs, "Prompt should include retrieved documents from corpus"
    fb.append("\u2713 Test 3 passed (includes retrieved documents)")
    points += 5

except AssertionError as e:
    fb.append(f"\u2717 {e}")
except Exception as e:
    # Anything unexpected is still reported so the task always gets recorded.
    fb.append(f"\u2717 Runtime error: {e}")

record_score("Task 5 - RAG Prompt", points, max_points, fb)

In [None]:
# Task 6: RAG and Grounding Explanation (20 points) [Written]
# All-or-nothing: full marks when every written-answer rule passes, otherwise 0
# with one feedback line per failed rule.
points = 0
fb = []
try:
    r = WRITTEN_RULES["Task 6"]
    assert r["var"] in globals(), f"{r['var']} variable missing"
    text = globals()[r["var"]]
    # An unanswered template typically leaves None here; report it as feedback.
    assert text is not None, f"{r['var']} is None"
    assert isinstance(text, str), f"{r['var']} should be a string, got {type(text)}"

    passed, reasons = validate_answer(
        text,
        required_groups=r["groups"],
        forbidden_strings=AI_PHRASES,
        forbidden_characters=FORBIDDEN_CHARS,
        min_length=r["min_len"]
    )

    if passed:
        points = r["max_points"]
        fb.append("\u2713 Passed")
    else:
        for reason in reasons:
            fb.append(f"\u2717 {reason}")

except AssertionError as e:
    fb.append(f"\u2717 {e}")
except Exception as e:
    # Same handling as Tasks 1-5: an unexpected error must not abort the cell,
    # otherwise record_score() below never runs and Task 6 is missing from the results.
    fb.append(f"\u2717 Runtime error: {e}")
record_score("Task 6 - RAG Explanation", points, WRITTEN_RULES["Task 6"]["max_points"], fb)

---
## Generate Results

In [None]:
import json, datetime, re

# Sort scores by task number (extract number from "Task N - ...")
def task_sort_key(item):
    """Return the task number found in a ``(task_name, value)`` pair.

    The first "Task <digits>" occurrence in the name supplies the number;
    names without one sort last with key 99.
    """
    task_name = item[0]
    found = re.search(r'Task (\d+)', task_name)
    if found is None:
        return 99
    return int(found.group(1))

# Order the recorded results by task number; feedback follows the same order.
sorted_scores = dict(sorted(__assessment_scores.items(), key=task_sort_key))
sorted_feedback = {name: __assessment_feedback[name] for name in sorted_scores}

# Calculate totals (each score entry is an (earned, available) pair)
total_points = sum(earned for earned, _ in sorted_scores.values())
max_possible = sum(available for _, available in sorted_scores.values())

# Avoid dividing by zero when no task has been recorded.
if max_possible > 0:
    percentage = round(100 * total_points / max_possible, 1)
else:
    percentage = 0

result = {
    "scores": sorted_scores,
    "feedback": sorted_feedback,
    "total": f"{total_points}/{max_possible}",
    "percentage": percentage,
    "timestamp": datetime.datetime.now().isoformat(),
}

# Persist the full result for the grading pipeline.
with open("assessment_result.json", "w") as f:
    json.dump(result, f, indent=2)

# Human-readable summary
banner = "=" * 50
print(f"\n{banner}")
print(f"ASSESSMENT RESULTS: {total_points}/{max_possible} ({result['percentage']}%)")
print(f"{banner}\n")

for task, (pts, mx) in sorted_scores.items():
    # Full marks, zero marks, or partial credit
    if pts == mx:
        status = "\u2713"
    elif pts == 0:
        status = "\u2717"
    else:
        status = "~"
    print(f"{status} {task}: {pts}/{mx}")
    for line in sorted_feedback[task]:
        print(f"    {line}")

result