COGS 160 Auto-Grader Notebook for Architect Assignments

Imports

In [1]:
import os
import re
import json
import fitz  
from PIL import Image
from io import BytesIO
from urllib.parse import urlparse
import spacy
import google.generativeai as genai
from dotenv import load_dotenv
from IPython.display import display
# Load the small English spaCy pipeline once, at import time; evaluate_biography() calls it to tokenize text.
nlp = spacy.load("en_core_web_sm")

  from .autonotebook import tqdm as notebook_tqdm


Configure Gemini

In [2]:
# NOTE: `load_dotenv` is already imported in the imports cell; this repeated import is redundant but harmless.
from dotenv import load_dotenv
# Pull variables from a local .env file (if one exists) into the process environment.
load_dotenv()
# The key is read from the environment instead of being hard-coded; os.getenv returns None when it is unset.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY) 
# Text model: used by the biography, reference and rubric evaluators below.
text_model = genai.GenerativeModel("gemini-1.5-pro-latest")
# Vision model: used by evaluate_image_relevance() to score the extracted images.
# NOTE(review): confirm "gemini-pro-vision" is still a model name the API serves.
vision_model = genai.GenerativeModel("gemini-pro-vision")

Rubric

In [3]:
# Maximum points available per rubric item, keyed by the item names used in the score dictionaries.
# NOTE(review): these weights sum to 110, while generate_scorecard() applies 90/80/70 letter-grade
# cut-offs — confirm the intended total.
rubric = {
    "architect_chosen": 5,
    "bio_750_words": 10,
    "bio_structure": 10,
    "bio_references": 10,
    "10_buildings_with_images": 15,
    "image_quality": 10,
    "image_citations": 10,
    "personal_bio_photo": 5,
    "doc_and_slides": 5,
    "image_relevance": 10,
    "presentation_polish": 20,
}

 Extract text from PDF
 

In [17]:
# Submission to grade. NOTE(review): absolute, machine-specific path — change it before running elsewhere.
pdf_path = "/Users/tanishqsingh/Desktop/XR_Lab/cogs160submisson1.pdf"

In [5]:
def extract_text_from_pdf(pdf_path):
    """Return the concatenated plain text of every page of a PDF.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        str: The text of all pages, in page order, with nothing inserted
        between pages.
    """
    print(f"🔍 Extracting text from: {pdf_path}")
    doc = fitz.open(pdf_path)
    try:
        # Join once instead of growing a string with repeated ``+=``.
        text = "".join(page.get_text() for page in doc)
    finally:
        # Always release the underlying file, even if a page fails to parse.
        doc.close()
    print("✔ Extracted text from PDF")
    return text

 Extract images from PDF

In [6]:
def extract_images_from_pdf(pdf_path, min_width=1200, save_folder="/Users/tanishqsingh/Desktop/XR_Lab/Extracted_images"):
    """Extract every embedded image of a PDF, save each as PNG, and describe it.

    Args:
        pdf_path: Path of the PDF file, passed to ``fitz.open``.
        min_width: Minimum pixel width for an image to be flagged high-resolution.
        save_folder: Directory the PNG copies are written to; created if missing.
            NOTE(review): the default is an absolute, machine-specific path.

    Returns:
        list[dict]: One entry per extracted image with the keys ``page``
        (1-based page number), ``width``, ``height``, ``image`` (the PIL image
        as opened from the embedded bytes) and ``is_high_res``.
    """
    print(f" Extracting images from: {pdf_path}")
    # Modes the PNG writer accepts; anything else (e.g. CMYK) would make ``save`` raise,
    # so those images are converted to RGB for the on-disk copy only.
    png_modes = {"1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"}
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(save_folder, exist_ok=True)
    image_data = []
    doc = fitz.open(pdf_path)
    try:
        for page_index in range(len(doc)):
            for img_index, img in enumerate(doc.get_page_images(page_index)):
                xref = img[0]  # first tuple field is the image's cross-reference number
                image_bytes = doc.extract_image(xref)["image"]
                img_pil = Image.open(BytesIO(image_bytes))
                width, height = img_pil.size
                out_path = os.path.join(save_folder, f"page{page_index+1}_img{img_index+1}.png")
                saveable = img_pil if img_pil.mode in png_modes else img_pil.convert("RGB")
                saveable.save(out_path)
                image_data.append({
                    "page": page_index + 1,
                    "width": width,
                    "height": height,
                    "image": img_pil,
                    "is_high_res": width >= min_width
                })
    finally:
        # The PIL images are backed by in-memory bytes, so the document can be released here.
        doc.close()
    print(f" Extracted {len(image_data)} images from PDF")
    return image_data

Evaluate biography structure & word count

In [7]:
def evaluate_biography(text):
    """Score biography length and section coverage.

    Counts the alphabetic tokens produced by the module-level ``nlp`` pipeline
    and checks, case-insensitively, how many of the required section phrases
    occur anywhere in ``text``.

    Args:
        text: The text to evaluate.

    Returns:
        dict with ``word_count`` (alphabetic token count), ``structure_score``
        (share of required phrases found, scaled to ``rubric["bio_structure"]``
        and truncated) and ``score`` (full ``rubric["bio_750_words"]`` at 700+
        words, otherwise proportional to ``word_count / 750`` and truncated).
    """
    print(" Evaluating biography: checking word count and required sections")

    # Word count: only purely alphabetic tokens are counted.
    alpha_tokens = 0
    for token in nlp(text):
        if token.is_alpha:
            alpha_tokens += 1

    # Structure: plain substring search for each expected phrase.
    expected_phrases = ("who they are", "studied", "first building", "significance", "influence")
    haystack = text.lower()
    found = 0
    for phrase in expected_phrases:
        if phrase.lower() in haystack:
            found += 1
    structure_points = int((found / len(expected_phrases)) * rubric["bio_structure"])

    # Length: 700 words already earns full marks; shorter texts are pro-rated against 750.
    if alpha_tokens >= 700:
        length_points = rubric["bio_750_words"]
    else:
        length_points = int((alpha_tokens / 750) * rubric["bio_750_words"])

    return {
        "word_count": alpha_tokens,
        "structure_score": structure_points,
        "score": length_points,
    }

Gemini Bio Evaluation

In [8]:
def gemini_bio_score(text, architect_name, debug=False):
    """Ask the Gemini text model to grade the biography and return its feedback.

    Args:
        text: The submission text sent to the model alongside the grading prompt.
        architect_name: Name of the architect, interpolated into the prompt.
        debug: When True, print the initial evaluation and additionally ask the
            model once to reconsider whether the grading was too harsh. The
            reconsidered answer is only printed; it never changes the return value.

    Returns:
        str: The text of the model's initial evaluation.
    """
    print(f" Sending biography text to Gemini for evaluation of {architect_name}")
    prompt = f"""
You are grading a student's biography of the architect {architect_name}.
Evaluate:
- Who they are
- What they’re famous for
- Where they studied
- Significance in architecture
- Influence of buildings
- Types of buildings
- First building attributed
Give a score out of 10 and 1-paragraph feedback.
"""
    response = text_model.generate_content([prompt, text])
    if debug:
        # A single reconsideration round: the previous version repeated this block twice,
        # paying for two near-identical API calls.
        print(" Gemini Bio Evaluation (Initial):", response.text)
        print(" Asking Gemini to reconsider harshness in bio scoring...")
        retry_prompt = "Was this scoring too harsh? Re-evaluate the student’s biography with more weight on effort and project instructions."
        # Send the biography along with the first evaluation so the model has something to re-evaluate.
        reconsidered = text_model.generate_content([retry_prompt, response.text, text])
        print(" Reconsidered Bio Evaluation:", reconsidered.text)
    return response.text

Extract references from text

In [9]:
def extract_references_from_text(text):
    """Collect the lines of ``text`` that look like bibliography entries.

    A line qualifies when it contains a four-digit number in parentheses
    (e.g. ``(2019)``) and, case-insensitively, at least one of the markers
    ``doi``, ``archdaily``, ``e-architect`` or ``https://``.

    Args:
        text: Text to scan, split on newline characters.

    Returns:
        list[str]: The qualifying lines, stripped of surrounding whitespace,
        in their original order.
    """
    print("🔍 Extracting references from text")
    source_markers = ("doi", "archdaily", "e-architect", "https://")
    year_in_parens = re.compile(r"\(\d{4}\)")

    def looks_like_reference(candidate):
        # Both conditions must hold: a parenthesised year and a known source marker.
        if year_in_parens.search(candidate) is None:
            return False
        lowered = candidate.lower()
        return any(marker in lowered for marker in source_markers)

    return [candidate.strip() for candidate in text.split("\n") if looks_like_reference(candidate)]

Score references

In [10]:
def evaluate_references(ref_list):
    """Score the extracted reference list by asking Gemini to judge its quality.

    Args:
        ref_list: List of reference strings; an empty list scores 0 without
            calling the model.

    Returns:
        dict: ``{"valid_references": len(ref_list), "score": int}``. The score is
        the model's "x/10" (or "x out of 10") rating scaled to
        ``rubric["bio_references"]`` and truncated. If no rating can be parsed,
        the score falls back to one point per reference, capped at the rubric
        maximum.
    """
    print(" Evaluating references")
    if not ref_list:
        return {"valid_references": 0, "score": 0}

    max_points = rubric["bio_references"]
    joined_refs = "\n".join(ref_list)
    prompt = f"""
You are an academic writing assistant.
Below is a list of references extracted from a student's architecture assignment:

{joined_refs}

Evaluate the overall quality of these references based on the following:
- Are they properly formatted in APA style?
- Are they from credible sources (e.g., books, peer-reviewed journals, respected architecture websites)?
- Are there enough academic references (minimum of 5 is ideal)?

Give a score out of 10 for reference quality, and provide a short justification.
"""
    response = text_model.generate_content([prompt])
    print("📚 Gemini Reference Evaluation:\n", response.text)
    # The lookbehind keeps the match from starting in the middle of a number, so
    # "7.5/10" is read as 7.5 rather than as the trailing "5/10".
    score_match = re.search(
        r"(?<![\d.])(\d{1,2}(?:\.\d+)?)\s*(?:/|out of)\s*10\b",
        response.text,
        re.IGNORECASE,
    )
    if score_match:
        out_of_ten = min(10.0, float(score_match.group(1)))
        score = int(out_of_ten * max_points / 10)
    else:
        score = min(len(ref_list), max_points)
    return {"valid_references": len(ref_list), "score": score}

Score image resolution

In [11]:
def evaluate_image_quality(image_data):
    """Score the share of extracted images flagged as high resolution.

    Args:
        image_data: List of image dicts, each carrying an ``is_high_res`` flag.

    Returns:
        dict: ``high_res_count`` (number of flagged images) and ``score`` (that
        share of all images scaled to ``rubric["image_quality"]``, truncated;
        0 for an empty list).
    """
    print("🔍 Evaluating image resolution")
    flagged = [entry for entry in image_data if entry["is_high_res"]]
    n_flagged = len(flagged)
    # Guard against division by zero when no images were extracted.
    denominator = len(image_data) or 1
    share = n_flagged / denominator
    return {
        "high_res_count": n_flagged,
        "score": int(share * rubric["image_quality"]),
    }

Gemini: score image relevance

In [12]:
def evaluate_image_relevance(image_data, architect_name, debug=False):
    """Ask the Gemini vision model to rate each image's relevance and average the ratings.

    Args:
        image_data: List of image dicts; each needs an ``image`` entry (sent to
            the model) and a ``page`` entry (used in log messages).
        architect_name: Name of the architect, interpolated into the prompt.
        debug: When True, print the model's feedback for every image.

    Returns:
        dict: ``avg_score`` (mean of the per-image ratings out of 10; 0.0 for an
        empty list) and ``score`` (the average scaled to
        ``rubric["image_relevance"]`` and truncated). An image whose rating
        cannot be parsed, or whose model call fails, counts as a neutral 5.
    """
    print("🔍 Evaluating image relevance using Gemini")
    fallback_score = 5
    # The lookbehind stops "8.5/10" from being read as "5/10".
    score_pattern = re.compile(r"(?<![\d.])(\d{1,2}(?:\.\d+)?)\s*/\s*10\b")
    # The prompt does not depend on the image, so it is built once.
    prompt = f"""
You are evaluating whether this image is relevant to a project on the architect {architect_name}.
1. Does this image depict a building by {architect_name}? If yes, say which building if you can.
2. Is this an interior or exterior shot?
3. Is this a high-quality academic image that clearly shows architectural features (composition, lighting, layout)?
Give a score out of 10 for academic relevance with a brief justification.
"""
    relevance_scores = []
    for img in image_data:
        score = fallback_score
        try:
            response = vision_model.generate_content([img["image"], prompt])
            if debug:
                print(f"📷 Gemini Vision Feedback (Page {img['page']}):", response.text)
            match = score_pattern.search(response.text)
            if match:
                score = min(10.0, float(match.group(1)))
        except Exception as exc:
            # Best effort: keep grading the remaining images, but make the failure visible
            # and let KeyboardInterrupt/SystemExit propagate.
            page = img.get("page", "?") if isinstance(img, dict) else "?"
            print(f"⚠ Gemini vision call failed for page {page}; using {fallback_score}/10: {exc}")
        relevance_scores.append(score)
    avg_score = sum(relevance_scores) / max(1, len(relevance_scores))
    return {"avg_score": avg_score, "score": int((avg_score / 10) * rubric["image_relevance"])}

Gemini: score remaining rubric items

In [19]:
def gemini_full_rubric_eval(text, architect_name, debug=False):
    """Have Gemini grade the remaining rubric items and convert them to rubric points.

    The model rates six categories from 1 to 5. Each rating is located by its
    category heading and read from the first ``x/5`` that follows it (so digits
    inside the heading itself, such as the "10" in "Coverage of 10 Buildings",
    are not mistaken for the score). The rating is then scaled to that item's
    weight in ``rubric`` so it is on the same scale as the other evaluators'
    scores.

    Args:
        text: The submission text sent to the model alongside the prompt.
        architect_name: Name of the architect, interpolated into the prompt.
        debug: Currently unused; the model's response is always printed.

    Returns:
        dict: One entry per rubric key (``architect_chosen``, ``doc_and_slides``,
        ``image_citations``, ``10_buildings_with_images``, ``personal_bio_photo``,
        ``presentation_polish``), each ``{"score": <rubric points>,
        "raw_score": <rating 0-5>}``. A category whose rating cannot be found
        defaults to a rating of 3.
    """
    print(" Gemini evaluating extended rubric")
    prompt = (
        f"""
You are grading a student submission for a university architecture course. The project includes:

- A biography of {architect_name}
- 10 buildings they designed, with exterior and interior images
- Proper image citations
- 5–10 academic references
- A personal student bio and image

Follow this official rubric, scoring each from 1–5:

1. **Architect Selection & Scope** – Is the architect from Book Two? Is the scope appropriate?
2. **Organization & Doc Setup** – Table of contents? Clear sections? Logical structure?
3. **Image Citation & Attribution** – Are URLs provided? Are captions included under each image?
4. **Coverage of 10 Buildings** – Are there 10 buildings? With 5+ interior images per building?
5. **Student Bio & Photo** – Is the student’s 1-page bio included after the main content? Is the photo clear and professional? there is always an image included so should be a 5
6. **Presentation Polish** – Consistent formatting, readability, academic tone, clean layout

For each category, give a score from 1 (poor) to 5 (excellent), and justify briefly.
"""
    )
    response = text_model.generate_content([prompt, text])
    response_text = response.text
    print(response_text)

    # A rating looks like "4/5" or "4.5/5"; the lookbehind avoids starting mid-number.
    rating = r"(?<![\d.])([0-5])(?:\.\d+)?\s*/\s*5\b"
    default_rating = 3

    def extract_score(*labels):
        # Prefer a rating on the heading line itself; otherwise accept the first
        # rating on any later line (e.g. a separate "Score: x/5" line).
        for flags in (re.IGNORECASE, re.IGNORECASE | re.DOTALL):
            for label in labels:
                match = re.search(re.escape(label) + r".*?" + rating, response_text, flags)
                if match:
                    return int(match.group(1))
        return default_rating

    def weighted(rubric_key, *labels):
        raw = extract_score(*labels)
        return {"score": int(raw * rubric[rubric_key] / 5), "raw_score": raw}

    return {
        "architect_chosen": weighted("architect_chosen", "Architect Selection"),
        "doc_and_slides": weighted("doc_and_slides", "Organization"),
        "image_citations": weighted("image_citations", "Image Citation"),
        "10_buildings_with_images": weighted("10_buildings_with_images", "Coverage"),
        # The prompt names this category "Student Bio & Photo"; "Personal Bio" is kept as a fallback.
        "personal_bio_photo": weighted("personal_bio_photo", "Student Bio", "Personal Bio"),
        "presentation_polish": weighted("presentation_polish", "Presentation"),
    }

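Gemini: score remaining rubric items with a chain-of-thought prompt (this cell redefines `gemini_full_rubric_eval` from the previous cell; whichever cell is run last takes effect)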

In [None]:
def gemini_full_rubric_eval(text, architect_name, debug=False):
    """Chain-of-thought variant: have Gemini reason per category, then score it.

    NOTE: a function with this same name is defined in an earlier cell; running
    this cell replaces it.

    The prompt asks for a heading, step-by-step reasoning and a ``Score: x/5``
    line per category. Each rating is located by its category heading and read
    from the first ``x/5`` that follows it, searching across lines because the
    score is not on the heading line. The rating is then scaled to that item's
    weight in ``rubric`` so it is on the same scale as the other evaluators'
    scores.

    Args:
        text: The submission text sent to the model alongside the prompt.
        architect_name: Name of the architect, interpolated into the prompt.
        debug: Currently unused; the model's response is always printed.

    Returns:
        dict: One entry per rubric key (``architect_chosen``, ``doc_and_slides``,
        ``image_citations``, ``10_buildings_with_images``, ``personal_bio_photo``,
        ``presentation_polish``), each ``{"score": <rubric points>,
        "raw_score": <rating 0-5>}``. A category whose rating cannot be found
        defaults to a rating of 3.
    """
    print(" Gemini evaluating extended rubric")
    prompt = (
        f"""
You are grading a student submission for a university architecture course. The project includes:

- A biography of {architect_name}
- 10 buildings they designed, with exterior and interior images
- Proper image citations
- 5–10 academic references
- A personal student bio and image

Follow this official rubric. For **each category**, think step by step. First, describe your reasoning based on what you observed in the submission. Then, provide a score from 1 (poor) to 5 (excellent). Be generous but fair.

Use the following format per category:

**[Category Name]**
Step-by-step reasoning: ...
Score: x/5

The categories are:

1. **Architect Selection & Scope**  
   - Is the architect from Book Two?  
   - Is the project’s coverage of their work appropriate?

2. **Organization & Doc Setup**  
   - Does the document have a table of contents?  
   - Are sections clearly separated and logically ordered?

3. **Image Citation & Attribution**  
   - Are all images properly captioned with source or photographer?  
   - Are links or attributions provided clearly?

4. **Coverage of 10 Famous Buildings**  
   - Are 10 buildings discussed?  
   - Are there multiple images (exterior + 5+ interior) per building?

5. **Student Bio & Photo**  
   - Is there a 1-page student bio included after the main content?  
   - Is a photo present and reasonably professional?

6. **Presentation & Polish**  
   - Is formatting consistent?  
   - Is the writing clear and academically appropriate?  
   - Does the layout look well-organized?

Please output your full evaluation using the structure above. Think carefully for each item before scoring. Be constructive in feedback.
"""
    )
    response = text_model.generate_content([prompt, text])
    response_text = response.text
    print(response_text)

    # A rating looks like "4/5" or "4.5/5"; the lookbehind avoids starting mid-number
    # (and skips digits in headings such as "Coverage of 10 Famous Buildings").
    rating = r"(?<![\d.])([0-5])(?:\.\d+)?\s*/\s*5\b"
    default_rating = 3

    def extract_score(*labels):
        # Prefer a rating on the heading line itself; otherwise accept the first
        # rating on any later line, which is where "Score: x/5" is requested.
        for flags in (re.IGNORECASE, re.IGNORECASE | re.DOTALL):
            for label in labels:
                match = re.search(re.escape(label) + r".*?" + rating, response_text, flags)
                if match:
                    return int(match.group(1))
        return default_rating

    def weighted(rubric_key, *labels):
        raw = extract_score(*labels)
        return {"score": int(raw * rubric[rubric_key] / 5), "raw_score": raw}

    return {
        "architect_chosen": weighted("architect_chosen", "Architect Selection"),
        "doc_and_slides": weighted("doc_and_slides", "Organization"),
        "image_citations": weighted("image_citations", "Image Citation"),
        "10_buildings_with_images": weighted("10_buildings_with_images", "Coverage"),
        # The prompt names this category "Student Bio & Photo"; "Personal Bio" is kept as a fallback.
        "personal_bio_photo": weighted("personal_bio_photo", "Student Bio", "Personal Bio"),
        "presentation_polish": weighted("presentation_polish", "Presentation"),
    }

Score aggregation

In [14]:
def generate_scorecard(scores):
    """Summarise per-item results into totals and a letter grade.

    Args:
        scores: Mapping of rubric item name to a dict holding at least a
            ``score`` entry.

    Returns:
        dict with ``scorecard`` (item name -> score), ``final_score`` (sum of
        all scores), ``grade`` ("A" from 90, "B" from 80, "C" from 70, otherwise
        "D") and ``details`` (the ``scores`` mapping itself, not a copy).
    """
    print(" Generating scorecard")
    per_item = {}
    running_total = 0
    for item_name, detail in scores.items():
        points = detail["score"]
        per_item[item_name] = points
        running_total += points

    # Walk the cut-offs from the top; anything below 70 is a "D".
    if running_total >= 90:
        letter = "A"
    elif running_total >= 80:
        letter = "B"
    elif running_total >= 70:
        letter = "C"
    else:
        letter = "D"

    return {
        "scorecard": per_item,
        "final_score": running_total,
        "grade": letter,
        "details": scores,
    }

Main pipeline

In [20]:
def run_autograder(pdf_path, architect_name="Bjarke Ingels"):
    """Run the full grading pipeline for one PDF submission.

    Extracts text, images and references from the PDF, runs every evaluator,
    prints Gemini's biography feedback, and aggregates the results.

    Args:
        pdf_path: Path of the submission PDF.
        architect_name: Architect the submission is about; forwarded to the
            Gemini-based evaluators.

    Returns:
        dict: The scorecard built by ``generate_scorecard`` from all collected
        scores (previously nothing was returned, so callers received ``None``).
    """
    print(" Starting pipeline")
    doc_text = extract_text_from_pdf(pdf_path)
    images = extract_images_from_pdf(pdf_path)
    references = extract_references_from_text(doc_text)

    # Evaluate the biography once and reuse both results instead of running the
    # tokenizer over the whole document twice.
    bio_eval = evaluate_biography(doc_text)

    scores = {
        "bio_750_words": {"score": bio_eval["score"]},
        "bio_structure": {"score": bio_eval["structure_score"]},
        "bio_references": evaluate_references(references),
        "image_quality": evaluate_image_quality(images),
        "image_relevance": evaluate_image_relevance(images, architect_name)
    }
    gemini_scores = gemini_full_rubric_eval(doc_text, architect_name)
    scores.update(gemini_scores)
    bio_feedback = gemini_bio_score(doc_text, architect_name)
    print("\n Gemini Feedback on Bio:\n", bio_feedback)
    print(" Evaluation complete.")
    return generate_scorecard(scores)

RESULTS

In [21]:
# Run the grading pipeline on the configured submission and print whatever it returns as indented JSON.
result = run_autograder(pdf_path, "Bjarke Ingels")  
print(json.dumps(result, indent=2))

 Starting pipeline
🔍 Extracting text from: /Users/tanishqsingh/Desktop/XR_Lab/cogs160submisson1.pdf
✔ Extracted text from PDF
 Extracting images from: /Users/tanishqsingh/Desktop/XR_Lab/cogs160submisson1.pdf
 Extracted 79 images from PDF
🔍 Extracting references from text
 Evaluating biography: checking word count and required sections
 Evaluating biography: checking word count and required sections
 Evaluating references
🔍 Evaluating image resolution
🔍 Evaluating image relevance using Gemini
 Gemini evaluating extended rubric
Here's a grading of the student's submission based on the provided rubric:

* **1. Architect Selection & Scope (5/5):** Bjarke Ingels is a relevant and appropriate choice for a contemporary architecture course. The scope of covering 10 buildings is acceptable, providing a reasonable overview of his work.

* **2. Organization & Doc Setup (3/5):**  While a table of contents is present and the sections are generally clear, the document's structure could be improved. 