# Main Pipeline

In [None]:
import os
from typing import Dict, List
import fitz
import google.generativeai as genai
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from typing import List
from pydantic import BaseModel, Field

class Aim(BaseModel):
    """Represents a single aim / learning outcome / objective from the syllabus."""
    # Element type of SyllabusBreakdown.aims; both fields are required (Field(...)).
    # NOTE(review): pydantic normally copies the class docstring and the Field
    # descriptions into the generated schema, so rewording them may change what
    # the model is shown — confirm before editing the text.
    title: str = Field(..., description="A concise title summarizing the aim or learning outcome.")
    description: str = Field(..., description="A detailed description of the aim or learning outcome, explaining what students are expected to achieve.")

class Topic(BaseModel):
    """Represents a single topic from the syllabus."""
    # Element type of SyllabusBreakdown.topics; both fields are required (Field(...)).
    # NOTE(review): the docstring and Field descriptions presumably end up in the
    # schema sent with structured-output requests — confirm before rewording.
    title: str = Field(..., description="A concise title summarizing the topic.")
    description: str = Field(..., description="A detailed description of the topic, including subtopics or specific content covered.")

class SyllabusBreakdown(BaseModel):
    """Represents the breakdown of a syllabus chunk."""
    # Schema handed to `with_structured_output` in SyllabusParser.parse_syllabus;
    # one instance is produced per extracted PDF chunk.
    # Both lists are required fields (Field(...)).
    aims: List[Aim] = Field(..., description="List of aims / learning outcomes / objectives extracted from the syllabus.")
    topics: List[Topic] = Field(..., description="List of topics extracted from the syllabus.")

class SyllabusParser:
    """Extract aims and topics from a syllabus PDF with a Gemini chat model.

    The PDF is read in page chunks, each chunk is sent through a structured
    output chain that yields a ``SyllabusBreakdown``, and the per-chunk results
    are merged and de-duplicated by title.
    """

    def __init__(self, api_key: str):
        """Configure the Gemini SDK and build the chat model.

        Args:
            api_key: Google Generative AI API key.

        Raises:
            ValueError: If ``api_key`` is empty or ``None``.
        """
        if not api_key:
            raise ValueError("API key must be provided.")
        genai.configure(api_key=api_key)
        # Hand the key to the LangChain model explicitly so the caller-supplied
        # key is the one used, instead of relying on the GOOGLE_API_KEY
        # environment variable happening to hold the same value.
        self.model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=api_key)

    def extract_text_from_pages(self, pdf_file_path: str, start_page: int, end_page: int, interval: int, overlap: int = 1) -> Dict[str, str]:
        """Read a page range of a PDF as overlapping text chunks.

        Args:
            pdf_file_path: Path of the PDF to open.
            start_page: First page index to read.
            end_page: Page index at which reading stops (exclusive).
            interval: Number of pages between the starts of consecutive chunks.
            overlap: Extra pages appended to each chunk beyond ``interval``,
                capped at ``end_page``.

        Returns:
            Mapping of ``"chunk_<first page index>"`` to the concatenated text
            of the pages in that chunk. Page indices outside the document are
            skipped.
        """
        chunks: Dict[str, str] = {}
        # The context manager closes the document even if a page fails to load.
        with fitz.open(pdf_file_path) as document:
            total_pages = len(document)
            for chunk_start in range(start_page, end_page, interval):
                chunk_end = min(chunk_start + interval + overlap, end_page)
                chunks[f"chunk_{chunk_start}"] = "".join(
                    document.load_page(page_num).get_text()
                    for page_num in range(chunk_start, chunk_end)
                    if 0 <= page_num < total_pages
                )
        return chunks

    def syllabus_template(self):
        """Prompt template for extracting overarching aims and topics with examples."""
        # Literal JSON braces are doubled because the string is consumed by
        # PromptTemplate.from_template; {syllabus_chunk} is the only variable.
        return """
        You are an expert educational analyst. Your task is to extract **overarching aims / learning outcomes**
        and **topics** from a syllabus chunk.

        For aims / learning outcomes:
        - Provide high-level aims that summarize what students should achieve by the end of the course.
        - Each aim should include:
            - "title": a concise summary
            - "description": detailed explanation, including context and a concrete example of the skill or knowledge
        - Example of a high-level aim:
            {{
                "title": "Understand historical change and continuity",
                "description": "Students should be able to identify patterns of change and continuity across historical periods. Context: This helps students understand how societies evolve over time. Example: Compare political systems in pre- and post-World War II Europe."
            }}

        For topics:
        - Provide key content areas that the syllabus covers - such as historical concepts and facts.
        - Each topic should include:
            - "title": a concise summary
            - "description": detailed explanation including subtopics, context, and example
        - Example of a topic:
            {{
                "title": "The Cold War",
                "description": "Study the origins, major events, and outcomes of the Cold War. Context: Understand ideological conflict between superpowers. Example: Analyse the Berlin Blockade as an early flashpoint."
            }}

        Syllabus Chunk:
        {syllabus_chunk}

        Output a JSON object with two lists: 'aims' and 'topics', following the examples above:
        {{
        "aims": [{{"title": "Aim title", "description": "Detailed aim description with context and example"}}],
        "topics": [{{"title": "Topic title", "description": "Detailed topic description with context and example"}}]
        }}
        """

    @staticmethod
    def _unique_by_title(items: List[Dict[str, str]]) -> List[Dict[str, str]]:
        """Return ``items`` without later entries whose ``"title"`` was already seen."""
        seen = set()
        unique = []
        for item in items:
            if item["title"] not in seen:
                seen.add(item["title"])
                unique.append(item)
        return unique

    def parse_syllabus(self, pdf_file_path: str, start_page: int, end_page: int, interval: int, overlap: int = 1) -> Dict[str, Dict[str, Dict[str, str]]]:
        """Extract de-duplicated aims and topics from a page range of a PDF.

        Args:
            pdf_file_path: Path of the syllabus PDF.
            start_page: First page index to read.
            end_page: Page index at which reading stops (exclusive).
            interval: Number of pages between the starts of consecutive chunks.
            overlap: Extra pages appended to each chunk; forwarded to
                ``extract_text_from_pages``.

        Returns:
            ``{"aims": {"aim_1": {...}, ...}, "topics": {"topic_1": {...}, ...}}``
            where every entry is a ``{"title": ..., "description": ...}`` dict.
            Entries keep first-seen order; later entries with a title already
            seen are dropped.
        """
        syllabus_chunks = self.extract_text_from_pages(
            pdf_file_path=pdf_file_path,
            start_page=start_page,
            end_page=end_page,
            interval=interval,
            overlap=overlap,
        )
        print(f"Extracted {len(syllabus_chunks)} chunks from PDF.")

        structured_llm = self.model.with_structured_output(SyllabusBreakdown)
        prompt_template = PromptTemplate.from_template(self.syllabus_template())
        chain = prompt_template | structured_llm

        all_aims = []
        all_topics = []
        for chunk_key, chunk_text in syllabus_chunks.items():
            output: SyllabusBreakdown = chain.invoke({"syllabus_chunk": chunk_text})

            # A falsy result contributes nothing; the chunk is still reported as processed.
            if output:
                all_aims.extend({"title": aim.title, "description": aim.description} for aim in output.aims)
                all_topics.extend({"title": topic.title, "description": topic.description} for topic in output.topics)

            print(f"Processed {chunk_key}")

        # Overlapping chunks can yield the same item twice, so duplicates are removed by title.
        return {
            "aims": {f"aim_{i}": item for i, item in enumerate(self._unique_by_title(all_aims), start=1)},
            "topics": {f"topic_{i}": item for i, item in enumerate(self._unique_by_title(all_topics), start=1)},
        }


In [None]:
import fitz
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableLambda, RunnableParallel
from typing import Dict, Any, List, Literal
import google.generativeai as genai
import os
from pydantic import BaseModel, Field

# One piece of feedback attached to a span of the question paper.
# Element type of ParallelFeedback.feedback; all three fields are required.
class FeedbackItem(BaseModel):
    # Read by QuestionPaperEvaluator.process_outputs_parallel for the "**Section:**" line.
    highlighted_text: str = Field(..., description="The specific question or section of the paper that the comment applies to.")
    # Read by process_outputs_parallel for the "**Feedback:**" line.
    AI_feedback: str = Field(..., description="Constructive, specific feedback on the question paper.")
    # Restricted to the two marker names used as chain keys in get_all_chains.
    type: Literal["Syllabus Aim", "Syllabus Relevance to Topics"] = Field(..., description="Feedback type: 'Syllabus Aim' for alignment with syllabus aims, 'Syllabus Relevance to Topics' for topic relevance and weightage.")

# Structured result of one marker chain; passed to `with_structured_output`
# in QuestionPaperEvaluator.build_chain_parallel.
class ParallelFeedback(BaseModel):
    # Per-section comments for this marker.
    feedback: List[FeedbackItem] = Field(..., description="List of feedback items for this metric.")
    # Overall summary; printed as "Justification: ..." by process_outputs_parallel.
    justification: str = Field(..., description="Detailed summary on how the question paper performed in this metric.")

class QuestionPaperEvaluator:
    """Evaluate a question paper PDF against syllabus aims and topic weightage.

    One prompt chain per marker ("Syllabus Aim" and "Syllabus Relevance to
    Topics") is run in parallel over the same inputs, and the structured
    results are rendered into a single text report.
    """

    def __init__(self, api_key: str):
        """Configure the Gemini SDK and build the chat model.

        Args:
            api_key: Google Generative AI API key.

        Raises:
            ValueError: If ``api_key`` is empty or ``None``.
        """
        if not api_key:
            raise ValueError("API key must be provided.")
        genai.configure(api_key=api_key)
        # Hand the key to the LangChain model explicitly so the caller-supplied
        # key is the one used, rather than whatever GOOGLE_API_KEY contains.
        self.model = ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=api_key)
        # Not used by the chains below (they rely on structured output); kept
        # so existing code reading this attribute keeps working.
        self.output_parser = JsonOutputParser()

    def build_chain_parallel(self, prompt_template: str) -> RunnableLambda:
        """Build ``prompt | structured model`` for one marker template.

        Args:
            prompt_template: Template text with ``{placeholder}`` variables.

        Returns:
            A runnable whose output is parsed into ``ParallelFeedback``.
        """
        prompt = PromptTemplate.from_template(prompt_template)
        structured_llm = self.model.with_structured_output(ParallelFeedback)
        return prompt | structured_llm

    def get_all_chains(self) -> Dict[str, RunnableLambda]:
        """Return one chain per marker, keyed by marker name."""
        all_templates = {
            "Syllabus Aim": self.alignment_template(),
            "Syllabus Relevance to Topics": self.weightage_template(),
        }
        return {
            marker: self.build_chain_parallel(template)
            for marker, template in all_templates.items()
        }

    def run_all_markers(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Run every marker chain in parallel over the same inputs.

        Args:
            inputs: Mapping with the keys ``"question_paper"``,
                ``"syllabus_aim"`` and ``"topic_distribution"``.

        Returns:
            Mapping of marker name to that chain's result.
        """
        # Every chain is invoked with the complete ``inputs`` mapping; no
        # per-chain binding of a subset is needed.
        parallel_chain = RunnableParallel(**self.get_all_chains())
        return parallel_chain.invoke(inputs)

    def process_outputs_parallel(self, marker_outputs: Dict[str, Any]) -> str:
        """Render the parallel marker results as one structured string.

        Args:
            marker_outputs: Mapping of marker name to an object exposing
                ``justification`` and ``feedback`` (items with
                ``highlighted_text`` and ``AI_feedback``).

        Returns:
            A report with one ``### Feedback on <marker>`` section per marker,
            each terminated by a ``---`` separator line.
        """
        parts = []
        for marker, feedback_data in marker_outputs.items():
            parts.append(f"### Feedback on {marker}\n")
            if feedback_data.justification:
                parts.append(f"Justification: {feedback_data.justification}\n\n")
            # An empty or missing feedback list simply produces no item lines.
            for item in feedback_data.feedback or ():
                parts.append(f"**Section:** {item.highlighted_text}\n")
                parts.append(f"**Feedback:** {item.AI_feedback}\n\n")
            parts.append("---\n\n")
        return "".join(parts)

    def run_evaluation_workflow(self, question_paper_path: str, syllabus_aim: str, topic_distribution: str, start_page: int = 0, end_page: int = 10, interval: int = 7, overlap: int = 1):
        """Extract the paper text, run both markers and return the report.

        Args:
            question_paper_path: Path of the question paper PDF.
            syllabus_aim: Text describing the syllabus aims.
            topic_distribution: Text describing the syllabus topics.
            start_page: First page index to read.
            end_page: Page index at which reading stops (exclusive).
            interval: Pages read per extraction step.
            overlap: Accepted for backward compatibility but not applied. The
                chunks are joined back into one text, so overlapping pages
                would appear twice in the paper handed to the model.

        Returns:
            The report string produced by ``process_outputs_parallel``.
        """
        # 1. Extract text from the PDF file. Overlap is forced to 0 so that
        #    each page contributes to the joined text exactly once.
        chunks = self.extract_text_from_pages(question_paper_path, start_page, end_page, interval, 0)
        question_paper_text = "".join(chunks.values())

        inputs = {
            "question_paper": question_paper_text,
            "syllabus_aim": syllabus_aim,
            "topic_distribution": topic_distribution,
        }

        # 2. Run parallel agents
        marker_outputs = self.run_all_markers(inputs)

        # 3. Process outputs
        return self.process_outputs_parallel(marker_outputs)

    def alignment_template(self) -> str:
        """Prompt template for the "Syllabus Aim" marker.

        Variables: ``{syllabus_aim}`` and ``{question_paper}``.
        """
        return """
        You are a quality assurance agent for question papers. Your task is to evaluate a question paper for its **alignment with the syllabus's overall aim**.
        
        Syllabus Aim:
        {syllabus_aim}
        
        Question Paper:
        {question_paper}
        
        For each question in the question paper, identify if it directly aligns with a learning objective or topic in the syllabus. If a question is not aligned, or if its difficulty is inappropriate, provide specific feedback. 
        """

    def weightage_template(self) -> str:
        """Prompt template for the "Syllabus Relevance to Topics" marker.

        Variables: ``{topic_distribution}`` and ``{question_paper}``.
        """
        return """
        You are a curriculum expert. Your task is to evaluate the topic's mark weightage of a question paper based on an ideal distribution, which reflects its **relevance to the syllabus's topics**.
        
        Ideal Topic Distribution:
        {topic_distribution}
        
        Question Paper:
        {question_paper}
        
        Analyze the question paper to determine the number of marks allocated to each topic. Compare this to the ideal topic distribution provided. Identify any significant deviations where a topic is over or under-represented.
        """

    def extract_text_from_pages(self, pdf_file_path: str, start_page: int, end_page: int = None, interval: int = 5, overlap: int = 1) -> Dict[str, str]:
        """Read a page range of a PDF as (optionally overlapping) text chunks.

        Args:
            pdf_file_path: Path of the PDF to open.
            start_page: First page index to read.
            end_page: Page index at which reading stops (exclusive); ``None``
                means the end of the document.
            interval: Number of pages between the starts of consecutive chunks.
            overlap: Extra pages appended to each chunk beyond ``interval``,
                capped at ``end_page``.

        Returns:
            Mapping of ``"chunk_<first page index>"`` to the concatenated text
            of the pages in that chunk. Page indices outside the document are
            skipped.
        """
        chunks: Dict[str, str] = {}
        # The context manager closes the document even if a page fails to load.
        with fitz.open(pdf_file_path) as document:
            total_pages = len(document)
            if end_page is None:
                end_page = total_pages

            for chunk_start in range(start_page, end_page, interval):
                chunk_end = min(chunk_start + interval + overlap, end_page)
                chunks[f"chunk_{chunk_start}"] = "".join(
                    document.load_page(page_num).get_text()
                    for page_num in range(chunk_start, chunk_end)
                    if 0 <= page_num < total_pages
                )
        return chunks

In [None]:
# Syllabus parsing (run this cell first)

API_KEY = os.environ.get("GOOGLE_API_KEY")

# NOTE(review): a later cell loads this same file as `question_paper`, and the
# "sp" in the file name may mean "specimen paper" — confirm this path really
# points at the syllabus document.
pdf_file_path = r"C:\Users\leege\Documents\Govtech\Assesment\question3\2174_y24_sp_1.pdf"

# Page window handed to the parser and the chunk interval within it.
start_page, end_page, interval = 1, 19, 6

parser = SyllabusParser(api_key=API_KEY)
syllabus_data = parser.parse_syllabus(pdf_file_path, start_page, end_page, interval)

# Dump both sections of the parsed result, one entry per line.
print("\n--- Aims ---")
for aim_key, aim_text in syllabus_data['aims'].items():
    print(f"{aim_key}: {aim_text}")

print("\n--- Topics ---")
for topic_key, topic_text in syllabus_data['topics'].items():
    print(f"{topic_key}: {topic_text}")

Extracted 3 chunks from PDF.
Processed chunk_1
Processed chunk_7
Processed chunk_13

--- Aims ---
aim_1: {'title': 'Analyze historical sources and interpret their significance', 'description': 'Students will be able to evaluate the reliability and usefulness of different types of historical sources, considering their origin, purpose, and context.  Context: This skill is crucial for understanding historical events and perspectives. Example: Analyze a speech by Hitler and a letter by Rothermere to assess their perspectives on appeasement.'}
aim_2: {'title': 'Formulate and support historical arguments', 'description': "Students will be able to construct well-supported arguments about historical events, using evidence from sources and their own knowledge. Context: This requires critical thinking and the ability to synthesize information. Example: Argue the extent to which Chamberlain's policy of appeasement was successful, using evidence from various sources."}
aim_3: {'title': 'Understand

In [None]:
# Question paper evaluator (Run this second)
from typing import Dict

def convert_syllabus_to_text(syllabus_data: Dict[str, Dict[str, str]]):
    """Render parsed syllabus data as two text blocks: aims and topics.

    Each block is a header line (``--- Aims ---`` / ``--- Topics ---``)
    followed by one ``"<key>: <value>"`` line per entry of the matching
    section. A section missing from ``syllabus_data`` yields just its header.
    Leading and trailing whitespace is stripped from both blocks.

    Returns:
        A ``(aims_text, topics_text)`` tuple.
    """

    def render(header, section_name):
        # Header first, then every entry on its own newline-terminated line.
        entries = syllabus_data[section_name].items() if section_name in syllabus_data else ()
        lines = [f"{entry_key}: {entry_value}\n" for entry_key, entry_value in entries]
        return "".join([header, *lines]).strip()

    return render("--- Aims ---\n", 'aims'), render("\n--- Topics ---\n", 'topics')


# Flatten the parsed syllabus into the two text blocks used by the evaluator prompts.
syllabus_aim, syllabus_topic = convert_syllabus_to_text(syllabus_data)

API_KEY = os.environ.get("GOOGLE_API_KEY")
# NOTE(review): this file is passed below as the question paper although it is
# named "syllabus.pdf" — confirm the path is the intended document.
pdf_file_path = r"C:\Users\leege\Documents\Govtech\Assesment\question3\syllabus.pdf"

# Build the evaluator and run the whole workflow with its default page window.
evaluator = QuestionPaperEvaluator(api_key=API_KEY)
summary_result = evaluator.run_evaluation_workflow(pdf_file_path, syllabus_aim, syllabus_topic)

# Show the combined report.
print(summary_result)


### Feedback on Syllabus Aim
Justification: The provided syllabus document (Singapore–Cambridge General Certificate of Education Ordinary Level (2025) History (Syllabus 2174)) demonstrates a comprehensive alignment with the overarching syllabus aims provided. The syllabus's own stated aims, learning outcomes (covering knowledge, skills, and values), and assessment objectives collectively address the development of critical historical thinking, source analysis, argument formulation, understanding of causation and context, and the analysis of historical events and their impacts. Specific content aims regarding World War I's consequences and impact are well-integrated into the examinable topics. A minor nuance exists concerning the explicit examinability of the causes of World War I itself, as the syllabus notes it as 'non-examinable' while covering its broader consequences and impact.

**Section:** aim_1: Analyze historical sources and interpret their significance
**Feedback:** The sylla

# Evaluation Pipeline - LLM as a Judge

In [93]:
import os
from typing import Literal
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.runnables import RunnableLambda
from pydantic import BaseModel, Field
import google.generativeai as genai

# Free-text part of a judge verdict; nested inside EvaluationResult.
# All three fields are required strings (Field(...)).
class QualitativeFeedback(BaseModel):
    positives: str = Field(..., description="What the LLM feedback got right and was helpful or accurate.")
    negatives: str = Field(..., description="What the LLM feedback got wrong or where it was inaccurate or irrelevant.")
    alignment: str = Field(..., description="How well the feedback aligns with the syllabus and question paper requirements.")

# Structured verdict of FeedbackAccuracyJudge; passed to `with_structured_output`
# in FeedbackAccuracyJudge._build_judge_chain.
class EvaluationResult(BaseModel):
    # The 1-10 range is stated only in the description; no numeric bound is
    # declared on the field itself.
    score: int = Field(..., description="A numerical score from 1 to 10 evaluating the feedback's accuracy and helpfulness.")
    qualitative_feedback: QualitativeFeedback = Field(..., description="Detailed qualitative feedback on the LLM's performance.")

class FeedbackAccuracyJudge:
    """LLM-as-a-judge that scores another model's feedback on one aspect.

    The aspect (``feedback_type``) is baked into the prompt when the judge is
    constructed; ``run_evaluation`` then fills in the syllabus, question paper
    and feedback text.
    """

    def __init__(self, api_key: str, feedback_type: Literal["Syllabus Aim", "Syllabus Topics", "Clarity of Feedback"]):
        """Configure the Gemini SDK, build the model and the judge chain.

        Args:
            api_key: Google Generative AI API key.
            feedback_type: Aspect the judge focuses on.

        Raises:
            ValueError: If ``api_key`` is empty or ``None``.
        """
        if not api_key:
            raise ValueError("API key must be provided.")
        genai.configure(api_key=api_key)
        # Hand the key to the LangChain model explicitly so the caller-supplied
        # key is the one used, rather than whatever GOOGLE_API_KEY contains.
        self.model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=api_key)
        self.feedback_type = feedback_type
        self.judge_chain = self._build_judge_chain()

    def _build_judge_chain(self) -> RunnableLambda:
        """
        Builds the specific judge chain for the chosen feedback type.

        The chain is ``prompt | structured model`` and yields an
        ``EvaluationResult``.
        """
        prompt_template = self._judge_template()
        prompt = PromptTemplate.from_template(prompt_template)
        structured_llm = self.model.with_structured_output(EvaluationResult)
        return prompt | structured_llm

    def _judge_template(self) -> str:
        """
        Prompt template for the LLM Judge to evaluate the accuracy of LLM feedback.

        The returned text is an f-string result that is then parsed by
        ``PromptTemplate``: single braces remain around the three runtime
        variables (``syllabus_text``, ``question_paper_text``,
        ``llm_feedback_text``) and doubled braces remain around the literal
        JSON example.
        """
        # Escape braces so a feedback type containing "{" or "}" cannot be
        # mistaken for a template variable once PromptTemplate parses the text.
        focus = str(self.feedback_type).replace("{", "{{").replace("}", "}}")
        return f"""
            You are an expert educational assessor, acting as a judge to evaluate the accuracy and usefulness of an LLM's feedback. Your task is to analyze three pieces of information: a syllabus, a question paper, and a specific piece of feedback provided by another LLM.

            **Instructions:**
            1.  **Analyze the Syllabus:** Identify the key learning objectives, topics, and specific skills students are expected to demonstrate.
            2.  **Analyze the Question Paper:** Determine the core requirements of the question paper.
            3.  **Analyze the LLM Feedback:** Evaluate the feedback's accuracy, relevance, and helpfulness, specifically in the context of **{focus}**.
            4.  **FOCUS ON: {focus} ONLY**.

            **Your Final Judgment Should Include:**
            -   **A Score (1-10):** A numerical score from 1 to 10. A score of 1 means the feedback is completely inaccurate or irrelevant. A score of 10 means the feedback is perfectly aligned with the syllabus, highly accurate, and extremely helpful.
            -   **Qualitative Feedback:** Provide a detailed explanation of your score.
                -   **Positives:** What did the LLM feedback get right? What aspects were particularly helpful or accurate?
                -   **Negatives/Areas for Improvement:** What did the LLM feedback get wrong? Where did it miss the mark? Does it hallucinate information, or provide irrelevant suggestions?
                -   **Alignment:** Comment on how well the feedback aligns with both the syllabus and the question paper's requirements.

            **Input:**
            -   **Syllabus:** {{syllabus_text}}
            -   **Question Paper:** {{question_paper_text}}
            -   **LLM Feedback:** {{llm_feedback_text}}

            **Output Format (JSON):**
            {{{{
                "score": <integer_from_1_to_10>,
                "qualitative_feedback": {{{{
                    "positives": "...",
                    "negatives": "...",
                    "alignment": "..."
                }}}}
            }}}}
            """

    def run_evaluation(self, syllabus_text: str, question_paper_text: str, llm_feedback_text: str) -> EvaluationResult:
        """
        Runs the full evaluation workflow and returns a structured output.

        Args:
            syllabus_text: Syllabus content the feedback is judged against.
            question_paper_text: Text of the question paper.
            llm_feedback_text: The feedback being judged.

        Returns:
            The result of invoking the judge chain with these three inputs.
        """
        inputs = {
            "syllabus_text": syllabus_text,
            "question_paper_text": question_paper_text,
            "llm_feedback_text": llm_feedback_text
        }
        return self.judge_chain.invoke(inputs)

In [94]:
def split_aim_topic_feedback(text):
    """Split a combined feedback report into its aim and topic sections.

    The report is expected to contain sections introduced by the headers
    ``### Feedback on Syllabus Aim`` and
    ``### Feedback on Syllabus Relevance to Topics``. A section runs until the
    next ``### Feedback on `` header or the end of the text, so it is found
    even when it is the last (or only) section, and a ``###`` inside the
    feedback body does not cut it short.

    Args:
        text: The combined report.

    Returns:
        ``(aim_feedback, topic_feedback)``; a section that is absent is
        returned as ``""``. Surrounding whitespace is stripped.
    """
    import re

    def section(header):
        # Lazy body, terminated by the next section header or end of input.
        pattern = re.escape(header) + r'\s*(.*?)\s*(?=### Feedback on |\Z)'
        match = re.search(pattern, text, re.DOTALL)
        return match.group(1).strip() if match else ""

    aim_feedback = section('### Feedback on Syllabus Aim')
    topic_feedback = section('### Feedback on Syllabus Relevance to Topics')
    return aim_feedback, topic_feedback

def extract_text_from_pages(pdf_file_path: str, start_page: int, end_page: int = None, interval: int = 5, overlap: int = 1) -> Dict[str, str]:
    """Read a page range of a PDF as (optionally overlapping) text chunks.

    Args:
        pdf_file_path: Path of the PDF to open.
        start_page: First page index to read.
        end_page: Page index at which reading stops (exclusive); ``None``
            means the end of the document.
        interval: Number of pages between the starts of consecutive chunks.
        overlap: Extra pages appended to each chunk beyond ``interval``,
            capped at ``end_page``.

    Returns:
        Mapping of ``"chunk_<first page index>"`` to the concatenated text of
        the pages in that chunk. Page indices outside the document are skipped.
    """
    chunks = {}
    # The context manager closes the document even if a page fails to load.
    with fitz.open(pdf_file_path) as document:
        total_pages = len(document)
        if end_page is None:
            end_page = total_pages

        for chunk_start in range(start_page, end_page, interval):
            chunk_end = min(chunk_start + interval + overlap, end_page)
            chunks[f"chunk_{chunk_start}"] = "".join(
                document.load_page(page_num).get_text()
                for page_num in range(chunk_start, chunk_end)
                if 0 <= page_num < total_pages
            )
    return chunks

# Preparing docs
pdf_file_path = r"C:\Users\leege\Documents\Govtech\Assesment\question3\2174_y24_sp_1.pdf"
# extract_text_from_pages returns a dict of chunks; the judge prompt needs the
# paper as one string, so the chunk texts are joined (a single chunk here).
question_paper = "".join(extract_text_from_pages(pdf_file_path, 0, 10, 10, 1).values())
aim_feedback, topic_feedback = split_aim_topic_feedback(summary_result)

# Flow
API_KEY = os.environ.get("GOOGLE_API_KEY")

# Each judge is run on the syllabus text and feedback section that match its focus.
aim_judge = FeedbackAccuracyJudge(api_key=API_KEY, feedback_type="Syllabus Aim")
aim_result = aim_judge.run_evaluation(syllabus_text=syllabus_aim, question_paper_text=question_paper, llm_feedback_text=aim_feedback)
print(aim_result)
print('-'*50)

topic_judge = FeedbackAccuracyJudge(api_key=API_KEY, feedback_type="Syllabus Topics")
topic_result = topic_judge.run_evaluation(syllabus_text=syllabus_topic, question_paper_text=question_paper, llm_feedback_text=topic_feedback)
print(topic_result)
print('-'*50)

# Clarity is judged over the whole report, against both aims and topics.
clarity_judge = FeedbackAccuracyJudge(api_key=API_KEY, feedback_type="Clarity of Feedback")
clarity_result = clarity_judge.run_evaluation(syllabus_text=f"{syllabus_aim}\n\n{syllabus_topic}", question_paper_text=question_paper, llm_feedback_text=summary_result)
print(clarity_result)


score=10 qualitative_feedback=QualitativeFeedback(positives="The LLM feedback accurately identifies the syllabus's strong alignment with all eight aims.  It correctly highlights the syllabus's emphasis on source analysis, argument formulation, understanding of causation and context, and the analysis of historical events and their impacts. The detailed breakdown of each aim, referencing specific sections of the syllabus (Aims, Learning Outcomes, Assessment Objectives) demonstrates a thorough understanding of the document.  The feedback is well-structured and easy to understand.", negatives="While the feedback points out the 'non-examinable' status of WWI causes in the syllabus, it doesn't significantly detract from the overall positive assessment. The minor discrepancy is acknowledged appropriately. ", alignment="The alignment between the feedback and the syllabus is excellent. The LLM feedback accurately reflects the syllabus aims and demonstrates a clear understanding of how the sylla

# Evaluation - Golden dataset Augmentation

In [99]:
import os
from typing import Literal, List, Dict, Any, Optional
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.runnables import RunnableLambda
from pydantic import BaseModel, Field
import google.generativeai as genai

class TestSet(BaseModel):
    """Represents a single test case for evaluation."""
    # Element type of TestSetGenerationResult.test_cases.
    text: str = Field(..., description="The generated question or text based on the syllabus.")
    # Required but nullable: Field(...) makes the key mandatory while
    # Optional[str] admits None for the "irrelevant" cases.
    exact_reference: Optional[str] = Field(..., description="The exact syllabus content this text is relevant to. None if the text is irrelevant.")

class TestSetGenerationResult(BaseModel):
    """Represents the complete output of the test set generation."""
    # Schema handed to `with_structured_output` in
    # TestSetGenerator._build_generator_chain.
    test_cases: List[TestSet] = Field(..., description="A list of generated test cases.")

class TestSetGenerator:
    """Generate relevant/irrelevant test cases for a syllabus item with Gemini."""

    def __init__(self, api_key: str):
        """Configure the Gemini SDK, build the model and the generator chain.

        Args:
            api_key: Google Generative AI API key.

        Raises:
            ValueError: If ``api_key`` is empty or ``None``.
        """
        if not api_key:
            raise ValueError("API key must be provided.")
        genai.configure(api_key=api_key)
        # Hand the key to the LangChain model explicitly so the caller-supplied
        # key is the one used, rather than whatever GOOGLE_API_KEY contains.
        self.model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=api_key)
        self.generator_chain = self._build_generator_chain()

    def _build_generator_chain(self) -> RunnableLambda:
        """
        Builds the generator chain for creating test sets.

        The chain is ``prompt | structured model`` and yields a
        ``TestSetGenerationResult``.
        """
        prompt_template = self._generator_template()
        prompt = PromptTemplate.from_template(prompt_template)
        structured_llm = self.model.with_structured_output(TestSetGenerationResult)
        return prompt | structured_llm

    def _generator_template(self) -> str:
        """
        Prompt template for generating a test set with exact references.

        Variables: ``{syllabus_item_type}``, ``{syllabus_item_content}`` and
        ``{num_examples}``.
        """
        # This is a plain (non-f) string, so one level of brace doubling is
        # what yields single literal braces in the rendered JSON example.
        return """
        You are a curriculum expert and test paper designer. Your task is to generate a set of test questions or paper excerpts based on a provided syllabus chunk.

        You must generate two types of examples:
        1.  **Relevant Text**: A test question that directly and accurately assesses the provided syllabus aim or topic. For this, you must provide the **exact syllabus content** it references.
        2.  **Irrelevant Text**: A test question that is completely irrelevant and outside of the provided syllabus aim or topic. For this, the reference should be `null`.

        For each generated text, you must provide the exact syllabus content it directly relates to, or `null` if it is not relevant.

        **Syllabus Item to Base Generation On:**
        Type: {syllabus_item_type}
        Content: {syllabus_item_content}

        **Instructions:**
        Generate a list of up to {num_examples} examples. Ensure a mix of both relevant and irrelevant cases.

        **Output Format (JSON):**
        ```json
        {{
            "test_cases": [
                {{
                    "text": "A generated question or text.",
                    "exact_reference": "The exact relevant syllabus content"
                }},
                {{
                    "text": "An irrelevant question or text.",
                    "exact_reference": null
                }},
                ...
            ]
        }}
        ```
        """

    def generate_test_set(
        self,
        syllabus_item_type: Literal["aim", "topic"],
        syllabus_item_content: str,
        num_examples: int = 50
    ) -> TestSetGenerationResult:
        """Generate test cases for one syllabus item.

        Args:
            syllabus_item_type: ``"aim"`` or ``"topic"``; inserted into the prompt.
            syllabus_item_content: Syllabus text the cases are based on.
            num_examples: Upper bound on the number of cases requested; must be
                an even number of at least 2.

        Returns:
            The result of invoking the generator chain.

        Raises:
            ValueError: If ``num_examples`` is odd or smaller than 2.
        """
        if num_examples < 2 or num_examples % 2 != 0:
            raise ValueError("num_examples must be an even number greater than 1.")

        inputs = {
            "syllabus_item_type": syllabus_item_type,
            "syllabus_item_content": syllabus_item_content,
            "num_examples": num_examples
        }
        return self.generator_chain.invoke(inputs)

In [100]:
API_KEY = os.environ.get("GOOGLE_API_KEY")
generator = TestSetGenerator(api_key=API_KEY)

# `syllabus_topic` comes from the evaluator cell (output of convert_syllabus_to_text).
# Generate a test set of 4 examples based on this topic
test_set_result = generator.generate_test_set(
    syllabus_item_type="topic",
    syllabus_item_content=syllabus_topic,
    num_examples=4,
)

print(test_set_result.model_dump_json(indent=2))


{
  "test_cases": [
    {
      "text": "Explain the background and consequences of Neville Chamberlain's appeasement policy in the run-up to World War II.",
      "exact_reference": "topic_1: {'title': 'Appeasement Policy of Neville Chamberlain', 'description': 'Examine the background, implementation, and consequences of the appeasement policy pursued by Neville Chamberlain in the lead-up to World War II.  Context: Understand the political climate and international relations in the 1930s. Example: Analyze the Munich Agreement and its impact on Czechoslovakia and subsequent events.'}"
    },
    {
      "text": "Discuss the impact of the Industrial Revolution on the rise of global trade.",
      "exact_reference": "null"
    },
    {
      "text": "Analyze the key events and agreements of the Munich Conference in 1938 and their impact on subsequent events.",
      "exact_reference": "topic_3: {'title': 'The Munich Conference (1938)', 'description': 'Study the events and agreements reac