# [Experiment] Impact of Prompt Engineering Techniques on Generated Responses

- Chain-of-Thought (CoT) prompting
- Few-shot prompting

## 1. Setup

In [1]:
###################################
##### Setup Working Directory #####
###################################
import os

# The `app` package is imported relative to the project root, so when the
# kernel starts inside the "experiments" folder we step up one level.
cwd = os.getcwd()

# os.path helpers honour the platform's path separator. Splitting on "\\"
# only worked on Windows; on other platforms the check never matched and the
# working directory was silently left unchanged.
if os.path.basename(cwd) == "experiments":
    cwd = os.path.dirname(cwd)

os.chdir(cwd)
print("Current Working Directory:")
print(os.getcwd())

Current Working Directory:
c:\Users\shuti\OneDrive\Documents\Term 7 Modules\50.045 Information Retrieval\Project\eduRAG


In [2]:
import os
import re

from pymongo import MongoClient

# connect to MongoDB
MONGODB_URI = os.environ.get("MONGODB_URI")

# Cell output is saved together with the notebook, so never echo the raw URI:
# mask the "user:password@" part before logging it.
safe_uri = re.sub(r"//[^/@]+@", "//<redacted>@", MONGODB_URI or "<MONGODB_URI not set>")
print(f"Connecting to MongoDB at {safe_uri}")

# NOTE(review): if MONGODB_URI is unset, None is passed straight to MongoClient
# (unchanged from before) -- confirm that fallback is acceptable here.
mongo_client = MongoClient(MONGODB_URI)
db = mongo_client["exam_db"]
question_collection = db["question"]

Connecting to MongoDB at mongodb+srv://<redacted>@cluster0.x0nvy.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0


In [3]:
import openai
import os
import json
from typing import Optional, Any
from dotenv import load_dotenv
from fastapi import HTTPException, Body
from app.utils.format_utils import (
    normalise_query,
    format_first_question_xml,
)
from app.utils.openai_utils import get_embedding
from app.db.vector_search import vector_search
from app.utils.openai_utils import (
    _encode_image,
)
from app.models import (
    Message,
    Role,
    GeneratedQuestionList
)
from app.utils.image_utils import extract_question_metadata, find_and_crop_image
from app import constants
from ulid import ULID

def get_generated_questions_and_answers(
    question_details: str,
    image_filepath: str,
    aggregated_metadata: dict,
    supplementary_system_prompt: str,  # Experimental Feature
):
    """Request generated questions from the OpenAI chat completions API.

    Args:
        question_details: Text sent as the text part of the user message.
        image_filepath: Path of the image that is base64-encoded and attached
            to the user message as a PNG data URL.
        aggregated_metadata: Mapping with "topics", "sub_topics" and "links"
            entries; each is comma-joined into the system prompt.
        supplementary_system_prompt: Extra instruction text placed at the end
            of the system prompt (experimental).

    Returns:
        The `parsed` attribute of the first completion choice's message. The
        request is made with `GeneratedQuestionList` as the response format.
    """
    # Base64-encode the image so it can be embedded in the request
    base64_image = _encode_image(image_filepath)

    # Flatten each metadata collection into a comma-separated string
    topics, sub_topics, links = [
        ", ".join(aggregated_metadata[field])
        for field in ("topics", "sub_topics", "links")
    ]

    # Experimental Feature
    # Base system prompt + retrieval context + the supplementary instruction
    system_prompt = f"""
        {constants.SYSTEM_PROMPT_GENERATE_QUESTIONS}

        Context for question generation:
        - Topics: {topics}
        - Sub-topics: {sub_topics}
        - References: {links}

        Generate questions that are aligned with the provided question details while incorporating the above context for diversity.

        {supplementary_system_prompt}
    """

    # The user turn carries two parts: the question text and the image
    text_part = {"type": "text", "text": question_details}
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{base64_image}"},
    }
    system_message = {"role": Role.SYSTEM, "content": system_prompt}
    user_message = {"role": Role.USER, "content": [text_part, image_part]}

    # Structured-output call: the response is parsed into GeneratedQuestionList
    completion = openai.beta.chat.completions.parse(
        model=os.environ.get("OPENAI_MODEL"),
        messages=[system_message, user_message],
        response_format=GeneratedQuestionList,
    )

    first_choice = completion.choices[0]
    return first_choice.message.parsed

def query(
    user_query: list[Message],
    supplementary_system_prompt: str,  # Experimental Feature
    subject: Optional[str] = "elementary_mathematics",
    level: Optional[str] = None,
    exam_type: Optional[str] = None,
):
    """Retrieve similar questions, generate new ones from them, and save the result.

    The defaults are plain values rather than `Body(...)` markers: this
    function is called directly (not through a FastAPI route), and a `Body`
    default is only resolved by FastAPI's routing layer. Called directly, the
    marker objects themselves would be passed to `vector_search` as filters.

    Args:
        user_query: Conversation messages; the content of the last message
            (after `normalise_query`) is used as the search text.
        supplementary_system_prompt: Extra instruction text forwarded to
            `get_generated_questions_and_answers` (experimental).
        subject: Passed to `vector_search` together with `level` and `exam_type`.
        level: See `subject`.
        exam_type: See `subject`.

    Returns:
        A dict with "response" (a one-element list holding the model output
        dump plus a "ground_truth" entry with the aggregated metadata) and
        "first_question" (the concatenated XML of all retrieved questions).

    Raises:
        HTTPException: With status 404 when the vector search returns nothing.
    """
    # Normalize the query
    user_query = normalise_query(user_query)

    # Perform vector search to find similar questions.
    # `question_collection` is the module-level MongoDB collection.
    results = vector_search(
        user_query[-1].content, question_collection, [subject, level, exam_type],
    )
    if not results:
        raise HTTPException(
            status_code=404,
            detail="No similar questions found. Please try again with a different question.",
        )

    # Sets de-duplicate metadata shared by several retrieved questions
    aggregated_metadata = {"topics": set(), "sub_topics": set(), "links": set()}
    question_xml_parts = []

    # Process each result to aggregate metadata and prepare XML
    for result in results:
        (
            question_paper_filepath,
            question_body,
            image_filename,
            page_start,
            page_end,
        ) = extract_question_metadata(result)

        # Crop the question image
        find_and_crop_image(
            pdf_url=question_paper_filepath,
            search_text=question_body,
            question_filename=image_filename,
            page_start=page_start,
            page_end=page_end,
        )
        # NOTE(review): rebound on every iteration, so only the LAST result's
        # image is sent to the model below -- confirm this is intended.
        image_filepath = f"{constants.TEMP_DIR}/{image_filename}.png"

        # Aggregate metadata
        aggregated_metadata["topics"].add(result["topic"])
        aggregated_metadata["sub_topics"].add(result["sub_topic"])
        aggregated_metadata["links"].add(result.get("question_paper_filepath", ""))

        # Generate XML for this question (newline-terminated)
        question_xml_parts.append(format_first_question_xml([result]) + "\n")

    questions_xml = "".join(question_xml_parts)

    # Convert aggregated metadata sets to lists so they are JSON-serialisable
    aggregated_metadata = {key: list(value) for key, value in aggregated_metadata.items()}

    # Generate questions based on aggregated metadata and all ground-truth documents
    response = get_generated_questions_and_answers(
        question_details=questions_xml,
        image_filepath=image_filepath,
        aggregated_metadata=aggregated_metadata,  # Pass aggregated context
        supplementary_system_prompt=supplementary_system_prompt,  # Experimental Feature
    )

    # Attach the retrieval context to the model output for later analysis
    response_dict = response.model_dump()
    response_dict["ground_truth"] = aggregated_metadata
    output_jsons = [response_dict]

    # Save the generated questions and answers to a uniquely named JSON file
    os.makedirs(constants.OUTPUT_DIR, exist_ok=True)
    json_filepath = f"{constants.OUTPUT_DIR}/{str(ULID())}.json"
    with open(json_filepath, "w", encoding="utf-8") as f:
        json.dump(output_jsons, f, indent=4)

    return {"response": output_jsons, "first_question": questions_xml}

## 2. Experiments

In [4]:
# Fixed input reused by the experiment cells, so runs differ only in the
# supplementary system prompt.
MOCK_DATA = dict(
    user_query=[
        Message(role="user", content="Give me questions related to binomial theorem."),
    ],
    subject="additional_mathematics",
)

In [5]:
# Collects each experiment's response, keyed by prompting technique
results: dict[str, dict] = dict()

### 2.1 Baseline

In [6]:
# Control run: nothing extra is appended to the system prompt
print("----- Baseline Prompting -----")

# MOCK_DATA holds exactly the `user_query` and `subject` keyword arguments
response = query(supplementary_system_prompt="", **MOCK_DATA)
results["baseline_prompting"] = response

# Print Results
print(response)


----- Baseline Prompting -----
Found match on page 21. Saved to temp/fairfield_methodist_school_secondary_elementary_mathematics_preliminary_exam_2024_1_24i.png
No matching text found in PDF - saving full pages
Saved combined pages 22 to 22 to temp/bukit_panjang_government_high_school_elementary_mathematics_preliminary_exam_2023_1_26a.png
Found match on page 12. Saved to temp/fairfield_methodist_school_secondary_elementary_mathematics_preliminary_exam_2024_1_15b.png
{'response': [{'questions': [{'question_text': 'Emma claims that \\( 3^{400} \\) is smaller than \\( 27^{200} \\). Do you agree with her statement? Justify your answer with mathematical calculations.', 'topic': 'Numbers and their operations', 'sub_topic': 'Applying the laws of indices', 'steps': ['Rewrite \\( 27^{200} \\) as \\((3^3)^{200}\\), which equals \\(3^{600}\\).', 'Compare \\(3^{400}\\) and \\(3^{600}\\): Since the bases are the same and 400 < 600, \\(3^{400} < 3^{600}\\).', "Thus, Emma's claim is correct."], 'answ

### 2.2 Chain of Thought Prompting (Zero-shot)

In [7]:
print("----- Chain of Thought (COT) Prompting -----")

response = query(
    user_query=MOCK_DATA["user_query"],
    subject=MOCK_DATA["subject"],
    # Zero-shot CoT trigger phrase. It was previously misspelled ("thinkg"),
    # so any output recorded before this fix was produced with the typo --
    # re-run this cell before comparing against the baseline.
    supplementary_system_prompt="Let's think step by step.",
)
results["COT_0_shot_prompting"] = response

# Print Results
print(response)


----- Chain of Thought (COT) Prompting -----
Found match on page 21. Saved to temp/fairfield_methodist_school_secondary_elementary_mathematics_preliminary_exam_2024_1_24i.png
No matching text found in PDF - saving full pages
Saved combined pages 22 to 22 to temp/bukit_panjang_government_high_school_elementary_mathematics_preliminary_exam_2023_1_26a.png
Found match on page 12. Saved to temp/fairfield_methodist_school_secondary_elementary_mathematics_preliminary_exam_2024_1_15b.png
{'response': [{'questions': [{'question_text': 'Liam claims that \\(3^{600}\\) is greater than \\(9^{300}\\). Do you agree with his statement? Justify your answer with mathematical working.', 'topic': 'Numbers and their operations', 'sub_topic': 'Applying the laws of indices', 'steps': ['Rewrite 9 as a power of 3: \\(9 = 3^2\\), so \\(9^{300} = (3^2)^{300} = 3^{600}\\).', 'Compare \\(3^{600}\\) and \\(3^{600}\\): They are equal.', "Since \\(3^{600} = 3^{600}\\), Liam's claim that \\(3^{600}\\) is greater than 

### 2.3 Few-shot Prompting

In [8]:
# TODO
# How to create the few shot prompting examples?

### Export Results

In [9]:
# Echo the full results mapping as a quick check before it is exported
print(results)

{'baseline_prompting': {'response': [{'questions': [{'question_text': 'Emma claims that \\( 3^{400} \\) is smaller than \\( 27^{200} \\). Do you agree with her statement? Justify your answer with mathematical calculations.', 'topic': 'Numbers and their operations', 'sub_topic': 'Applying the laws of indices', 'steps': ['Rewrite \\( 27^{200} \\) as \\((3^3)^{200}\\), which equals \\(3^{600}\\).', 'Compare \\(3^{400}\\) and \\(3^{600}\\): Since the bases are the same and 400 < 600, \\(3^{400} < 3^{600}\\).', "Thus, Emma's claim is correct."], 'answer': 'Answer: Yes, \\( 3^{400} < 27^{200} \\).'}, {'question_text': 'Lucas argues that \\( 10^{150} \\) is less than \\( 100^{75} \\). Do you agree with his assertion? Provide a justification using mathematical reasoning.', 'topic': 'Numbers and their operations', 'sub_topic': 'Applying the laws of indices', 'steps': ['Rewrite \\(100^{75}\\) as \\((10^2)^{75}\\), which is equal to \\(10^{150}\\).', 'Compare \\(10^{150}\\) and \\(10^{150}\\): Si

In [10]:
import json

# Persist all experiment responses for the analysis section. Non-ASCII
# characters are written as-is (ensure_ascii=False) into a UTF-8 file.
export_path = 'experiments/exp2_output.json'
with open(export_path, mode='w', encoding='utf-8') as export_file:
    json.dump(obj=results, fp=export_file, ensure_ascii=False, indent=4)


## 3. Analysis

Goal: Ensure that the generated questions are diverse while still being sourced from the retrieved documents, and that they are answerable and conform to the SEAB syllabus.

By inspecting `experiments/exp2_output.json`, we can derive the following insights:

- TODO
