In [3]:
import json

# read similar_question_data.json

# The dataset holds non-ASCII question text (e.g. Devanagari), so decode it
# explicitly as UTF-8 instead of relying on the platform's default encoding.
with open('similar_question_data.json', encoding='utf-8') as f:
    data = json.load(f)

print(data[:5])

[{'question_id': '006d7', 'question_text': 'माना A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$, जहाँ i = √-1 है। तो समुच्चय { n ∈ {1,2,......,100} : $A^n$ = A} में अवयवों की संख्या है _______\n\nThe image shows a mathematical expression defining a matrix A and a set condition. The matrix A is a 2x2 matrix with elements 1+i, 1, -i, and 0, where i is defined as the square root of -1. The problem asks to find the number of elements in the set of n belonging to {1, 2, ..., 100} such that A to the power of n equals A.', 'subject': 'Mathematics', 'similar_questions': [{'similar_question_text': 'Let A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$ where I = $\\sqrt{-1}$. Then, the number of elements in the set {n$\\in$ {1,2, ....., 100} : $A^n$ = A} is', 'similarity_score': 0.981, 'summarized_solution_approach': 'The solution calculates $A^2$ and $A^4$. It finds that $A^4 = I$ (identity matrix). Consequently, $A^5 = A, A^9 = A$, and so on. The values of n for which $A^n = A$ 

In [4]:
len(data)

553

In [5]:
data[0]

{'question_id': '006d7',
 'question_text': 'माना A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$, जहाँ i = √-1 है। तो समुच्चय { n ∈ {1,2,......,100} : $A^n$ = A} में अवयवों की संख्या है _______\n\nThe image shows a mathematical expression defining a matrix A and a set condition. The matrix A is a 2x2 matrix with elements 1+i, 1, -i, and 0, where i is defined as the square root of -1. The problem asks to find the number of elements in the set of n belonging to {1, 2, ..., 100} such that A to the power of n equals A.',
 'subject': 'Mathematics',
 'similar_questions': [{'similar_question_text': 'Let A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$ where I = $\\sqrt{-1}$. Then, the number of elements in the set {n$\\in$ {1,2, ....., 100} : $A^n$ = A} is',
   'similarity_score': 0.981,
   'summarized_solution_approach': 'The solution calculates $A^2$ and $A^4$. It finds that $A^4 = I$ (identity matrix). Consequently, $A^5 = A, A^9 = A$, and so on. The values of n for which $A

In [6]:
data[0]["question_text"]

'माना A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$, जहाँ i = √-1 है। तो समुच्चय { n ∈ {1,2,......,100} : $A^n$ = A} में अवयवों की संख्या है _______\n\nThe image shows a mathematical expression defining a matrix A and a set condition. The matrix A is a 2x2 matrix with elements 1+i, 1, -i, and 0, where i is defined as the square root of -1. The problem asks to find the number of elements in the set of n belonging to {1, 2, ..., 100} such that A to the power of n equals A.'

In [7]:
data[0]["similar_questions"][0]

{'similar_question_text': 'Let A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$ where I = $\\sqrt{-1}$. Then, the number of elements in the set {n$\\in$ {1,2, ....., 100} : $A^n$ = A} is',
 'similarity_score': 0.981,
 'summarized_solution_approach': 'The solution calculates $A^2$ and $A^4$. It finds that $A^4 = I$ (identity matrix). Consequently, $A^5 = A, A^9 = A$, and so on. The values of n for which $A^n = A$ form an arithmetic progression: n = 1, 5, 9, ..., 97. The number of terms in this sequence is then calculated, which gives the number of elements in the set.'}

In [8]:
# basic qna logic


from google import genai
import os
from dotenv import load_dotenv
from google.genai import types
import json

from pydantic import BaseModel, ValidationError

# Schema for a solved question. It is handed to Gemini as `response_schema`
# and used again to validate the decoded reply in get_raw_solution.
class Solution(BaseModel):
    explanation: str  # step-by-step working, as requested by the prompt
    final_answer: str  # short, direct final result

# Load environment variables via python-dotenv, then build the Gemini client
# from GEMINI_API_KEY (os.getenv returns None when the variable is not set).
load_dotenv()
client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))


def get_raw_solution(question: str, max_retries: int = 3) -> Solution | None:
    """
    A straightforward question answering function using the gemini model.
    Args:
        question (str): The question to be answered.
        max_retries (int): How many additional attempts to make when the reply
            is not valid JSON or does not match the ``Solution`` schema.
    Returns:
        Solution | None: The validated solution (explanation and final answer),
        or ``None`` when every attempt failed validation or the request itself
        raised an error.
    """
    try:
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=f"""You are an academic expert at solving problems in the field of maths, physics and chemistry. 
            Respond with the solution to the given problem: {question}
            You respond with a JSON of explanation and final_answer where you can give step by step explanation in the explanation and the final solution in the final_answer.
            Keep final answer direct and as short as possible and keep the step by stem explanation to the explanation portion of the JSON
            """,
            config=types.GenerateContentConfig(
                thinking_config=types.ThinkingConfig(thinking_budget=0),
                response_mime_type="application/json",
                response_schema=Solution,
            ),
        )

        parsed = json.loads(response.text)
        # pydantic models do not accept a positional dict; validate the decoded
        # payload so a malformed reply raises ValidationError and is retried.
        return Solution.model_validate(parsed)

    except (json.JSONDecodeError, ValidationError):
        # The model answered, but not with usable JSON: try again while budget remains.
        if max_retries > 0:
            return get_raw_solution(question, max_retries - 1)
        print("Max retries reached: JSON parse error.")
        return None

    except Exception as e:
        # Anything else (network, auth, quota, ...) is reported and not retried.
        print(f"Error: {e}")
        return None


get_raw_solution(data[0]["question_text"])


Solution(explanation="Let the given matrix be A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$. We need to find the number of integers n in the set {1, 2, ..., 100} such that $A^n = A$.\n\nFirst, let's calculate $A^2$:\n$A^2 = A \\cdot A = \\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix} \\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$\n$A^2 = \\begin{pmatrix} (1+i)(1+i) + 1(-i) & (1+i)(1) + 1(0) \\ -i(1+i) + 0(-i) & -i(1) + 0(0) \\end{pmatrix}$\n$A^2 = \\begin{pmatrix} (1 + 2i + i^2) - i & 1+i \\ (-i - i^2) & -i \\end{pmatrix}$\nSince $i^2 = -1$,\n$A^2 = \\begin{pmatrix} (1 + 2i - 1) - i & 1+i \\ (-i - (-1)) & -i \\end{pmatrix}$\n$A^2 = \\begin{pmatrix} 2i - i & 1+i \\ -i + 1 & -i \\end{pmatrix}$\n$A^2 = \\begin{pmatrix} i & 1+i \\ 1-i & -i \\end{pmatrix}$\n\nNow, let's calculate $A^3$:\n$A^3 = A^2 \\cdot A = \\begin{pmatrix} i & 1+i \\ 1-i & -i \\end{pmatrix} \\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$\n$A^3 = \\begin{pmatrix} i(1+i) + (1+i)(-i) & i(1) + (1+i)(0) \\ 

In [9]:
data[0]["similar_questions"]


[{'similar_question_text': 'Let A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$ where I = $\\sqrt{-1}$. Then, the number of elements in the set {n$\\in$ {1,2, ....., 100} : $A^n$ = A} is',
  'similarity_score': 0.981,
  'summarized_solution_approach': 'The solution calculates $A^2$ and $A^4$. It finds that $A^4 = I$ (identity matrix). Consequently, $A^5 = A, A^9 = A$, and so on. The values of n for which $A^n = A$ form an arithmetic progression: n = 1, 5, 9, ..., 97. The number of terms in this sequence is then calculated, which gives the number of elements in the set.'},
 {'similar_question_text': 'Let $$A = \\left( {\\matrix{\n   {1 + i} & 1  \\cr \n   { - i} & 0  \\cr \n\n } } \\right)$$ where $$i = \\sqrt { - 1} $$. Then, the number of elements in the set { n $$\\in$$ {1, 2, ......, 100} : A n = A } is ____________.',
  'similarity_score': 0.98,
  'summarized_solution_approach': 'First, the solution calculates A^2 and A^4. It finds that A^4 equals the identity matrix I. Con

In [10]:
# evaluation

# One entry of a dataset record's `similar_questions` list; the field names
# mirror the keys of those entries.
class SimilarQuestion(BaseModel):
    similar_question_text: str
    similarity_score: float  # precomputed score stored in the dataset
    summarized_solution_approach: str
    
# Scores the model is asked to return for one (question, similar question)
# pair; the prompt requests each value in the range 0-100.
class SimilarQuestionsEvaluation(BaseModel):
    conceptual_similarity_score: int
    structural_similarity_score: int
    difficulty_alignment_score: int
    solution_approach_transferability_score: int
    
    
def evaluate_similar_question(question: str, similar_question: SimilarQuestion, max_retries: int = 3) -> SimilarQuestionsEvaluation | None:
    """
    Ask the gemini model to score how well a similar question matches a question.

    Args:
        question (str): The main question.
        similar_question (SimilarQuestion): The candidate similar question, its
            similarity score and summarized solution approach.
        max_retries (int): How many additional attempts to make when the reply
            is not valid JSON or does not match the evaluation schema.
    Returns:
        SimilarQuestionsEvaluation | None: The four 0-100 scores, or ``None``
        when every attempt failed validation or the request itself raised an error.
    """
    try:
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=f"""You are an academic expert at analyzing and solving problems in the field of maths, physics and chemistry. 
            Here, you are given a question alongside a similar question, its solution approach and similarity score, and you are responsible for evaluating the similarity between the main question and it's similar question.
            You must assess how well the similar question represents the input question across the following dimensions.
            1. Conceptual Similarity: Do they test the same underlying concepts/principles?
            2. Structural Similarity: Are the problem structures analogous?
            3. Difficulty Alignment: Is the difficulty level appropriate?
            4. Solution Approach Transferability: Can the solution method be meaningfully applied?
            
            
            The input question is: {question}
            The similar question is: {similar_question.model_dump_json()}
            
            You respond with a JSON containing the following keys which are scores based on the aforementioned dimensions for analysis:
            
            conceptual_similarity_score: A score between 0 and 100
            structural_similarity_score: A score between 0 and 100
            difficulty_alignment_score: A score between 0 and 100 
            solution_approach_transferability_score: A score between 0 and 100
            
            Make sure to respond with a json strictly following the above format.
            
            """,
            config=types.GenerateContentConfig(
                thinking_config=types.ThinkingConfig(thinking_budget=0),
                response_mime_type="application/json",
                response_schema=SimilarQuestionsEvaluation,
            ),
        )

        parsed = json.loads(response.text)
        # pydantic models do not accept a positional dict; validate the decoded
        # payload so a malformed reply raises ValidationError and is retried.
        return SimilarQuestionsEvaluation.model_validate(parsed)

    except (json.JSONDecodeError, ValidationError):
        if max_retries > 0:
            print("retrying")
            # Retry this same evaluation (not the plain solver), keeping the
            # similar question so the return type stays an evaluation.
            return evaluate_similar_question(question, similar_question, max_retries - 1)
        print("Max retries reached: JSON parse error.")
        return None

    except Exception as e:
        # Anything else (network, auth, quota, ...) is reported and not retried.
        print(f"Error: {e}")
        return None

    
        
# Score the first dataset question against its own top similar question.
# The stored record is a dict, so unpack it into the model's keyword fields.
evaluate_similar_question(data[0]["question_text"], SimilarQuestion(**data[0]["similar_questions"][0]))

SimilarQuestionsEvaluation(conceptual_similarity_score=100, structural_similarity_score=100, difficulty_alignment_score=100, solution_approach_transferability_score=100)

In [11]:
# Score the second dataset question against the first question's similar
# question (a deliberately mismatched pair, judging by the indices used).
# The stored record is a dict, so unpack it into the model's keyword fields.
evaluate_similar_question(data[1]["question_text"], SimilarQuestion(**data[0]["similar_questions"][0]))

SimilarQuestionsEvaluation(conceptual_similarity_score=0, structural_similarity_score=0, difficulty_alignment_score=0, solution_approach_transferability_score=0)

In [12]:
data[0]["similar_questions"]

[{'similar_question_text': 'Let A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$ where I = $\\sqrt{-1}$. Then, the number of elements in the set {n$\\in$ {1,2, ....., 100} : $A^n$ = A} is',
  'similarity_score': 0.981,
  'summarized_solution_approach': 'The solution calculates $A^2$ and $A^4$. It finds that $A^4 = I$ (identity matrix). Consequently, $A^5 = A, A^9 = A$, and so on. The values of n for which $A^n = A$ form an arithmetic progression: n = 1, 5, 9, ..., 97. The number of terms in this sequence is then calculated, which gives the number of elements in the set.'},
 {'similar_question_text': 'Let $$A = \\left( {\\matrix{\n   {1 + i} & 1  \\cr \n   { - i} & 0  \\cr \n\n } } \\right)$$ where $$i = \\sqrt { - 1} $$. Then, the number of elements in the set { n $$\\in$$ {1, 2, ......, 100} : A n = A } is ____________.',
  'similarity_score': 0.98,
  'summarized_solution_approach': 'First, the solution calculates A^2 and A^4. It finds that A^4 equals the identity matrix I. Con

In [4]:
import json

# read similar_question_data.json

# The dataset holds non-ASCII question text (e.g. Devanagari), so decode it
# explicitly as UTF-8 instead of relying on the platform's default encoding.
with open('similar_question_data.json', encoding='utf-8') as f:
    data = json.load(f)

print(data[:5])

[{'question_id': '006d7', 'question_text': 'माना A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$, जहाँ i = √-1 है। तो समुच्चय { n ∈ {1,2,......,100} : $A^n$ = A} में अवयवों की संख्या है _______\n\nThe image shows a mathematical expression defining a matrix A and a set condition. The matrix A is a 2x2 matrix with elements 1+i, 1, -i, and 0, where i is defined as the square root of -1. The problem asks to find the number of elements in the set of n belonging to {1, 2, ..., 100} such that A to the power of n equals A.', 'subject': 'Mathematics', 'similar_questions': [{'similar_question_text': 'Let A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$ where I = $\\sqrt{-1}$. Then, the number of elements in the set {n$\\in$ {1,2, ....., 100} : $A^n$ = A} is', 'similarity_score': 0.981, 'summarized_solution_approach': 'The solution calculates $A^2$ and $A^4$. It finds that $A^4 = I$ (identity matrix). Consequently, $A^5 = A, A^9 = A$, and so on. The values of n for which $A^n = A$ 

In [8]:
#basic solution building logic with llama index 

import os
from dotenv import load_dotenv
load_dotenv()
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.core.prompts import PromptTemplate
from llama_index.core.bridge.pydantic import BaseModel
from llama_index.llms.google_genai import GoogleGenAI
from google.genai import types


from typing import List

# LlamaIndex Gemini handle used for the structured completion and the agents
# in the following cells. The API key is read from GEMINI_API_KEY.
# NOTE(review): thinking_budget=0 presumably disables the model's "thinking"
# phase, and max_tokens presumably caps the response length — confirm both
# against the GoogleGenAI / google-genai documentation.
llm = GoogleGenAI(
    model="gemini-2.5-flash",
    api_key = os.getenv("GEMINI_API_KEY"),
    generation_config=types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(
            thinking_budget=0
        )  
    ),
    max_tokens=10000,
)

# Structured output requested from the LLM via llm.as_structured_llm(Solution).
class Solution(BaseModel):
    explanation: str  # step-by-step working, as requested by the prompt
    final_answer: str  # short, direct final result


prompt_tmpl = PromptTemplate("""You are an academic expert at solving problems in the field of maths, physics and chemistry. 
            Respond with the solution to the given problem: {question}
            You respond with a JSON of explanation and final_answer where you can give step by step explanation in the explanation and the final solution in the final_answer.
            Keep final answer direct and as short as possible and keep the step by stem explanation to the explanation portion of the JSON
            """,)

# Request a structured completion for the first dataset question and keep the
# `.raw` attribute of the response.
# NOTE(review): `.raw` is presumably the parsed Solution instance (a later cell
# calls model_dump() on it) — confirm against the LlamaIndex structured LLM docs.
solution_object = llm.as_structured_llm(Solution).complete(prompt_tmpl.format(question=data[0]["question_text"])).raw

print(solution_object)

explanation="The given matrix is A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$. We need to find the number of n in {1, 2, ..., 100} such that $A^n = A$. Let's calculate the powers of A.\n\nFirst, calculate $A^2$:\n$A^2 = A \\cdot A = \\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix} \\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$\n$A^2 = \\begin{pmatrix} (1+i)(1+i) + 1(-i) & (1+i)(1) + 1(0) \\ -i(1+i) + 0(-i) & -i(1) + 0(0) \\end{pmatrix}$\n$A^2 = \\begin{pmatrix} (1+2i+i^2) - i & 1+i \\ -i-i^2 & -i \\end{pmatrix}$\nSince $i^2 = -1$,\n$A^2 = \\begin{pmatrix} (1+2i-1) - i & 1+i \\ -i-(-1) & -i \\end{pmatrix}$\n$A^2 = \\begin{pmatrix} i & 1+i \\ 1-i & -i \\end{pmatrix}$\n\nNow, calculate $A^3$:\n$A^3 = A^2 \\cdot A = \\begin{pmatrix} i & 1+i \\ 1-i & -i \\end{pmatrix} \\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$\n$A^3 = \\begin{pmatrix} i(1+i) + (1+i)(-i) & i(1) + (1+i)(0) \\ (1-i)(1+i) + (-i)(-i) & (1-i)(1) + (-i)(0) \\end{pmatrix}$\n$A^3 = \\begin{pmatrix} i+i^2 -i-i

In [9]:
solution_object.model_dump()["final_answer"]

'25'

In [10]:
data[0]

{'question_id': '006d7',
 'question_text': 'माना A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$, जहाँ i = √-1 है। तो समुच्चय { n ∈ {1,2,......,100} : $A^n$ = A} में अवयवों की संख्या है _______\n\nThe image shows a mathematical expression defining a matrix A and a set condition. The matrix A is a 2x2 matrix with elements 1+i, 1, -i, and 0, where i is defined as the square root of -1. The problem asks to find the number of elements in the set of n belonging to {1, 2, ..., 100} such that A to the power of n equals A.',
 'subject': 'Mathematics',
 'similar_questions': [{'similar_question_text': 'Let A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$ where I = $\\sqrt{-1}$. Then, the number of elements in the set {n$\\in$ {1,2, ....., 100} : $A^n$ = A} is',
   'similarity_score': 0.981,
   'summarized_solution_approach': 'The solution calculates $A^2$ and $A^4$. It finds that $A^4 = I$ (identity matrix). Consequently, $A^5 = A, A^9 = A$, and so on. The values of n for which $A

In [11]:
from llama_index.core.tools import FunctionTool
from llama_index.core.agent.workflow import FunctionAgent, ReActAgent
from difflib import SequenceMatcher


# One entry of a dataset record's `similar_questions` list, as returned by the
# get_similar_questions tool.
class SimilarQuestion(BaseModel):
    similar_question_text: str
    similarity_score: float  # precomputed score stored in the dataset
    summarized_solution_approach: str

# Evaluation of a single similar question. The evaluation prompt asks for each
# score in the range 0-100 and for total_score to be the average of the others.
class SimilarQuestionsEvaluation(BaseModel):
    similar_question: str
    solution_approach: str
    conceptual_similarity_score: int
    structural_similarity_score: int
    difficulty_alignment_score: int
    solution_approach_transferability_score: int
    total_score: int
    
# Structured output of the evaluation agent: the original question together
# with one evaluation per similar question.
class SimilarQuestionsEvaluationList(BaseModel):
    original_question: str
    similar_question_evaluations: List[SimilarQuestionsEvaluation]
    
    
async def get_similar_questions(question: str) -> List[SimilarQuestion]:
    # ideally we do a vector similarity search here but rn just doing fuzzy matching with whatever question the llm passes to this function
    """ Get most similar question from dataset and return its similar questions """
    # NOTE(review): the docstring above is left untouched because
    # FunctionTool.from_defaults presumably derives the tool description shown
    # to the LLM from it — confirm before rewording.
    #
    # Returns the `similar_questions` of the dataset record whose
    # `question_text` has the highest difflib SequenceMatcher ratio against
    # `question`, or an empty list when the dataset has no records.

    # Explicit UTF-8: the dataset contains non-ASCII question text, so do not
    # depend on the platform's default encoding.
    with open('similar_question_data.json', encoding='utf-8') as f:
        dataset = json.load(f)

    # max() raises ValueError on an empty sequence; report "no matches" instead.
    if not dataset:
        return []

    best_match = max(
        dataset,
        key=lambda q: SequenceMatcher(None, q['question_text'], question).ratio()
    )

    # Each stored entry is a dict; pydantic models take their fields as
    # keyword arguments, so unpack it rather than passing it positionally.
    return [SimilarQuestion(**sq) for sq in best_match.get('similar_questions', [])]

    
    
get_similar_questions_tool = FunctionTool.from_defaults(fn=get_similar_questions)

eval_prompt = """You are an academic expert at analyzing and solving problems in the field of maths, physics and chemistry. 
Here, you are given a question and you must get similar questions to the question from the dataset and evaluate them and return the results.
The image descriptions after the question given are also part of the question, make sure to include them.
            
You must assess how well each similar question represents the input question across the following dimensions.
            
1. Conceptual Similarity: Do they test the same underlying concepts/principles?
2. Structural Similarity: Are the problem structures analogous?
3. Difficulty Alignment: Is the difficulty level appropriate?
4. Solution Approach Transferability: Can the solution method be meaningfully applied?
            
You should get similar questions via the tool get_similar_questions_tool
which accepts the question and responds with similar questions, their solution approaches and vector similarity scores.
You must pass the question that you receive identically to the function without changing a single character.           
            
You respond with a json with key original_question and another key called similar_question_evaluations with its corresponding value which is a list of objects.
Each object contain the following keys which are scores based on the aforementioned dimensions for analysis for each similar quesion:
            
            
similar_question: The similar question
solution_approach: The solution approach
conceptual_similarity_score: A score between 0 and 100
structural_similarity_score: A score between 0 and 100
difficulty_alignment_score: A score between 0 and 100 
solution_approach_transferability_score: A score between 0 and 100
total_score: A score between 0 and 100 which is the average of all the scores
            
Make sure to respond with a list of objects strictly following the above format.
 """



# Agent that fetches similar questions through get_similar_questions_tool and
# returns its evaluations as a SimilarQuestionsEvaluationList.
# NOTE(review): can_handoff_to names "SolutionAgent", but this cell runs the
# agent on its own via eval_agent.run — presumably the handoff only matters
# inside an AgentWorkflow; confirm.
eval_agent = FunctionAgent(
    name="EvaluationAgent",
    description="It responds with similar questions and answers to a question alongside their solution approach and their evaluations",
    system_prompt=(
        eval_prompt
    ),
    llm=llm,
    tools=[get_similar_questions_tool],
    output_cls=SimilarQuestionsEvaluationList,
    can_handoff_to=["SolutionAgent"],
)

response = await eval_agent.run(data[0]["question_text"])

In [12]:
data[0]["question_text"]

'माना A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$, जहाँ i = √-1 है। तो समुच्चय { n ∈ {1,2,......,100} : $A^n$ = A} में अवयवों की संख्या है _______\n\nThe image shows a mathematical expression defining a matrix A and a set condition. The matrix A is a 2x2 matrix with elements 1+i, 1, -i, and 0, where i is defined as the square root of -1. The problem asks to find the number of elements in the set of n belonging to {1, 2, ..., 100} such that A to the power of n equals A.'

In [13]:
response.structured_response

{'original_question': 'माना A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\ \\end{pmatrix}$, जहाँ i = √-1 है। तो समुच्चय { n ∈ {1,2,......,100} : $A^n$ = A} में अवयवों की संख्या है _______',
 'similar_question_evaluations': [{'similar_question': 'Let A = $\\begin{pmatrix} 1+i & 1 \\\\ -i & 0 \\\\end{pmatrix}$ where I = $\\sqrt{-1}$. Then, the number of elements in the set {n$\\in$ {1,2, ....., 100} : $A^n$ = A} is',
   'solution_approach': 'The solution calculates $A^2$ and $A^4$. It finds that $A^4 = I$ (identity matrix). Consequently, $A^5 = A, A^9 = A$, and so on. The values of n for which $A^n = A$ form an arithmetic progression: n = 1, 5, 9, ..., 97. The number of terms in this sequence is then calculated, which gives the number of elements in the set.',
   'conceptual_similarity_score': 100,
   'structural_similarity_score': 100,
   'difficulty_alignment_score': 100,
   'solution_approach_transferability_score': 100,
   'total_score': 100},
  {'similar_question': 'Let $$A = \\\\left( 

In [14]:
response.get_pydantic_model(SimilarQuestionsEvaluationList)

SimilarQuestionsEvaluationList(original_question='माना A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\ \\end{pmatrix}$, जहाँ i = √-1 है। तो समुच्चय { n ∈ {1,2,......,100} : $A^n$ = A} में अवयवों की संख्या है _______', similar_question_evaluations=[SimilarQuestionsEvaluation(similar_question='Let A = $\\begin{pmatrix} 1+i & 1 \\\\ -i & 0 \\\\end{pmatrix}$ where I = $\\sqrt{-1}$. Then, the number of elements in the set {n$\\in$ {1,2, ....., 100} : $A^n$ = A} is', solution_approach='The solution calculates $A^2$ and $A^4$. It finds that $A^4 = I$ (identity matrix). Consequently, $A^5 = A, A^9 = A$, and so on. The values of n for which $A^n = A$ form an arithmetic progression: n = 1, 5, 9, ..., 97. The number of terms in this sequence is then calculated, which gives the number of elements in the set.', conceptual_similarity_score=100, structural_similarity_score=100, difficulty_alignment_score=100, solution_approach_transferability_score=100, total_score=100), SimilarQuestionsEvaluation(similar

In [17]:

# Structured output of the solution agent defined in this cell.
class Solution(BaseModel):
    explanation: str  # step-by-step working, as requested by the prompt
    final_answer: str  # short, direct final result


# to be reworked to make it thorough
solution_prompt = """You are an academic expert at solving problems in the field of maths, physics and chemistry. 
You are given an original question and sometimes alongside it few similar questions, solution approaches and thorough evaluations of their relevance for each similar question.
You respond with a JSON of explanation and final_answer where you can give step by step explanation in the explanation and the final solution in the final_answer.
The explanation should be a step by step solution approach to the given question so that the student can understand how you arrived to the solution.
Keep final answer direct and as short as possible and keep the step by stem explanation to the explanation portion of the JSON
"""


# Tool-less agent that answers a question using solution_prompt and returns a
# Solution as structured output.
# NOTE(review): timeout=30 is presumably in seconds — confirm against the
# FunctionAgent documentation.
solution_agent = FunctionAgent(
    name="SolutionAgent",
    description="It responds with a final answer to a given original questions alongside similar questions, solution approaches and evals",
    system_prompt=(
        solution_prompt
    ),
    llm=llm,
    output_cls=Solution,
    tools=[],
    timeout=30,
)




response = await solution_agent.run(data[0]["question_text"])
response.structured_response

{'explanation': "Let's first calculate the powers of A to observe a pattern.\n\nGiven matrix A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$.\n\nStep 1: Calculate $A^2$\n$A^2 = A \\cdot A = \\begin{pmatrix} 1+i & 1 \\\\ -i & 0 \\end{pmatrix} \\begin{pmatrix} 1+i & 1 \\\\ -i & 0 \\end{pmatrix}$\n$A^2 = \\begin{pmatrix} (1+i)(1+i) + 1(-i) & (1+i)(1) + 1(0) \\\\ (-i)(1+i) + 0(-i) & (-i)(1) + 0(0) \\end{pmatrix}$\n$A^2 = \\begin{pmatrix} (1 + 2i + i^2) - i & 1+i \\\\ -i - i^2 & -i \\end{pmatrix}$\nSince $i^2 = -1$,\n$A^2 = \\begin{pmatrix} (1 + 2i - 1) - i & 1+i \\\\ -i - (-1) & -i \\end{pmatrix}$\n$A^2 = \\begin{pmatrix} 2i - i & 1+i \\\\ -i + 1 & -i \\end{pmatrix}$\n$A^2 = \\begin{pmatrix} i & 1+i \\\\ 1-i & -i \\end{pmatrix}$\n\nStep 2: Calculate $A^3$\n$A^3 = A^2 \\cdot A = \\begin{pmatrix} i & 1+i \\\\ 1-i & -i \\end{pmatrix} \\begin{pmatrix} 1+i & 1 \\\\ -i & 0 \\end{pmatrix}$\n$A^3 = \\begin{pmatrix} i(1+i) + (1+i)(-i) & i(1) + (1+i)(0) \\\\ (1-i)(1+i) + (-i)(-i) & (1-i)(1) 

In [28]:

from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.workflow import Context


# Structured output of the workflow's solution agent. Compared with the earlier
# Solution definitions it adds `referred_faqs`.
class Solution(BaseModel):
    explanation: str  # step-by-step working, as requested by the prompt
    final_answer: str  # short, direct final result
    referred_faqs: list[str]  # FAQs the model reports having referred to



solution_prompt = """You are an academic expert at solving problems in the field of maths, physics and chemistry. 
You are given an original question and sometimes alongside it few similar questions, solution approaches and thorough evaluations of their relevance for each similar question.
Make sure to refer to the evaluation scores of each similar question for considering their relevance.
You respond with a JSON of explanation and final_answer where you can give step by step explanation in the explanation and the final solution in the final_answer.
The explanation should be a step by step solution approach to the given question so that the student can understand how you arrived to the solution.
Keep final answer direct and as short as possible and keep the step by stem explanation to the explanation portion of the JSON
Mention the FAQs that you referred to in the referred_faqs portion of the JSON
"""

# Tool-less agent that turns the original question plus the evaluated similar
# questions into a final Solution. Its name, "SolutionAgent", is the handoff
# target listed by the evaluation agent.
# NOTE(review): timeout=30 is presumably in seconds — confirm against the
# FunctionAgent documentation.
solution_agent = FunctionAgent(
    name="SolutionAgent",
    description="It accepts original question, similar questions & their evaluations to form a final answer to the original question",
    system_prompt=(
        solution_prompt
    ),
    llm=llm,
    output_cls=Solution,
    timeout=30,
    tools=[]

)

eval_prompt = """You are an academic expert at analyzing and solving problems in the field of maths, physics and chemistry. 
Here, you are given a question and you must get similar questions to the question from the dataset and evaluate them and return the results.
The image descriptions after the question given are also part of the question, make sure to include them.
            
You must assess how well each similar question represents the input question across the following dimensions.
            
1. Conceptual Similarity: Do they test the same underlying concepts/principles?
2. Structural Similarity: Are the problem structures analogous?
3. Difficulty Alignment: Is the difficulty level appropriate?
4. Solution Approach Transferability: Can the solution method be meaningfully applied?
            
You should get similar questions via the tool get_similar_questions_tool
which accepts the question and responds with similar questions, their solution approaches and vector similarity scores.
You must pass the question that you receive identically to the function without changing a single character.           
            
You respond with a json with key original_question and another key called similar_question_evaluations with its corresponding value which is a list of objects.
Each object contain the following keys which are scores based on the aforementioned dimensions for analysis for each similar quesion:
            
            
similar_question: The similar question
solution_approach: The solution approach
conceptual_similarity_score: A score between 0 and 100
structural_similarity_score: A score between 0 and 100
difficulty_alignment_score: A score between 0 and 100 
solution_approach_transferability_score: A score between 0 and 100
total_score: A score between 0 and 100 which is the average of all the scores
            
Make sure to respond with a list of objects strictly following the above format. Handover the output JSON to the SolutionAgent to form a final answer to the original question.
 """
 
# Evaluation agent for the two-agent workflow: it fetches similar questions via
# get_similar_questions_tool, scores them, and may hand off to "SolutionAgent".
eval_agent = FunctionAgent(
    name="EvaluationAgent",
    description="It responds with similar questions and answers to a question alongside their solution approach and their evaluations",
    system_prompt=(
        eval_prompt
    ),
    llm=llm,
    tools=[get_similar_questions_tool,],
    output_cls=SimilarQuestionsEvaluationList,
    can_handoff_to=["SolutionAgent"],
)

# Workflow that starts at the evaluation agent and includes the solution agent.
# NOTE(review): the initial_state keys below all start as empty strings and are
# not read or written by any code visible in this notebook — confirm whether
# they are still needed.
agent_workflow = AgentWorkflow(
    agents=[eval_agent, solution_agent],
    root_agent=eval_agent.name,
    initial_state={
        "original_question": "",
        "unsubstantiated_explanation" : "",
        "unsubstantiated_answer": "",
        "substantiated_explanation": "",
        "substantiated_answer": "",
        
    },
)

In [None]:
from llama_index.core.agent.workflow import (
    AgentInput,
    AgentOutput,
    ToolCall,
    ToolCallResult,
    AgentStream,
)

user_question = data[0]["question_text"]


handler = agent_workflow.run(
    user_msg=(
        f"Answer the following question asked by a student: {(user_question)}\n"
    )
)

current_agent = None
current_tool_calls = ""

final_output = ""

async for event in handler.stream_events():
    if (
        hasattr(event, "current_agent_name")
        and event.current_agent_name != current_agent
    ):
        current_agent = event.current_agent_name
        print(f"\n{'='*50}")
        print(f"🤖 Agent: {current_agent}")
        print(f"{'='*50}\n")

    # if isinstance(event, AgentStream):
    #     if event.delta:
    #         print(event.delta, end="", flush=True)
    # elif isinstance(event, AgentInput):
    #     print("📥 Input:", event.input)
    elif isinstance(event, AgentOutput):
        if event.response.content:
            print("📤 Output:", event.response.content)
            final_output += event.response.content
        if event.tool_calls:
            print(
                "🛠️  Planning to use tools:",
                [call.tool_name for call in event.tool_calls],
            )
    elif isinstance(event, ToolCallResult):
        print(f"🔧 Tool Result ({event.tool_name}):")
        print(f"  Arguments: {event.tool_kwargs}")
        print(f"  Output: {event.tool_output}")
    elif isinstance(event, ToolCall):
        print(f"🔨 Calling Tool: {event.tool_name}")
        print(f"  With arguments: {event.tool_kwargs}")

In [30]:
# MAS for Similar Questions Evaluation

from llama_index.core.agent.workflow import FunctionAgent, ReActAgent


# Separate Gemini handle shared by the per-dimension evaluation sub-agents
# defined below. The API key is read from GEMINI_API_KEY.
# NOTE(review): thinking_budget=0 presumably disables the model's "thinking"
# phase and max_output_tokens presumably caps the reply length — confirm
# against the google-genai documentation.
sub_agent_llm = GoogleGenAI(
    model="gemini-2.5-flash",
    api_key = os.getenv("GEMINI_API_KEY"),
    generation_config=types.GenerateContentConfig(
        max_output_tokens=8192,
        thinking_config=types.ThinkingConfig(
            thinking_budget=0
        )  
    ),
)


# Structured-output schema passed as `output_cls` to conceptual_similarity_agent.
class ConceptualSimilarity(BaseModel):
    # Score that the agent's system prompt asks to be between 0 and 100;
    # the plain `int` annotation does not itself enforce that range.
    conceptual_similarity: int
    # Short explanation of why the score was given.
    conceptual_similarity_note: str

# Sub-agent that judges whether the fetched similar question tests the same
# underlying concepts as the original. It is given no tools (tools=[]) and is
# configured with the ConceptualSimilarity schema via output_cls. It is invoked
# by the evaluate_conceptual_similarity tool function.
conceptual_similarity_agent = FunctionAgent(
    system_prompt="""You are an academic expert in math, physics and chemistry.
You are tasked with evaluating the conceptual similarity of an original question and a fetched similar question alongside its solution approach.
You evaluate the similar question for if it tests the same underlying concepts/principles.
You return an object with keys conceptual_similarity and conceptual_similarity_note.
conceptual_similarity should be a score between 0 to 100.
conceptual_similarity_note should be a short note explaining the score.
""",
    llm=sub_agent_llm,
    tools=[],
    output_cls=ConceptualSimilarity,
)

# Structured-output schema passed as `output_cls` to structural_similarity_agent.
class StructuralSimilarity(BaseModel):
    # Score that the agent's system prompt asks to be between 0 and 100;
    # the plain `int` annotation does not itself enforce that range.
    structural_similarity: int
    # Short explanation of why the score was given.
    structural_similarity_note: str
    
# Sub-agent that judges whether the two problems are set up analogously
# (information given, what is asked, overall structure). It is given no tools
# (tools=[]) and is configured with the StructuralSimilarity schema via
# output_cls. It is invoked by the evaluate_structural_similarity tool function.
structural_similarity_agent = FunctionAgent(
    system_prompt="""You are an academic expert in math, physics and chemistry.
You are tasked with evaluating the structural similarity of an original question and a fetched similar question.
You evaluate if the problem structures are analogous, considering the type of information given, what is being asked, and the overall setup of the problem.
You return an object with keys structural_similarity and structural_similarity_note.
structural_similarity should be a score between 0 to 100.
structural_similarity_note should be a short note explaining the score.
""",
    llm=sub_agent_llm,
    tools=[],
    output_cls=StructuralSimilarity,
)

# Structured-output schema passed as `output_cls` to difficulty_alignment_agent.
class DifficultyAlignment(BaseModel):
    # Score that the agent's system prompt asks to be between 0 and 100;
    # the plain `int` annotation does not itself enforce that range.
    difficulty_alignment: int
    # Short explanation of why the score was given.
    difficulty_alignment_note: str

# Sub-agent that judges whether the two questions are of comparable difficulty
# (number of steps, calculation complexity, conceptual depth). It is given no
# tools (tools=[]) and is configured with the DifficultyAlignment schema via
# output_cls. It is invoked by the evaluate_difficulty_alignment tool function.
difficulty_alignment_agent = FunctionAgent(
    system_prompt="""You are an academic expert in math, physics and chemistry.
You are tasked with evaluating the difficulty alignment of an original question and a fetched similar question.
You evaluate if the difficulty level is appropriate, considering factors like the number of steps required, the complexity of calculations, and the depth of conceptual understanding needed.
You return an object with keys difficulty_alignment and difficulty_alignment_note.
difficulty_alignment should be a score between 0 to 100.
difficulty_alignment_note should be a short note explaining the score.
""",
    llm=sub_agent_llm,
    tools=[],
    output_cls=DifficultyAlignment,
)

# Structured-output schema passed as `output_cls` to approach_transferability_agent.
class ApproachTransferability(BaseModel):
    # Score that the agent's system prompt asks to be between 0 and 100;
    # the plain `int` annotation does not itself enforce that range.
    approach_transferability: int
    # Short explanation of why the score was given.
    approach_transferability_note: str

# Sub-agent that judges whether the similar question's solution method can be
# applied directly to the original question. It is given no tools (tools=[])
# and is configured with the ApproachTransferability schema via output_cls.
# It is invoked by the evaluate_approach_transferability tool function.
approach_transferability_agent = FunctionAgent(
    system_prompt="""You are an academic expert in math, physics and chemistry.
You are tasked with evaluating the solution approach transferability from a fetched similar question's solution to an original question.
You evaluate if the solution method, steps, and reasoning for the similar question can be meaningfully and directly applied to solve the original question.
You return an object with keys approach_transferability and approach_transferability_note.
approach_transferability should be a score between 0 to 100.
approach_transferability_note should be a short note explaining the score.
""",
    llm=sub_agent_llm,
    tools=[],
    output_cls=ApproachTransferability,
)




In [42]:
from llama_index.core.tools import FunctionTool

def _build_evaluation_prompt(
    original_question: str,
    similar_question: str,
    solution_approach: str,
    criterion: str,
) -> str:
    """Render the user message shared by all four evaluation tools.

    Args:
        original_question: Text of the question the student asked.
        similar_question: Text of the fetched similar question.
        solution_approach: Summarised solution approach of the similar question.
        criterion: Human-readable name of the criterion, inserted into the
            closing instruction (e.g. "conceptual similarity").

    Returns:
        The prompt string, with the same layout and indentation for every
        criterion; only the criterion name in the last sentence varies.
    """
    return f"""
    Original Question:
    {original_question}

    Fetched Similar Question:
    {similar_question}

    Solution Approach for Similar Question:
    {solution_approach}

    Please evaluate the {criterion} based on these inputs.
    """


async def _run_evaluation(
    agent,
    original_question: str,
    similar_question: str,
    solution_approach: str,
    criterion: str,
) -> str:
    """Run one criterion-specific sub-agent and return its reply as a string.

    Args:
        agent: The sub-agent to run; it must expose an awaitable
            ``run(user_msg=...)`` method.
        original_question: Text of the question the student asked.
        similar_question: Text of the fetched similar question.
        solution_approach: Summarised solution approach of the similar question.
        criterion: Criterion name forwarded to the prompt template.

    Returns:
        ``str()`` of whatever the agent run returns.
    """
    prompt = _build_evaluation_prompt(
        original_question, similar_question, solution_approach, criterion
    )
    result = await agent.run(user_msg=prompt)
    return str(result)


# NOTE: the four functions below are passed directly in the orchestrator's
# `tools` list. LlamaIndex presumably derives each tool's name, schema and
# description from the function name, signature and docstring, so those are
# kept exactly as they were; implementation notes go in `#` comments instead.

async def evaluate_conceptual_similarity(original_question: str, similar_question: str, solution_approach: str) -> str:
    """
    Useful for evaluating if two questions test the same underlying concepts and principles.
    Returns a score and a note on conceptual similarity.
    """
    # The agent global is looked up at call time, so the cell defining the
    # agents may be re-run without redefining this tool.
    return await _run_evaluation(
        conceptual_similarity_agent,
        original_question,
        similar_question,
        solution_approach,
        "conceptual similarity",
    )

async def evaluate_structural_similarity(original_question: str, similar_question: str, solution_approach: str) -> str:
    """
    Useful for evaluating if the problem structures of two questions are analogous.
    Considers the type of information given and the problem setup.
    Returns a score and a note on structural similarity.
    """
    return await _run_evaluation(
        structural_similarity_agent,
        original_question,
        similar_question,
        solution_approach,
        "structural similarity",
    )

async def evaluate_difficulty_alignment(original_question: str, similar_question: str, solution_approach: str) -> str:
    """
    Useful for evaluating if two questions have an appropriate and similar difficulty level.
    Considers complexity, number of steps, and required knowledge.
    Returns a score and a note on difficulty alignment.
    """
    return await _run_evaluation(
        difficulty_alignment_agent,
        original_question,
        similar_question,
        solution_approach,
        "difficulty alignment",
    )

async def evaluate_approach_transferability(original_question: str, similar_question: str, solution_approach: str) -> str:
    """
    Useful for evaluating if the solution method for one question can be meaningfully applied to solve another.
    Returns a score and a note on the transferability of the solution approach.
    """
    return await _run_evaluation(
        approach_transferability_agent,
        original_question,
        similar_question,
        solution_approach,
        "solution approach transferability",
    )

In [43]:
# LLM client used by the orchestrator agent. It is configured with the same
# model and generation settings as sub_agent_llm but is a separate instance.
# NOTE(review): thinking_budget=0 presumably switches off the model's "thinking"
# phase — confirm against the Gemini API docs.
orchestrator_llm = GoogleGenAI(
    model="gemini-2.5-flash",
    # os.getenv returns None when GEMINI_API_KEY is not set in the environment.
    api_key = os.getenv("GEMINI_API_KEY"),
    generation_config=types.GenerateContentConfig(
        max_output_tokens=8192,
        thinking_config=types.ThinkingConfig(
            thinking_budget=0
        )  
    ),
)



# Structured-output schema passed as `output_cls` to the orchestrator agent.
# Its fields mirror the keys listed in the orchestrator's system prompt.
class SimilarQuestionsEvaluation(BaseModel):
    # Echo of the evaluated inputs.
    similar_question: str
    solution_approach: str
    # One score per criterion, as reported by the corresponding evaluation tool.
    conceptual_similarity_score: int
    structural_similarity_score: int
    difficulty_alignment_score: int
    solution_approach_transferability_score: int
    # Aggregate score produced by the orchestrator model; nothing in this class
    # computes or validates it from the four scores above.
    total_score: int
    # Short free-text summary of the overall evaluation.
    notes: str
    
    
# Orchestrator agent: calls the four evaluation tools and merges their results
# into a single SimilarQuestionsEvaluation object.
#
# The prompt spells out how total_score is aggregated. Leaving it as just
# "the total score" lets the model pick its own aggregation: runs recorded in
# this notebook report average-like totals (98, 17) as well as a sum (400, from
# the HTTP service, which presumably uses the same prompt). The mean keeps
# total_score on the same 0-100 scale as the individual scores.
orchestrator = FunctionAgent(
    system_prompt="""You are an academic evaluation expert. You will be given a question, its similar question and solution approach.
    You evaluate the similar question for multiple criteria via the tools provided to you.
    You form a comprehensive evaluation from the results you receive from the tools to create a JSON object with the following keys:
    similar_question: The similar question
    solution_approach: The solution approach
    conceptual_similarity_score: The conceptual similarity score
    structural_similarity_score: The structural similarity score
    difficulty_alignment_score: The difficulty alignment score
    solution_approach_transferability_score: The solution approach transferability score
    total_score: The arithmetic mean of the four scores above, rounded to the nearest integer (a value between 0 and 100)
    notes: A short note explaining the complete evaluation of the relevance of the similar question 
    
    """,
    llm=orchestrator_llm,
    tools=[
        evaluate_conceptual_similarity,
        evaluate_structural_similarity,
        evaluate_difficulty_alignment,
        evaluate_approach_transferability
    ],
    output_cls=SimilarQuestionsEvaluation,
)

In [50]:
from llama_index.core.agent.workflow import (
    AgentInput,
    AgentOutput,
    ToolCall,
    ToolCallResult,
    AgentStream,
)
from llama_index.core.workflow import Context


async def run_orchestrator(user_msg: str):
    """Run the orchestrator agent on ``user_msg`` and print a live trace.

    Streamed text deltas, planned tool calls, tool invocations and tool
    results are printed as they arrive.

    Args:
        user_msg: The prompt describing the original question, the fetched
            similar question and its solution approach.

    Returns:
        The text content of the last agent output that requested no further
        tool calls, or ``None`` if no such output was produced.

    Raises:
        Whatever exception the workflow run raised, re-raised when the
        handler is awaited after the stream ends.
    """
    handler = orchestrator.run(
        user_msg=user_msg,
    )

    final_content = None

    async for event in handler.stream_events():
        if isinstance(event, AgentStream):
            if event.delta:
                print(event.delta, end="", flush=True)
        elif isinstance(event, AgentOutput):
            # The response text is not printed here because it has already
            # been streamed delta by delta above.
            if event.tool_calls:
                print(
                    "🛠️  Planning to use tools:",
                    [call.tool_name for call in event.tool_calls],
                )
            else:
                # Remember the answer but keep consuming the stream: returning
                # from inside the loop would abandon the run before it has
                # finished and before any failure could be observed.
                final_content = event.response.content
        # ToolCallResult is tested before ToolCall, as in the original ordering.
        elif isinstance(event, ToolCallResult):
            print(f"🔧 Tool Result ({event.tool_name}):")
            print(f"  Arguments: {event.tool_kwargs}")
            print(f"  Output: {event.tool_output}")
        elif isinstance(event, ToolCall):
            print(f"🔨 Calling Tool: {event.tool_name}")
            print(f"  With arguments: {event.tool_kwargs}")

    # Awaiting the handler waits for the run to complete and surfaces any
    # exception raised inside the workflow instead of silently returning None.
    await handler
    return final_content


In [51]:
# Display the first record to recall the structure of a dataset entry.
data[0]

{'question_id': '006d7',
 'question_text': 'माना A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$, जहाँ i = √-1 है। तो समुच्चय { n ∈ {1,2,......,100} : $A^n$ = A} में अवयवों की संख्या है _______\n\nThe image shows a mathematical expression defining a matrix A and a set condition. The matrix A is a 2x2 matrix with elements 1+i, 1, -i, and 0, where i is defined as the square root of -1. The problem asks to find the number of elements in the set of n belonging to {1, 2, ..., 100} such that A to the power of n equals A.',
 'subject': 'Mathematics',
 'similar_questions': [{'similar_question_text': 'Let A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$ where I = $\\sqrt{-1}$. Then, the number of elements in the set {n$\\in$ {1,2, ....., 100} : $A^n$ = A} is',
   'similarity_score': 0.981,
   'summarized_solution_approach': 'The solution calculates $A^2$ and $A^4$. It finds that $A^4 = I$ (identity matrix). Consequently, $A^5 = A, A^9 = A$, and so on. The values of n for which $A

In [45]:
await run_orchestrator(
    user_msg = f"""
    Original Question:
    {data[15]["question_text"]}

    Fetched Similar Question:
    {data[15]["similar_questions"][0]["similar_question_text"]}

    Solution Approach for Similar Question:
    {data[15]["similar_questions"][0]["summarized_solution_approach"]}

    Please evaluate the solution approach transferability based on these inputs and retur the results
    """
)


🛠️  Planning to use tools: ['evaluate_conceptual_similarity', 'evaluate_structural_similarity', 'evaluate_difficulty_alignment', 'evaluate_approach_transferability']
🔨 Calling Tool: evaluate_conceptual_similarity
  With arguments: {'similar_question': '\\sum_{k = 0}^{6} {^{51-k}C_3} is equal to', 'solution_approach': 'The summation series is expanded by substituting the values of k. Then, $^{45}C_4$ is added and subtracted. Using the identity $^{n}C_r + ^{n}C_{r-1} = ^{n+1}C_r$ repeatedly, the series is simplified to arrive at the final answer.', 'original_question': 'The image contains a mathematical expression involving a summation, binomial coefficients, and powers. Specifically, it shows a summation from k=0 to n of the expression (-1)^k * (n choose k) * (n-k)^m, along with the conditions m=|A| and n=|B|.'}
🔨 Calling Tool: evaluate_structural_similarity
  With arguments: {'solution_approach': 'The summation series is expanded by substituting the values of k. Then, $^{45}C_4$ is add



🔧 Tool Result (evaluate_conceptual_similarity):
  Arguments: {'similar_question': '\\sum_{k = 0}^{6} {^{51-k}C_3} is equal to', 'solution_approach': 'The summation series is expanded by substituting the values of k. Then, $^{45}C_4$ is added and subtracted. Using the identity $^{n}C_r + ^{n}C_{r-1} = ^{n+1}C_r$ repeatedly, the series is simplified to arrive at the final answer.', 'original_question': 'The image contains a mathematical expression involving a summation, binomial coefficients, and powers. Specifically, it shows a summation from k=0 to n of the expression (-1)^k * (n choose k) * (n-k)^m, along with the conditions m=|A| and n=|B|.'}
  Output: ```json
{
  "conceptual_similarity": 30,
  "conceptual_similarity_note": "The original question involves a more complex combinatorial identity often related to inclusion-exclusion or differences of functions, specifically involving powers and alternating signs. The fetched question is a direct application of the Hockey-stick identity (

In [46]:
await run_orchestrator(
    user_msg = f"""
    Original Question:
    {data[0]["question_text"]}

    Fetched Similar Question:
    {data[0]["similar_questions"][0]["similar_question_text"]}

    Solution Approach for Similar Question:
    {data[0]["similar_questions"][0]["summarized_solution_approach"]}

    Please evaluate the solution approach transferability based on these inputs and retur the results
    """
)


🛠️  Planning to use tools: ['evaluate_conceptual_similarity', 'evaluate_structural_similarity', 'evaluate_difficulty_alignment', 'evaluate_approach_transferability']
🔨 Calling Tool: evaluate_conceptual_similarity
  With arguments: {'similar_question': 'Let A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$ where I = $\\sqrt{-1}$. Then, the number of elements in the set {n$\\in$ {1,2, ....., 100} : $A^n$ = A} is', 'original_question': 'The image shows a mathematical expression defining a matrix A and a set condition. The matrix A is a 2x2 matrix with elements 1+i, 1, -i, and 0, where i is defined as the square root of -1. The problem asks to find the number of elements in the set of n belonging to {1, 2, ..., 100} such that A to the power of n equals A.', 'solution_approach': 'The solution calculates $A^2$ and $A^4$. It finds that $A^4 = I$ (identity matrix). Consequently, $A^5 = A, A^9 = A$, and so on. The values of n for which $A^n = A$ form an arithmetic progression: n = 1, 5, 9

In [56]:
orchestrator_response = await orchestrator.run(
        user_msg= f"""
    Original Question:
    {data[0]["question_text"]}

    Fetched Similar Question:
    {data[0]["similar_questions"][0]["similar_question_text"]}

    Solution Approach for Similar Question:
    {data[0]["similar_questions"][0]["summarized_solution_approach"]}

    Please evaluate the solution approach transferability based on these inputs and retur the results
    """,
    )



In [58]:
# Show the structured result of the orchestrator run; the recorded output
# carries the keys of the SimilarQuestionsEvaluation schema.
orchestrator_response.structured_response

{'similar_question': 'Let A = $\\begin{pmatrix} 1+i & 1 \\\\ -i & 0 \\\\ \\end{pmatrix}$ where I = $\\sqrt{-1}$. Then, the number of elements in the set {n$\\in$ {1,2, ....., 100} : $A^n$ = A} is',
 'solution_approach': 'The solution calculates $A^2$ and $A^4$. It finds that $A^4 = I$ (identity matrix). Consequently, $A^5 = A, A^9 = A$, and so on. The values of n for which $A^n = A$ form an arithmetic progression: n = 1, 5, 9, ..., 97. The number of terms in this sequence is then calculated, which gives the number of elements in the set.',
 'conceptual_similarity_score': 100,
 'structural_similarity_score': 100,
 'difficulty_alignment_score': 95,
 'solution_approach_transferability_score': 100,
 'total_score': 98,
 'notes': 'The similar question is nearly identical to the original question in all aspects: conceptual understanding, structure, and difficulty. Consequently, the solution approach provided is perfectly transferable and directly applicable to solving the original question. T

In [4]:
import requests
import json

# Smoke-test the local /evaluate HTTP endpoint with record 0.

# The dataset contains non-ASCII (Devanagari) text, so decode it as UTF-8
# explicitly instead of relying on the platform's default encoding.
with open('similar_question_data.json', encoding="utf-8") as f:
    data = json.load(f)


payload = {
    "question_text": data[0]["question_text"],
    "similar_question": data[0]["similar_questions"][0]["similar_question_text"],
    "summarized_solution_approach": data[0]["similar_questions"][0]["summarized_solution_approach"]
}

response = requests.post(
    "http://localhost:8000/evaluate",
    json=payload,
    # (connect, read) timeouts in seconds: fail fast if the server is not up,
    # but allow a long read because the endpoint presumably runs several LLM
    # calls. Without a timeout, requests would wait indefinitely.
    timeout=(5, 300),
)

print("Status Code:", response.status_code)
print("Response:")
try:
    print(json.dumps(response.json(), indent=2))
except ValueError:
    # The body was not valid JSON (e.g. an HTML error page); show it raw.
    print(response.text)

Status Code: 200
Response:
{
  "success": true,
  "data": {
    "similar_question": "Let A = $\\begin{pmatrix} 1+i & 1 \\ -i & 0 \\end{pmatrix}$ where I = $\\sqrt{-1}$. Then, the number of elements in the set {n$\\in$ {1,2, ....., 100} : $A^n$ = A} is",
    "solution_approach": "The solution calculates $A^2$ and $A^4$. It finds that $A^4 = I$ (identity matrix). Consequently, $A^5 = A, A^9 = A$, and so on. The values of n for which $A^n = A$ form an arithmetic progression: n = 1, 5, 9, ..., 97. The number of terms in this sequence is then calculated, which gives the number of elements in the set.",
    "conceptual_similarity_score": 100,
    "structural_similarity_score": 100,
    "difficulty_alignment_score": 100,
    "solution_approach_transferability_score": 100,
    "total_score": 400,
    "notes": "The similar question is an exact match to the original question in all aspects: conceptual understanding, structural presentation, and difficulty level. Consequently, the provided soluti

In [10]:
import requests
import json

# Smoke-test the local /evaluate HTTP endpoint with record 15.

# The dataset contains non-ASCII (Devanagari) text, so decode it as UTF-8
# explicitly instead of relying on the platform's default encoding.
with open('similar_question_data.json', encoding="utf-8") as f:
    data = json.load(f)


payload = {
    "question_text": data[15]["question_text"],
    "similar_question": data[15]["similar_questions"][0]["similar_question_text"],
    "summarized_solution_approach": data[15]["similar_questions"][0]["summarized_solution_approach"]
}

response = requests.post(
    "http://localhost:8000/evaluate",
    json=payload,
    # (connect, read) timeouts in seconds: fail fast if the server is not up,
    # but allow a long read because the endpoint presumably runs several LLM
    # calls. Without a timeout, requests would wait indefinitely.
    timeout=(5, 300),
)

print("Status Code:", response.status_code)
print("Response:")
try:
    print(json.dumps(response.json(), indent=2))
except ValueError:
    # The body was not valid JSON (e.g. an HTML error page); show it raw.
    print(response.text)

Status Code: 200
Response:
{
  "success": true,
  "data": {
    "similar_question": "\\sum_{k = 0}^{6} {^{51-k}C_3} is equal to",
    "solution_approach": "The summation series is expanded by substituting the values of k. Then, $^{45}C_4$ is added and subtracted. Using the identity $^{n}C_r + ^{n}C_{r-1} = ^{n+1}C_r$ repeatedly, the series is simplified to arrive at the final answer.",
    "conceptual_similarity_score": 20,
    "structural_similarity_score": 10,
    "difficulty_alignment_score": 20,
    "solution_approach_transferability_score": 20,
    "total_score": 17,
    "notes": "The similar question shares very little in common with the original question. While both involve summations and binomial coefficients, their conceptual underpinnings and structural complexities are vastly different. The original question presents a general, complex combinatorial identity (likely related to inclusion-exclusion or Stirling numbers), requiring a deep theoretical understanding. In contrast, 