In [None]:
# %pip install openai python-dotenv pandas

In [None]:
import json
import os
import warnings

import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI

In [None]:
# Read environment variables (presumably including OPENAI_API_KEY) from a
# local .env file. Do not hardcode the key in this notebook and do not print
# it: cell outputs are saved with the notebook and would leak the secret.

load_dotenv()

In [4]:
# Shared API client used by generate_mcqs and evaluate_mcqs below; constructed
# with no explicit arguments, so credentials are expected to come from the
# environment prepared above.
client = OpenAI()

In [9]:
def generate_mcqs(context, num_questions=5, model="gpt-4o", max_tokens=1000):
    """
    Generate MCQs from a given context using an LLM.

    Args:
        context: Source text the questions must be based on.
        num_questions: Number of questions to request; must be a positive integer.
        model: Chat-completions model name sent to the API.
        max_tokens: Completion token cap sent to the API. Increase it when
            requesting many questions, otherwise the reply may be cut short.

    Returns:
        The raw text of the model reply, with questions in the
        "Q1. ... / A) ... / Correct Answer: ..." layout requested by the
        prompt. Returns "" if the API reply carries no text content.

    Raises:
        ValueError: If ``num_questions`` is not a positive integer or
            ``context`` is empty/blank.
    """
    # Fail fast on inputs that would only waste an API call.
    if not isinstance(num_questions, int) or num_questions < 1:
        raise ValueError(f"num_questions must be a positive integer, got {num_questions!r}")
    if context is None or not str(context).strip():
        raise ValueError("context must be a non-empty string")

    prompt = f"""
    Generate {num_questions} multiple-choice questions (MCQs) based on the following context. 
    Each question should have 4 options, with one correct answer and three distractors.
    
    Context: {context}
    
    Format each question as follows:
    Q1. [Question text]
    A) [Option 1]
    B) [Option 2]
    C) [Option 3]
    D) [Option 4]
    Correct Answer: [Correct Option]
    
    Please ensure the questions are relevant, clear, and cover key aspects of the context.
    """

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant that generates multiple-choice questions."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=max_tokens
    )

    choice = response.choices[0]
    # finish_reason == "length" means the reply hit max_tokens, so the last
    # question is probably incomplete; surface that instead of hiding it.
    if getattr(choice, "finish_reason", None) == "length":
        warnings.warn(
            f"MCQ generation stopped at max_tokens={max_tokens}; "
            "the output may be truncated. Consider raising max_tokens.",
            stacklevel=2,
        )

    # content can be None (e.g. a refusal); normalise so callers can always
    # treat the result as a string.
    return choice.message.content or ""

In [10]:
# Sample source passage; it is passed to generate_mcqs to produce questions
# and to evaluate_mcqs as the reference text for scoring them.
context = """
Photosynthesis is the process by which green plants and some other organisms use sunlight to synthesize foods with the help of chlorophyll. 
The process involves the intake of carbon dioxide and water, which are then converted into glucose and oxygen. 
Photosynthesis occurs in the chloroplasts of plant cells, where chlorophyll captures light energy. 
This process is crucial for the production of oxygen and the removal of carbon dioxide from the atmosphere.
"""

In [11]:
# Calls the chat-completions API once; `mcqs` holds the raw reply text
# (a single string) at this point.
mcqs = generate_mcqs(context)

In [13]:
# Split the raw reply into one string per question; the model separates
# questions with a blank line. The isinstance guard keeps this cell
# idempotent: once `mcqs` is already a list, re-running the cell is a no-op
# instead of raising AttributeError. Blank chunks are dropped so they are not
# later sent for evaluation.
if isinstance(mcqs, str):
    mcqs = [chunk for chunk in mcqs.split("\n\n") if chunk.strip()]
mcqs

['Q1. What is the primary pigment used in photosynthesis?\nA) Hemoglobin  \nB) Carotenoid  \nC) Chlorophyll  \nD) Myoglobin  \nCorrect Answer: C) Chlorophyll  ',
 'Q2. In which part of the plant cell does photosynthesis occur?\nA) Mitochondria  \nB) Nucleus  \nC) Chloroplasts  \nD) Ribosomes  \nCorrect Answer: C) Chloroplasts  ',
 'Q3. What are the two main raw materials needed for photosynthesis?\nA) Glucose and oxygen  \nB) Nitrogen and water  \nC) Carbon dioxide and water  \nD) Chlorophyll and sunlight  \nCorrect Answer: C) Carbon dioxide and water  ',
 'Q4. What is one of the products of photosynthesis?\nA) Nitrogen  \nB) Water  \nC) Glucose  \nD) Ammonia  \nCorrect Answer: C) Glucose  ',
 'Q5. Why is photosynthesis important for the environment?\nA) It decreases the level of oxygen in the atmosphere.  \nB) It removes carbon dioxide and produces oxygen.  \nC) It consumes all available sunlight.  \nD) It leads to the depletion of chlorophyll.  \nCorrect Answer: B) It removes carbon 

In [14]:
# print (rather than a bare expression) so the embedded "\n" characters
# render as real line breaks.
print(mcqs[0])

Q1. What is the primary pigment used in photosynthesis?
A) Hemoglobin  
B) Carotenoid  
C) Chlorophyll  
D) Myoglobin  
Correct Answer: C) Chlorophyll  


In [29]:
# Metric names as they appear in the JSON keys requested from the model
# ("<metric>_score" / "<metric>_justification"); order defines column order.
_EVAL_METRICS = ("relevance", "clarity", "difficulty", "correctness")


def _parse_json_reply(raw):
    """Parse a model reply as JSON, tolerating a surrounding Markdown code fence."""
    text = (raw or "").strip()
    if text.startswith("```"):
        # Drop the opening fence line (``` or ```json) and the closing fence.
        text = text.split("\n", 1)[1] if "\n" in text else ""
        text = text.rsplit("```", 1)[0]
    return json.loads(text)


def _flatten_evaluation(eval_data):
    """Turn the nested evaluation JSON into one flat row keyed by column title."""
    scores = eval_data["evaluations"]
    row = {"Question": eval_data["question"]}
    for metric in _EVAL_METRICS:
        title = metric.capitalize()
        row[f"{title} Score"] = scores[f"{metric}_score"]
        row[f"{title} Justification"] = scores[f"{metric}_justification"]
    return row


def evaluate_mcqs(mcqs, context):
    """
    Score each MCQ against the context with an LLM and tabulate the results.

    Args:
        mcqs: Iterable of MCQ strings (one question with its options each).
            A single raw string is also accepted and is split on blank lines,
            so it is not iterated character by character.
        context: Reference text the questions were generated from.

    Returns:
        pandas.DataFrame with one row per successfully evaluated question and
        the columns "Question" plus "<Metric> Score" / "<Metric> Justification"
        for relevance, clarity, difficulty and correctness. The columns are
        present even when no question could be evaluated.

    Notes:
        Questions whose reply is not valid JSON, or lacks the requested keys,
        are reported via print and skipped; the remaining questions are still
        evaluated. Errors raised by the API call itself are not caught.
    """
    if isinstance(mcqs, str):
        mcqs = mcqs.split("\n\n")

    columns = ["Question"]
    for metric in _EVAL_METRICS:
        columns += [f"{metric.capitalize()} Score", f"{metric.capitalize()} Justification"]

    results = []
    for question in mcqs:
        # Blank chunks (e.g. from trailing newlines) carry nothing to evaluate.
        if not question or not question.strip():
            continue

        prompt = f"""
        Evaluate the following MCQ based on the provided context.
        Return your evaluation in valid JSON format with scores (0-1) and justifications.

        Context: {context}

        MCQ: {question}

        Required JSON format:
        {{
            "question": "<question_text>",
            "evaluations": {{
                "relevance_score": <0-1>,
                "relevance_justification": "<text>",
                "clarity_score": <0-1>,
                "clarity_justification": "<text>",
                "difficulty_score": <0-1>,
                "difficulty_justification": "<text>",
                "correctness_score": <0-1>,
                "correctness_justification": "<text>"
            }}
        }}
        """

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are an MCQ evaluation assistant. Always respond with valid JSON in the exact format specified."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=1000,
            # JSON mode: ask the API to return a syntactically valid JSON
            # object (the prompt already mentions JSON, as JSON mode requires).
            response_format={"type": "json_object"},
        )

        try:
            eval_data = _parse_json_reply(response.choices[0].message.content)
            results.append(_flatten_evaluation(eval_data))
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON for question: {question}\nError: {str(e)}")
        except (KeyError, TypeError) as e:
            # Valid JSON but not in the requested shape; skip this question
            # rather than discarding the rows already collected.
            print(f"Unexpected evaluation structure for question: {question}\nError: {e!r}")

    return pd.DataFrame(results, columns=columns)

In [30]:
# One API call per entry in `mcqs`; the returned DataFrame is rendered as the
# cell output.
evaluate_mcqs(mcqs, context)

Unnamed: 0,Question,Relevance Score,Relevance Justification,Clarity Score,Clarity Justification,Difficulty Score,Difficulty Justification,Correctness Score,Correctness Justification
0,What is the primary pigment used in photosynth...,1,The question is highly relevant to the context...,1,The question is clear and concise. The meaning...,0.2,The difficulty level of the question is quite ...,1,"The correct answer, Chlorophyll, is listed as ..."
1,In which part of the plant cell does photosynt...,1,The question is highly relevant to the provide...,1,The question is clear and straightforward. It ...,0.2,"Given the context, the question is not difficu...",1,The answer (Chloroplasts) is correctly identif...
2,What are the two main raw materials needed for...,1,The question is highly relevant as it directly...,1,The question is clearly stated and specificall...,0.2,The difficulty is relatively low because the c...,1,The correct answer is C) Carbon dioxide and wa...
3,What is one of the products of photosynthesis?,1,The question is directly related to the contex...,1,The question is very clear and straightforward...,0.2,"Given the context, the question is not very di...",1,The correct answer is given as 'C) Glucose' wh...
4,Why is photosynthesis important for the enviro...,1,The question is highly relevant to the context...,1,The question is clearly worded and straightfor...,0.2,The difficulty of the question is minimal beca...,1,"The correct answer, choice B, is accurate acco..."
