In [None]:
# %pip install openai python-dotenv pandas

In [1]:
from openai import OpenAI
from dotenv import load_dotenv
import json
import pandas as pd
from os import getenv

In [2]:
# Load variables from a local .env file so that API keys read from the
# environment further down (e.g. via getenv) are available.
load_dotenv()

True

In [3]:
# Alternative client routed through OpenRouter, kept commented out for reference:
# client = OpenAI(
#   base_url="https://openrouter.ai/api/v1",
#   api_key=getenv("OPENROUTER_API_KEY"),
# )

# Client used by generate_mcqs below. No explicit key is passed, so credentials
# must come from the SDK defaults (presumably the OPENAI_API_KEY environment
# variable — confirm).
client = OpenAI()

In [4]:
def generate_mcqs(context, assessment_los, num_questions=30,
                  model="gpt-4o-2024-11-20", llm_client=None):
    """
    Generate MCQs from a given context using an LLM.

    Args:
        context: Course material text interpolated into the prompt.
        assessment_los: Learning-outcome text interpolated into the prompt.
        num_questions: Number of MCQs requested in the prompt.
        model: Chat-completions model identifier to request.
        llm_client: Object exposing ``chat.completions.create`` (for example an
            ``OpenAI`` instance). When omitted, the notebook-level ``client``
            is used, which keeps existing calls working.

    Returns:
        str: The raw text of the first completion choice, or an empty string
        when the model returned no text content.
    """
    # The notebook rebinds the global `client` in a later cell; accepting an
    # explicit client lets callers pin which backend serves this request.
    if llm_client is None:
        llm_client = client

    prompt = f"""
    You are a master computer science educator that is proficient in teaching students and 
    creatting multiple-choice questions (MCQs) assessment that assesses their 
    understanding of given topics and achieves the learning outcomes of the assessment.
    Generate {num_questions} MCQs based on the following context which includes a list of topics covered in a Python programming course, 
    and the learning outcomes of the assessment.
    Each question should have 4 options, with one correct answer and three distractors.
    Please ensure the questions are relevant, clear, and cover key aspects of the context.
    The questions should be in a variety of difficulty levels (i.e. easy, medium, hard).
    
    Context: {context}
    Assessment learning outcomes: {assessment_los}
    
    Format each question strictly as follows:
    Q1. [Question text]
    A) [Option 1]
    B) [Option 2]
    C) [Option 3]
    D) [Option 4]
    Correct Answer: [Correct Option]
    Difficulty: [Difficulty Level]
    
    
    Your answer striclty must be Q1, Q2, Q3, and so on. Nothing extra, don't write any markdown in your response.
   
    """

    response = llm_client.chat.completions.create(
        model=model,
        messages=[
            {"role": "user", "content": prompt}
        ]
    )

    reply_text = response.choices[0].message.content
    # A reply without text content comes back as None; normalise it so callers
    # can always treat the result as a string.
    return reply_text if reply_text is not None else ""

In [5]:
# Read the course context from a text file.
# An explicit encoding keeps the read independent of the platform default.
# `context` is deliberately not pre-initialised to "": if the file cannot be
# read, later cells should fail loudly instead of prompting with empty text.
with open("content.txt", "r", encoding="utf-8") as f:
    context = f.read()
    
# print(context)

In [6]:
# Read the assessment learning outcomes from a text file.
# An explicit encoding keeps the read independent of the platform default.
# `assessment_los` is deliberately not pre-initialised to "": if the file
# cannot be read, later cells should fail loudly instead of using empty text.
with open("A1_los.txt", "r", encoding="utf-8") as f:
    assessment_los = f.read()
    
# print(assessment_los)

In [7]:
# Request the questions from the model; num_questions is left at the
# function's default.
mcqs = generate_mcqs(context, assessment_los)

In [8]:
# Inspect the raw reply text returned by the model.
mcqs

'Q1. What is the first step in the six-step problem-solving process?  \nA) Analyse the problem  \nB) Identify the problem  \nC) Implement the solution  \nD) Generate potential solutions  \nCorrect Answer: B  \nDifficulty: Easy  \n\nQ2. Which of the following best defines wicked problems?  \nA) Problems that have simple solutions.  \nB) Problems that occur regularly and are easy to solve.  \nC) Problems difficult to define, with no clear solutions.  \nD) Problems that always have one correct answer.  \nCorrect Answer: C  \nDifficulty: Medium  \n\nQ3. Which programming construct is used to repeatedly execute a block of code while a condition is true?  \nA) for loop  \nB) while loop  \nC) if statement  \nD) elif statement  \nCorrect Answer: B  \nDifficulty: Easy  \n\nQ4. What does the Python function `type()` do?  \nA) Converts a variable into a different type.  \nB) Checks the size of the variable.  \nC) Returns the data type of a variable.  \nD) Assigns a type to the variable.  \nCorrec

In [9]:
# Split the raw reply into one string per question. The isinstance guard makes
# this cell safe to re-run: once `mcqs` is already a list it is left untouched
# instead of failing on a missing `.split`.
if isinstance(mcqs, str):
    # Questions are separated by a blank line; drop empty fragments so no
    # blank "question" ends up in the list.
    mcqs = [block.strip() for block in mcqs.split("\n\n") if block.strip()]
mcqs

['Q1. What is the first step in the six-step problem-solving process?  \nA) Analyse the problem  \nB) Identify the problem  \nC) Implement the solution  \nD) Generate potential solutions  \nCorrect Answer: B  \nDifficulty: Easy  ',
 'Q2. Which of the following best defines wicked problems?  \nA) Problems that have simple solutions.  \nB) Problems that occur regularly and are easy to solve.  \nC) Problems difficult to define, with no clear solutions.  \nD) Problems that always have one correct answer.  \nCorrect Answer: C  \nDifficulty: Medium  ',
 'Q3. Which programming construct is used to repeatedly execute a block of code while a condition is true?  \nA) for loop  \nB) while loop  \nC) if statement  \nD) elif statement  \nCorrect Answer: B  \nDifficulty: Easy  ',
 'Q4. What does the Python function `type()` do?  \nA) Converts a variable into a different type.  \nB) Checks the size of the variable.  \nC) Returns the data type of a variable.  \nD) Assigns a type to the variable.  \nCo

In [10]:
# Spot-check one parsed question (index 3 is the fourth entry); print renders
# the embedded newlines so the option layout is readable.
print(mcqs[3])

Q4. What does the Python function `type()` do?  
A) Converts a variable into a different type.  
B) Checks the size of the variable.  
C) Returns the data type of a variable.  
D) Assigns a type to the variable.  
Correct Answer: C  
Difficulty: Easy  


In [11]:
# Point the client at OpenRouter for the evaluation step; the key is read from
# the OPENROUTER_API_KEY environment variable.
# NOTE(review): this rebinds the same global `client` that generate_mcqs reads,
# so calling generate_mcqs after this cell would also go through OpenRouter —
# confirm that is intended, or re-run the earlier client cell first.
client = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key=getenv("OPENROUTER_API_KEY"),
)

In [20]:
# Column order of the evaluation table; also used to give an empty result a
# stable schema.
_EVALUATION_COLUMNS = [
    'Question',
    'Relevance Score',
    'Relevance Justification',
    'Clarity Score',
    'Clarity Justification',
    'Difficulty Score',
    'Difficulty Justification',
    'Correctness Score',
    'Correctness Justification',
]


def _extract_json_payload(raw_reply):
    """Return the outermost ``{...}`` span of a model reply (or the stripped reply if none)."""
    text = (raw_reply or "").strip()
    start, end = text.find("{"), text.rfind("}")
    # Models often wrap JSON in markdown fences or add a sentence around it;
    # keeping only the outermost braces tolerates both.
    if start != -1 and end > start:
        return text[start:end + 1]
    return text


def _evaluation_row(eval_data):
    """Flatten one parsed evaluation into a table row; raises KeyError/TypeError on a malformed payload."""
    scores = eval_data['evaluations']
    return {
        'Question': eval_data['question'],
        'Relevance Score': scores['relevance_score'],
        'Relevance Justification': scores['relevance_justification'],
        'Clarity Score': scores['clarity_score'],
        'Clarity Justification': scores['clarity_justification'],
        'Difficulty Score': scores['difficulty_score'],
        'Difficulty Justification': scores['difficulty_justification'],
        'Correctness Score': scores['correctness_score'],
        'Correctness Justification': scores['correctness_justification']
    }


def evaluate_mcqs(mcqs, context, assessment_los,
                  model="deepseek/deepseek-chat:free", llm_client=None):
    """
    Score each MCQ with an LLM and collect the evaluations in a DataFrame.

    Args:
        mcqs: Iterable of question strings; blank entries are skipped.
        context: Course material text interpolated into the prompt.
        assessment_los: Learning-outcome text interpolated into the prompt.
        model: Chat-completions model identifier to request.
        llm_client: Object exposing ``chat.completions.create``. When omitted,
            the notebook-level ``client`` is used.

    Returns:
        pandas.DataFrame: One row per successfully parsed evaluation, with the
        question text plus a score and justification for relevance, clarity,
        difficulty and correctness. Questions whose reply cannot be parsed, or
        lacks a required field, are reported via ``print`` and omitted. The
        columns are present even when no row was produced.
    """
    if llm_client is None:
        llm_client = client

    results = []
    for question in mcqs:
        # Splitting a reply on blank lines can leave empty fragments; do not
        # spend an API call on them.
        if not str(question).strip():
            continue

        prompt = f"""
        Evaluate the following MCQ based on the provided context in addition to the learning outcomes that the generated MCQ should achieve.
        Return your evaluation in valid JSON format with scores (0-1) and justifications.

        Context: {context}
        
        Learning Outcomes: {assessment_los}

        MCQ: {question}

        Required JSON format:
        {{
            "question": "<question_text>",
            "evaluations": {{
                "relevance_score": <0-1>,
                "relevance_justification": "<text>",
                "clarity_score": <0-1>,
                "clarity_justification": "<text>",
                "difficulty_score": <0-1>,
                "difficulty_justification": "<text>",
                "correctness_score": <0-1>,
                "correctness_justification": "<text>"
            }}
        }}
        
        Follow strictly, the JSON format and do not add anything extra or markdown.
        """

        response = llm_client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are an MCQ evaluation assistant. Always respond with valid JSON in the exact format specified."},
                {"role": "user", "content": prompt}
            ],
        )

        try:
            payload = _extract_json_payload(response.choices[0].message.content)
            results.append(_evaluation_row(json.loads(payload)))
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON for question: {question}\nError: {str(e)}")
            continue
        except (KeyError, TypeError) as e:
            # Valid JSON that does not follow the requested schema must not
            # abort the whole run; report it and move on like a parse failure.
            print(f"Unexpected evaluation structure for question: {question}\nError: {e!r}")
            continue

    return pd.DataFrame(results, columns=_EVALUATION_COLUMNS)

In [21]:
# evaluate_mcqs requires the learning outcomes as its third argument.
# Keep the result in a variable so the table can be reused without paying for
# the model calls again.
evaluation_df = evaluate_mcqs(mcqs, context, assessment_los)
evaluation_df

Error parsing JSON for question: Q2. Which of the following best describes a "wicked problem"?  
A) A problem that has a simple solution  
B) A problem that is easy to define  
C) A problem with many interdependencies and no clear solution  
D) A problem that occurs regularly  
Correct Answer: C) A problem with many interdependencies and no clear solution  
Difficulty: Medium  
Error: Expecting value: line 1 column 1 (char 0)
Error parsing JSON for question: Q3. Which of the following is NOT a characteristic of imperative programming languages?  
A) They follow a step-by-step procedure  
B) They include procedural, object-oriented, and scripting languages  
C) They are primarily declarative in nature  
D) They allow direct manipulation of memory and variables  
Correct Answer: C) They are primarily declarative in nature  
Difficulty: Hard  
Error: Expecting value: line 1 column 1 (char 0)
Error parsing JSON for question: Q4. Which of the following is NOT considered a programming langua

Unnamed: 0,Question,Relevance Score,Relevance Justification,Clarity Score,Clarity Justification,Difficulty Score,Difficulty Justification,Correctness Score,Correctness Justification
0,What is the first step in the Six-Step Problem...,1,The question directly pertains to the context ...,1,"The question is clear and concise, with the op...",0.2,The difficulty level is easy as the correct an...,1,The correct answer is accurately identified as...
1,What will be the output of the following Pytho...,1,The question is relevant because it tests the ...,1,"The question is clear and straightforward, pre...",0.6,The difficulty is medium because it requires t...,1,"The correct answer is 'TypeError', which is co..."
2,Which operator is used in Python to perform in...,1,The question directly tests knowledge of Pytho...,1,"The question is clearly stated, with all optio...",0.7,The question's difficulty is medium as underst...,1,The question correctly identifies '//' as the ...
3,"In Python, what will be the result of the foll...",1,The question is highly relevant as it directly...,1,"The question is clear and concise, displaying ...",0.5,The difficulty is moderate because although di...,1,"The correct answer is B) 2.5, and it is approp..."
4,What will be the output of the following Pytho...,1,The question is relevant as it tests the learn...,1,"The question is clearly stated, with the Pytho...",0.2,The question is fairly easy as it tests basic ...,1,"The correct answer is A) True, which accuratel..."
