# MCQ Generator Experiment

In [2]:
import json
import os
import traceback
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv
from pathlib import Path

In [10]:
load_dotenv()
KEY = os.getenv('OPENAI_API_KEY')

In [11]:
KEY

'sk-TYaNXG5jR1XxyxdykwV9T3BlbkFJHQCsStwwroMhaWenRzU'

In [38]:
llm = ChatOpenAI(model='gpt-3.5-turbo', temperature = 0.3, api_key = KEY)

In [39]:
llm

ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x7f2c971d3200>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x7f2c971d6450>, temperature=0.3, openai_api_key='sk-TYaNXG5jR1XxyxdykwV9T3BlbkFJHQCsStwwroMhaWenRzU', openai_proxy='')

In [40]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, SequentialChain
import json
import pandas as pd
from langchain.callbacks import get_openai_callback

In [41]:
TEMPLATE = """
Text: {text}
You are an expert MCQ maker. Given the above text, it is your job to
create a a quiz of {number} multiple choice questions for {subject} students in {tone} tone.
Make sure the questions are not repeated and check all the questions to be conforming the text as well.
Make sure to format your response like RESPONSE_JSON below and use it as a guide.
Ensure to make {number} MCQs.

### RESPONSE_JSON
{response_json} 
"""

print(TEMPLATE)


Text: {text}
You are an expert MCQ maker. Given the above text, it is your job to
create a a quiz of {number} multiple choice questions for {subject} students in {tone} tone.
Make sure the questions are not repeated and check all the questions to be conforming the text as well.
Make sure to format your response like RESPONSE_JSON below and use it as a guide.
Ensure to make {number} MCQs.

### RESPONSE_JSON
{response_json} 



In [42]:
quiz_generation_prompt = PromptTemplate(
    input_variables = ["text", "number", "subject", "tone", "response_json"],
    template = TEMPLATE
)

In [43]:
RESPONSE_JSON: dict = {
    "1": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer"
    },
    "2": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer"
    },
    "3": {
        "mcq": "multiple choice question",
        "options": {
            "a": "choice here",
            "b": "choice here",
            "c": "choice here",
            "d": "choice here",
        },
        "correct": "correct answer"
    },
}

In [44]:
quiz_chain = LLMChain(llm=llm, prompt=quiz_generation_prompt, output_key="quiz", verbose=True)

In [45]:
TEMPLATE2: str = """
You are an expert english grammarian and writer. Given a a Multiple Choice Quiz for {subject} students.
You need to evaluate the complexity of the questions and give a complete analysis of the quiz. Only use up max 50 words for complexity
if the quiz is not at per with the cognitive and analytical abilities of the students,
update the quiz questions which needs to be changed and change the tone such that it perfectly fits the students abilities.
MCQ Quiz:
{quiz}

Check from an expert English writer of the above quiz.
"""

print(TEMPLATE2)


You are an expert english grammarian and writer. Given a a Multiple Choice Quiz for {subject} students.
You need to evaluate the complexity of the questions and give a complete analysis of the quiz. Only use up max 50 words for complexity
if the quiz is not at per with the cognitive and analytical abilities of the students,
update the quiz questions which needs to be changed and change the tone such that it perfectly fits the students abilities.
MCQ Quiz:
{quiz}

Check from an expert English writer of the above quiz.



In [46]:
review_prompt = PromptTemplate(
    input_variables = ["subject", "quiz"],
    template = TEMPLATE2
)

In [47]:
review_chain = LLMChain(llm=llm, prompt=review_prompt, output_key="review", verbose=True)

In [48]:
sequential_chain = SequentialChain(
    chains = [quiz_chain, review_chain], 
    input_variables = ["text", "number", "subject", "tone", "response_json"],
    output_variables = ["quiz", "review"],
    verbose=True
)

In [49]:
SequentialChain.__annotations__

{'chains': typing.List[langchain.chains.base.Chain],
 'input_variables': typing.List[str],
 'output_variables': typing.List[str],
 'return_all': bool}

In [50]:
with open(Path("data.txt")) as file:
    data = file.read()

In [51]:
print(data)

Biology is the scientific study of life.[1][2][3] It is a natural science with a broad scope but has several unifying themes that tie it together as a single, coherent field.[1][2][3] For instance, all organisms are made up of cells that process hereditary information encoded in genes, which can be transmitted to future generations. Another major theme is evolution, which explains the unity and diversity of life.[1][2][3] Energy processing is also important to life as it allows organisms to move, grow, and reproduce.[1][2][3] Finally, all organisms are able to regulate their own internal environments.[1][2][3][4][5]

Biologists are able to study life at multiple levels of organization,[1] from the molecular biology of a cell to the anatomy and physiology of plants and animals, and evolution of populations.[1][6] Hence, there are multiple subdisciplines within biology, each defined by the nature of their research questions and the tools that they use.[7][8][9] Like other scientists, bio

In [52]:
NUMBER: int = 5
TONE: str = "simple"
SUBJECT: str = "biology"

In [53]:
# How to setup token usage Tracking in Langchain
with get_openai_callback() as cb: 
    response = sequential_chain.invoke({
        "text": data,
        "number": NUMBER,
        "subject": SUBJECT,
        "tone": TONE,
        "response_json": json.dumps(RESPONSE_JSON)  
    })
    print(cb)



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
Text: Biology is the scientific study of life.[1][2][3] It is a natural science with a broad scope but has several unifying themes that tie it together as a single, coherent field.[1][2][3] For instance, all organisms are made up of cells that process hereditary information encoded in genes, which can be transmitted to future generations. Another major theme is evolution, which explains the unity and diversity of life.[1][2][3] Energy processing is also important to life as it allows organisms to move, grow, and reproduce.[1][2][3] Finally, all organisms are able to regulate their own internal environments.[1][2][3][4][5]

Biologists are able to study life at multiple levels of organization,[1] from the molecular biology of a cell to the anatomy and physiology of plants and animals, and evolution of populations.[1][6] Hence, there are multiple subdisciplin

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-TYaNX**************************************nRzU. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

In [None]:
print("Total cost:", cb.total_cost)
print("Total tokens:", cb.total_tokens)
print("Prompt tokens:", cb.prompt_tokens)
print("Completion tokens:", cb.completion_tokens)
print("Successful requests:", cb.successful_requests)

Total cost: 0.003813
Total tokens: 2397
Prompt tokens: 1962
Completion tokens: 435
Successful requests: 2


In [None]:
quiz = response.get("quiz")
quiz

'\n{\n    "1": {\n        "mcq": "What is the scientific study of life called?",\n        "options": {\n            "a": "Chemistry",\n            "b": "Biology",\n            "c": "Physics",\n            "d": "Geology"\n        },\n        "correct": "b"\n    },\n    "2": {\n        "mcq": "Which scientist is known for his work on the theory of evolution by natural selection?",\n        "options": {\n            "a": "Gregor Mendel",\n            "b": "Anton van Leeuwenhoek",\n            "c": "Charles Darwin",\n            "d": "Robert Hooke"\n        },\n        "correct": "c"\n    },\n    "3": {\n        "mcq": "What is the basic unit of organisms according to cell theory?",\n        "options": {\n            "a": "Atoms",\n            "b": "Molecules",\n            "c": "Cells",\n            "d": "Tissues"\n        },\n        "correct": "c"\n    },\n    "4": {\n        "mcq": "Who is credited with the discovery of the double-helical structure of DNA?",\n        "options": {\n    

In [None]:
json_quiz = json.loads(quiz)
json_quiz

{'1': {'mcq': 'What is the scientific study of life called?',
  'options': {'a': 'Chemistry',
   'b': 'Biology',
   'c': 'Physics',
   'd': 'Geology'},
  'correct': 'b'},
 '2': {'mcq': 'Which scientist is known for his work on the theory of evolution by natural selection?',
  'options': {'a': 'Gregor Mendel',
   'b': 'Anton van Leeuwenhoek',
   'c': 'Charles Darwin',
   'd': 'Robert Hooke'},
  'correct': 'c'},
 '3': {'mcq': 'What is the basic unit of organisms according to cell theory?',
  'options': {'a': 'Atoms', 'b': 'Molecules', 'c': 'Cells', 'd': 'Tissues'},
  'correct': 'c'},
 '4': {'mcq': 'Who is credited with the discovery of the double-helical structure of DNA?',
  'options': {'a': 'Gregor Mendel',
   'b': 'James Watson and Francis Crick',
   'c': 'Alfred Hershey and Martha Chase',
   'd': 'Har Gobind Khorana'},
  'correct': 'b'},
 '5': {'mcq': 'When was the Human Genome Project launched?',
  'options': {'a': '1980', 'b': '1990', 'c': '2000', 'd': '2010'},
  'correct': 'b'}}

In [None]:
quiz_table_data = []
for key, value in json_quiz.items():
    options = "|".join(value["options"])
    quiz_table_data.append({
        "MCQ": value["mcq"],
        "options": " | ".join([f"{option} : {option_value}" for option, option_value in value["options"].items()]),
        "correct": value["correct"]
    })

In [None]:
quiz_table_data

[{'MCQ': 'What is the scientific study of life called?',
  'options': 'a : Chemistry | b : Biology | c : Physics | d : Geology',
  'correct': 'b'},
 {'MCQ': 'Which scientist is known for his work on the theory of evolution by natural selection?',
  'options': 'a : Gregor Mendel | b : Anton van Leeuwenhoek | c : Charles Darwin | d : Robert Hooke',
  'correct': 'c'},
 {'MCQ': 'What is the basic unit of organisms according to cell theory?',
  'options': 'a : Atoms | b : Molecules | c : Cells | d : Tissues',
  'correct': 'c'},
 {'MCQ': 'Who is credited with the discovery of the double-helical structure of DNA?',
  'options': 'a : Gregor Mendel | b : James Watson and Francis Crick | c : Alfred Hershey and Martha Chase | d : Har Gobind Khorana',
  'correct': 'b'},
 {'MCQ': 'When was the Human Genome Project launched?',
  'options': 'a : 1980 | b : 1990 | c : 2000 | d : 2010',
  'correct': 'b'}]

In [None]:
quiz_df = pd.DataFrame(quiz_table_data)
quiz_df.to_csv("mcq.csv", index=False)