In [4]:
import os
import json
import pandas as pd
import traceback

from dotenv import load_dotenv
# Take env variables from the env file so that the os.getenv lookups later in
# the notebook can read them
load_dotenv()

True

In [3]:
# Load env variables
# os.getenv takes the variable's *name* as a string; it returns None when the
# variable is not set.
key = os.getenv("OPENAI_API_KEY") #Do the same for other API key

# The same pattern applies to a Hugging Face endpoint API / open-source models.
# Check the current Langchain documentation for the correct implementation.
from langchain.chat_models import ChatOpenAI

# Chat model shared by the chains defined later in the notebook
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.5,
    openai_api_key=key,
)

In [None]:
# Imports for Langchain modules for Prompts, Chains, and getting output as well as Document Loader
# (the package name is "langchain"; a misspelled module path fails at import time)
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
from langchain.callbacks import get_openai_callback
import PyPDF2

#### Generation of Quiz using Prompt Template and LLM Chain to run LLM Model

In [8]:
# Response format for output of LLM: three numbered placeholder questions,
# each with four lettered options and a slot for the correct answer.
# Every entry is built fresh, so no dict object is shared between questions.
RESPONSE_JSON = {
    str(question_number): {
        "mcq": "multiple choice question",
        "options": {letter: "choice here" for letter in "abcd"},
        "correct": "correct answer",
    }
    for question_number in range(1, 4)
}

In [None]:
# Prompt Template for User query to LLM model (quiz generation).
# Placeholders: {text}, {number}, {subject}, {tone} and {response_json}.
# A backslash at the end of a line inside the string is a line continuation,
# so that line is joined with the following one in the final prompt.
TEMPLATE="""
Text:{text}
You are an expert MCQ maker. Given the above text, it is your job to \
create a quiz  of {number} multiple choice questions for {subject} students in {tone} tone. 
Make sure the questions are not repeated and check all the questions to be conforming the text as well.
Make sure to format your response like  RESPONSE_JSON below  and use it as a guide. \
Ensure to make {number} MCQs
### RESPONSE_JSON
{response_json}
"""

In [None]:
# Quiz generation prompt: binds the generation template to the names of the
# placeholders it contains
quiz_generation_prompt = PromptTemplate(
    template=TEMPLATE,
    input_variables=["text", "number", "subject", "tone", "response_json"],
)

In [None]:
# LLM Chain that runs the quiz generation prompt on the model; its result is
# published under the "quiz" output key
quiz_chain = LLMChain(
    llm=llm,
    prompt=quiz_generation_prompt,
    output_key="quiz",
    verbose=True,
)

##### Evaluation or review of generated Quiz

In [None]:
# Second template: prompt used to evaluate (review) the generated quiz.
# Placeholders: {subject} and {quiz}.
# NOTE(review): the backslash line continuations join lines without inserting
# a space, e.g. "students." is followed directly by "You need" in the prompt.
TEMPLATE2="""
You are an expert english grammarian and writer. Given a Multiple Choice Quiz for {subject} students.\
You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis. 
if the quiz is not at per with the cognitive and analytical abilities of the students,\
update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student abilities
Quiz_MCQs:
{quiz}

Check from an expert English Writer of the above quiz:
"""

In [None]:
# Evaluation or review of generated Quiz Prompt Template.
# This must be built from TEMPLATE2 (the evaluation template), whose
# placeholders are {subject} and {quiz}; the generation template TEMPLATE uses
# a different set of placeholders and would not match input_variables.
quiz_evaluation_prompt = PromptTemplate(
    input_variables=["subject", "quiz"],
    template=TEMPLATE2,
)

In [None]:
# Chain that runs the quiz evaluation prompt on the model; its result is
# published under the "review" output key
review_chain = LLMChain(
    llm=llm,
    prompt=quiz_evaluation_prompt,
    output_key="review",
    verbose=True,
)

In [None]:
# Combined Sequential Chain: generate the quiz first, then review it, and
# expose both the "quiz" and the "review" outputs
generate_evaluate_chain = SequentialChain(
    chains=[quiz_chain, review_chain],
    input_variables=["text", "number", "subject", "tone", "response_json"],
    output_variables=["quiz", "review"],
    verbose=True,
)

#### Loading the Document for data based on which to generate the quiz

In [6]:
#### Loading the Document
# The document location can be overridden with the MCQ_DATA_PATH environment
# variable; when it is not set, the original local path is used as a fallback.
file_path = os.getenv(
    "MCQ_DATA_PATH",
    r"C:\Users\mahav\OneDrive\Desktop\MCQ-Generator\data.txt",
)

# Decode explicitly so the result does not depend on the platform's default
# encoding (assumes data.txt is stored as UTF-8 — TODO confirm).
with open(file_path, "r", encoding="utf-8") as file:
    TEXT = file.read()

# print(TEXT)

Data science is an interdisciplinary academic field[1] that uses statistics, scientific computing, scientific methods, processes, algorithms and systems to extract or extrapolate knowledge and insights from noisy, structured, and unstructured data.[2]

Data science also integrates domain knowledge from the underlying application domain (e.g., natural sciences, information technology, and medicine).[3] Data science is multifaceted and can be described as a science, a research paradigm, a research method, a discipline, a workflow, and a profession.[4]

Data science is a "concept to unify statistics, data analysis, informatics, and their related methods" to "understand and analyze actual phenomena" with data.[5] It uses techniques and theories drawn from many fields within the context of mathematics, statistics, computer science, information science, and domain knowledge.[6] However, data science is different from computer science and information science. Turing Award winner Jim Gray imag

In [9]:
# Preview the serialized response format (the same string is later passed to
# the chain as "response_json")
json.dumps(RESPONSE_JSON)

'{"1": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "2": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}, "3": {"mcq": "multiple choice question", "options": {"a": "choice here", "b": "choice here", "c": "choice here", "d": "choice here"}, "correct": "correct answer"}}'

In [None]:
# Quiz parameters supplied to the chain together with TEXT (the text we
# created from loading the document): question count, subject, and tone
NUMBER, SUBJECT, TONE = 5, "data science", "simple"

In [None]:
# How to setup Token Usage Tracking in LangChain:
# https://python.langchain.com/docs/modules/model_io/llms/token_usage_tracking

# Run the combined chain inside the callback context; `cb` is read afterwards
# for the token and cost figures.
with get_openai_callback() as cb:
    response = generate_evaluate_chain(
        dict(
            text=TEXT,
            number=NUMBER,
            subject=SUBJECT,
            tone=TONE,
            response_json=json.dumps(RESPONSE_JSON),
        )
    )

In [None]:
# Token counts and cost recorded by get_openai_callback for the API call,
# printed one figure per line
print(
    f"Total Tokens:{cb.total_tokens}",
    f"Prompt Tokens:{cb.prompt_tokens}",
    f"Completion Tokens:{cb.completion_tokens}",
    f"Total Cost:{cb.total_cost}",
    sep="\n",
)

In [None]:
# After running the LLM Chain on a LLM model we get a response; the generated
# quiz is stored under the "quiz" key as text.
quiz_str = response.get("quiz")
if not quiz_str:
    raise ValueError("Chain response has no 'quiz' output to parse")

# The model may surround the JSON with prose or a Markdown code fence, so parse
# only the span from the first "{" to the last "}". For a reply that is already
# a bare JSON object this span is the whole object.
json_start = quiz_str.find("{")
json_end = quiz_str.rfind("}")
if json_start == -1 or json_end < json_start:
    raise ValueError("No JSON object found in the generated quiz text")

quiz_dict = json.loads(quiz_str[json_start : json_end + 1])  # Output Quiz

In [None]:
# Quiz in table data format: one row per question with the question text, the
# options joined as "letter: text | letter: text | ...", and the correct answer.
# Only the values of quiz_dict are iterated: the question numbers are not used,
# and a loop variable named `key` would overwrite the global `key` that holds
# the API key.
quiz_table_data = [
    {
        "MCQ": question["mcq"],
        "Choices": " | ".join(
            f"{option}: {option_value}"
            for option, option_value in question["options"].items()
        ),
        "Correct": question["correct"],
    }
    for question in quiz_dict.values()
]

In [None]:
# Build the quiz table and write it to a CSV file without the index column
final_quiz = pd.DataFrame(data=quiz_table_data)
final_quiz.to_csv(
    "Data_Science_Quiz.csv",
    index=False,
)