In [65]:
import os
import json
import pandas as pd

import warnings
# Install a global "ignore" filter so no Python warnings are shown from here on.
# NOTE(review): this also hides deprecation warnings raised by the libraries used below.
warnings.filterwarnings("ignore")

In [66]:
from dotenv import load_dotenv

# Populate the process environment from a .env file.
# Presumably this is where OPENAI_API_KEY (read further down via os.getenv) comes from - confirm.
load_dotenv()  # take environment variables from .env.

True

In [67]:
from langchain.chat_models import ChatOpenAI

In [68]:
# OpenAI credential taken from the environment; None when the variable is not set.
KEY = os.environ.get("OPENAI_API_KEY")

In [69]:
# Chat model instance handed to the LLM chains defined later in the notebook.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.5,
    openai_api_key=KEY,
)

In [70]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain
from langchain.callbacks import get_openai_callback

In [71]:
# Example of the JSON layout the model is asked to follow: three placeholder
# questions keyed "1".."3", each with options "a".."d" and a "Correct" field.
# Every entry is built fresh, so the nested dicts are independent objects.
RESPONSE_JSON = {
    str(number): {
        "MCQ": "multiple choice question",
        "Options": {letter: "choice here" for letter in "abcd"},
        "Correct": "correct answer",
    }
    for number in range(1, 4)
}

In [72]:
# Prompt text for the quiz-generation step.
# Placeholders filled in at run time: {text}, {num_questions}, {subject}, {tone}, {response_json}.
# The wording (including whitespace) is sent to the model as-is, so edits here change model input.
QUIZ_GENERATION_TEMPLATE = """
    Text: {text}
    You are an expert MCQ generator. Given the above text, it is your job to create a quiz of {num_questions} multiple choice questions for {subject} students in {tone} tone. 
    Make sure of the following: (1) The questions are not repeated, and check all the questions to be conforming the text as well, (2) Use RESPONSE_JSON defined below as a guide to format your response, and (3) create {num_questions} MCQs.
    
    ### RESPONSE_JSON
    {response_json}
"""

In [73]:
# Wrap the generation template together with the names of the placeholders it expects.
quiz_generation_prompt = PromptTemplate(
    template=QUIZ_GENERATION_TEMPLATE,
    input_variables=["text", "num_questions", "subject", "tone", "response_json"],
)

In [74]:
# Generation step: runs the quiz prompt through `llm` and exposes the result under the "quiz" key.
quiz_chain = LLMChain(
    prompt=quiz_generation_prompt,
    llm=llm,
    output_key="quiz",
    verbose=True,
)

In [75]:
# Prompt text for the review step.
# Placeholders filled in at run time: {subject} and {quiz}.
# The wording (including whitespace) is sent to the model as-is, so edits here change model input.
QUIZ_EVALUATION_TEMPLATE = """
    You are an expert English grammarian and writer. Given a Multiple Choice Quiz for {subject} students,
    You need to evaluate the complexity of the question and give a complete analysis of the quiz. Use only a max of 50 words for complexity analysis. 
    If the quiz is not at par with the cognitive and analytical abilities of the students, update the quiz questions that need to be changed, and change the tone such that it perfectly fits the student's abilities
    
    Here is the quiz you need to evaluate:
    
    Quiz_MCQs:
    {quiz}
"""

In [76]:
# Wrap the evaluation template together with the names of the placeholders it expects.
quiz_evaluation_prompt = PromptTemplate(
    template=QUIZ_EVALUATION_TEMPLATE,
    input_variables=["subject", "quiz"],
)

In [77]:
# Review step: runs the evaluation prompt through `llm` and exposes the result under the "review" key.
review_chain = LLMChain(
    prompt=quiz_evaluation_prompt,
    llm=llm,
    output_key="review",
    verbose=True,
)

In [78]:
# Two-step pipeline: quiz_chain runs first, then review_chain; both "quiz" and "review"
# are requested as outputs of the combined chain.
generate_evaluate_chain = SequentialChain(
    verbose=True,
    input_variables=["text", "num_questions", "subject", "tone", "response_json"],
    output_variables=["quiz", "review"],
    chains=[quiz_chain, review_chain],
)

In [79]:
# Relative path to the text file that is read below and used as the quiz source material.
data_science = "../dataset/input/data_science.txt"

In [80]:
# Read the whole source document into TEXT.
# The encoding is pinned so the read does not depend on the platform's locale default;
# this assumes the input file is UTF-8 encoded - confirm if other encodings are expected.
with open(data_science, 'r', encoding="utf-8") as file:
    TEXT = file.read()

In [81]:
# Show the loaded source text as a quick sanity check.
print(TEXT)

What is data science?
Data science is the study of data to extract meaningful insights for business. It is a multidisciplinary approach that combines principles and practices from the fields of mathematics, statistics, artificial intelligence, and computer engineering to analyze large amounts of data. This analysis helps data scientists to ask and answer questions like what happened, why it happened, what will happen, and what can be done with the results.

Why is data science important?
Data science is important because it combines tools, methods, and technology to generate meaning from data. Modern organizations are inundated with data; there is a proliferation of devices that can automatically collect and store information. Online systems and payment portals capture more data in the fields of e-commerce, medicine, finance, and every other aspect of human life. We have text, audio, video, and image data available in vast quantities.  

History of data science
While the term data scie

In [82]:
# Quiz parameters: number of questions, subject and tone.
NUM_QUESTIONS, SUBJECT, TONE = 5, "Data Science", "Simple"

In [83]:
# Assemble the chain inputs up front; the response-format example is passed as a JSON string.
chain_inputs = dict(
    text=TEXT,
    num_questions=NUM_QUESTIONS,
    subject=SUBJECT,
    tone=TONE,
    response_json=json.dumps(RESPONSE_JSON),
)

# Run the pipeline inside the OpenAI callback context; `cb` is read afterwards for token and cost figures.
with get_openai_callback() as cb:
    response = generate_evaluate_chain(chain_inputs)



[1m> Entering new SequentialChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    Text: What is data science?
Data science is the study of data to extract meaningful insights for business. It is a multidisciplinary approach that combines principles and practices from the fields of mathematics, statistics, artificial intelligence, and computer engineering to analyze large amounts of data. This analysis helps data scientists to ask and answer questions like what happened, why it happened, what will happen, and what can be done with the results.

Why is data science important?
Data science is important because it combines tools, methods, and technology to generate meaning from data. Modern organizations are inundated with data; there is a proliferation of devices that can automatically collect and store information. Online systems and payment portals capture more data in the fields of e-commerce, medicine, finance, and every other aspe

In [84]:
# Report the usage figures collected by the callback, one per line.
print(
    f"Total Tokens: {cb.total_tokens}",
    f"Prompt Tokens: {cb.prompt_tokens}",
    f"Completion Tokens: {cb.completion_tokens}",
    f"Total Cost: ${cb.total_cost}",
    sep="\n",
)

Total Tokens: 2451
Prompt Tokens: 1953
Completion Tokens: 498
Total Cost: $0.0039255


In [85]:
# Inspect the raw result object returned by the chain call.
print(response)

{'text': 'What is data science?\nData science is the study of data to extract meaningful insights for business. It is a multidisciplinary approach that combines principles and practices from the fields of mathematics, statistics, artificial intelligence, and computer engineering to analyze large amounts of data. This analysis helps data scientists to ask and answer questions like what happened, why it happened, what will happen, and what can be done with the results.\n\nWhy is data science important?\nData science is important because it combines tools, methods, and technology to generate meaning from data. Modern organizations are inundated with data; there is a proliferation of devices that can automatically collect and store information. Online systems and payment portals capture more data in the fields of e-commerce, medicine, finance, and every other aspect of human life. We have text, audio, video, and image data available in vast quantities.  \n\nHistory of data science\nWhile t

In [86]:
# Pull the two chain outputs out of the result; either is None if its key is absent.
quiz, review = response.get("quiz"), response.get("review")

In [87]:
# Show the quiz exactly as returned by the model (JSON text, parsed in a later cell).
print(quiz)

{
    "1": {
        "MCQ": "What is data science primarily used for?",
        "Options": {
            "a": "Studying history",
            "b": "Extracting insights from data for business",
            "c": "Creating art",
            "d": "Building websites"
        },
        "Correct": "b"
    },
    "2": {
        "MCQ": "Which type of data analysis examines data to understand why something happened?",
        "Options": {
            "a": "Descriptive analysis",
            "b": "Diagnostic analysis",
            "c": "Predictive analysis",
            "d": "Prescriptive analysis"
        },
        "Correct": "b"
    },
    "3": {
        "MCQ": "What does prescriptive analysis do?",
        "Options": {
            "a": "Predicts future data patterns",
            "b": "Analyzes past data",
            "c": "Recommends the best course of action",
            "d": "Creates data visualizations"
        },
        "Correct": "c"
    },
    "4": {
        "MCQ": "What is one bene

In [88]:
# Show the review chain's written evaluation of the quiz.
print(review)

Complexity Analysis:
The quiz is suitable for data science students as it covers basic concepts such as the purpose of data science, types of data analysis, benefits for businesses, and innovation. The questions are clear and straightforward, requiring fundamental knowledge of data science concepts.

Suggestions for Improvement:
Consider adding more in-depth questions that require critical thinking or practical application of data science principles. Additionally, include scenarios or case studies to enhance the practical understanding of data science concepts. Overall, the quiz can be enhanced by incorporating a variety of question types to assess different cognitive skills.


In [89]:
# Parse the model's JSON reply into a dict.
# The text is read from `response` rather than from `quiz` itself, so this cell can be
# re-run after `quiz` has been rebound (to the parsed dict here, or to the DataFrame
# built further down) without json.loads being handed a non-string.
quiz = json.loads(response["quiz"])

In [90]:
# Flatten each question into one row; its options are collapsed into a single
# "label: text | label: text | ..." string stored under "Choices".
formatted_quiz = [
    {
        "MCQ": entry["MCQ"],
        "Choices": " | ".join(
            f"{option}: {option_value}"
            for option, option_value in entry["Options"].items()
        ),
        "Correct": entry["Correct"],
    }
    for entry in quiz.values()
]

In [91]:
# Turn the flattened rows into a table.
# NOTE(review): this rebinds `quiz` from the parsed dict to a DataFrame; cells above that
# expect the dict will not work if re-run after this one.
quiz = pd.DataFrame(formatted_quiz)

In [92]:
# Render the quiz table as the cell output.
quiz

Unnamed: 0,MCQ,Choices,Correct
0,What is data science primarily used for?,a: Studying history | b: Extracting insights f...,b
1,Which type of data analysis examines data to u...,a: Descriptive analysis | b: Diagnostic analys...,b
2,What does prescriptive analysis do?,a: Predicts future data patterns | b: Analyzes...,c
3,What is one benefit of data science for busine...,a: Causes disruptions in business activity | b...,c
4,How can data science help companies innovate?,a: By ignoring customer feedback | b: By uncov...,b


In [93]:
# Persist the quiz table without the index column.
# The output directory is created first because to_csv does not create missing
# parent directories and would otherwise fail on a fresh checkout.
os.makedirs("../dataset/output", exist_ok=True)
quiz.to_csv("../dataset/output/data_science.csv", index=False)