In [3]:
import os
import json
import pandas as pd
import traceback

In [4]:
from langchain_groq import ChatGroq

In [5]:
from dotenv import load_dotenv
# Load variables from a local .env file (if any) into the process
# environment, then read the Groq API key; `key` is None when it is unset.
load_dotenv()
key = os.environ.get('groq_api')

In [6]:
# Chat model shared by both chain stages (MCQ generation and answer key).
# NOTE(review): temperature=1 is fairly high for output that must parse as
# JSON -- confirm this is intentional.
llm = ChatGroq(
    model="qwen-2.5-32b",
    temperature=1,
    groq_api_key=key,
)

In [7]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain

In [8]:
# Prompt for the MCQ-generation step.
# Placeholders filled at invoke time: {Topic}, {Text}, {Count}, {Mode} and
# {json_formate} (an example of the expected response shape).
# {Text} may be the literal string 'None', which the prompt treats as
# "no source text supplied, rely on the topic alone".
# The wording is sent to the model verbatim, so it is left untouched here.
template = """
Your are MCQ generator
Topic : {Topic}
Text : {Text}
If you find text as None then, as per topic given generate MCQs, else if the data is not enough try to genearte as per you knowledge on topic.
Now create {Count} MCQs on {Mode} as per topic given by the user.
The reponse should be in form of json as 
### Response formate
{json_formate}
In this way 
"""

In [9]:
# Example of the structure the model should return for the generated MCQs:
# question numbers (as strings) map to a question plus four lettered options.
# The empty "2" entry only signals that further questions follow the same shape.
response_formate = {
    "1": {
        "Question": "question",
        "options": {
            letter: f"choice {position}"
            for position, letter in enumerate("abcd", start=1)
        },
    },
    "2": {},
}

In [10]:
# Prompt object for the first stage: turns topic/text/count/mode plus the
# example response shape into the MCQ-generation request.
quiz_generate_prompt = PromptTemplate(
    template=template,
    input_variables=["Topic", "Text", "Count", "Mode", "json_formate"],
)

# Case 1: only a topic is available (no source text)

In [11]:
# Sentinel for "no source text": the literal string "None" (not the None
# object), which is what the generation prompt tells the model to look for.
text = "None"

In [12]:
# Example of the structure the model should return for the answer key:
# question numbers (as strings) map to the correct option and a short reason.
# The empty "2" entry only signals that further answers follow the same shape.
answers_formate = {
    "1": dict(
        answer="correct option",
        reason="Provide a small descroption about why that option is correct.",
    ),
    "2": dict(),
}

In [13]:
# Prompt for the answer-key step.
# Placeholders: {generated_mcqs} (the text produced by the generation step)
# and {answers_formate} (an example of the expected answer-key shape).
# The wording is sent to the model verbatim, so it is left untouched here.
answers_template = '''
MCQS : {generated_mcqs}
As per above give correct answers as formate;
## Result formate :
{answers_formate}
In this way ...
'''

In [14]:
# Prompt object for the second stage (answer key).
# The template references both {generated_mcqs} and {answers_formate}, so both
# must be declared; the original list omitted 'answers_formate', which is
# rejected by LangChain versions that validate declared variables against the
# template and is misleading on versions that silently infer them.
quiz_answers_prompt = PromptTemplate(
    input_variables=['generated_mcqs', 'answers_formate'],
    template=answers_template,
)

In [15]:
from langchain.chains import SequentialChain
from langchain.chains import LLMChain

# Stage 1: generate the MCQs; the raw model text is exposed as "generated_mcqs".
chain1 = LLMChain(
    prompt=quiz_generate_prompt,
    llm=llm,
    output_key="generated_mcqs",
)

# Stage 2: produce the answer key for the MCQs from stage 1; exposed as
# "mcq_answers".
chain2 = LLMChain(
    prompt=quiz_answers_prompt,
    llm=llm,
    output_key="mcq_answers",
)


  chain1 = LLMChain(


In [16]:
# Run generation and answer-key stages back to back. Every input either stage
# needs is declared here; both stage outputs are returned to the caller.
main_chain1 = SequentialChain(
    input_variables=[
        "Topic",
        "Text",
        "Count",
        "Mode",
        "json_formate",
        "answers_formate",
    ],
    output_variables=["generated_mcqs", "mcq_answers"],
    chains=[chain1, chain2],
)

In [17]:
# Run the two-stage chain for the topic-only case.
# The format examples are serialised with json.dumps before being placed in
# the prompt: interpolating the dicts directly would insert Python's repr
# (single-quoted keys/values), which is not valid JSON even though the prompt
# asks the model to answer in JSON.
response = main_chain1.invoke({
    "Topic": "ML",
    "Text": text,
    "Count": 3,
    "Mode": "Hard",
    "json_formate": json.dumps(response_formate, indent=2),
    "answers_formate": json.dumps(answers_formate, indent=2),
})

In [18]:
# Raw text returned by the generation stage (stored under chain1's output_key).
mcqs = response['generated_mcqs']

In [19]:
# Show the raw reply to inspect how the model wrapped its JSON.
print(mcqs)

```json
{
  "1": {
    "Question": "In the context of machine learning, which algorithm inherently handles missing values without the need for preprocessing?",
    "options": {
      "a": "Random Forest",
      "b": "Support Vector Machine",
      "c": "K-Means Clustering",
      "d": "Logistic Regression"
    }
  },
  "2": {
    "Question": "Which of the following is a common regularization technique used to prevent overfitting in machine learning models?",
    "options": {
      "a": "Cross-validation",
      "b": "Gradient Descent",
      "c": "Lasso Regression",
      "d": "Covariance Matrix"
    }
  },
  "3": {
    "Question": "What is the primary difference between supervised and unsupervised learning algorithms?",
    "options": {
      "a": "Supervised algorithms do not require data to train, whereas unsupervised algorithms do.",
      "b": "Unsupervised algorithms require a labeled dataset, whereas supervised algorithms do not.",
      "c": "Supervised learning algorithms pred

In [20]:
# Extract the JSON payload from the generation-stage reply.
# Whitespace is trimmed *before* looking for Markdown fences: the original
# checks ran on the untrimmed text, so a reply with a leading or trailing
# newline kept its fences and broke json.loads in the next cell.
raw_json = response['generated_mcqs'].strip()

if raw_json.startswith("```"):
    # Drop the opening fence and an optional "json" language tag.
    raw_json = raw_json[3:]
    if raw_json[:4].lower() == "json":
        raw_json = raw_json[4:]
if raw_json.endswith("```"):
    # Drop the closing fence.
    raw_json = raw_json[:-3]

raw_json = raw_json.strip()

In [21]:
# Parse the cleaned reply; `mcqs` is rebound from the raw string to the parsed
# object. json.loads raises json.JSONDecodeError if the text is not valid JSON.
mcqs = json.loads(raw_json.strip())

In [22]:
# Raw text returned by the answer-key stage (stored under chain2's output_key).
raw_answers_json = response['mcq_answers']

In [23]:
# Show the raw answer-key reply to inspect how the model wrapped its JSON.
print(raw_answers_json)

```json
{
  "1": {
    "answer": "a",
    "reason": "Random Forest is a type of ensemble learning method for classification, regression, and other tasks that operates by constructing a multitude of decision trees at training time. It inherently handles missing values by considering all possible ways to split one or the other of the values that are missing."
  },
  "2": {
    "answer": "c",
    "reason": "Lasso Regression (Least Absolute Shrinkage and Selection Operator) is a type of linear regression that uses shrinkage. Shrinkage basically means that data values are shrunk towards a central point, like the mean. The LASSO method performs regularization and variable selection by shrinking some of the regression coefficients to zero. This not only helps in reducing overfitting but also effectively selects a simpler model."
  },
  "3": {
    "answer": "d",
    "reason": "In supervised learning, the algorithm is trained on a labeled dataset, which means a dataset where we already have the

In [24]:
# Strip Markdown code fences from the answer-key reply.
# Whitespace is trimmed first: the original checks ran on the untrimmed text,
# so a reply with a leading or trailing newline kept its fences and made the
# following json.loads fail. A bare ``` opener (no "json" tag) is handled too.
raw_answers_json = raw_answers_json.strip()

if raw_answers_json.startswith("```"):
    # Drop the opening fence and an optional "json" language tag.
    raw_answers_json = raw_answers_json[3:]
    if raw_answers_json[:4].lower() == "json":
        raw_answers_json = raw_answers_json[4:]
if raw_answers_json.endswith("```"):
    # Drop the closing fence.
    raw_answers_json = raw_answers_json[:-3]

raw_answers_json = raw_answers_json.strip()

In [25]:
# Parse the cleaned answer-key text.
# json.loads raises json.JSONDecodeError if the text is not valid JSON.
mcqs_answers = json.loads(raw_answers_json)

In [26]:
# Dump the parsed answer key for a quick visual check.
print(mcqs_answers)

{'1': {'answer': 'a', 'reason': 'Random Forest is a type of ensemble learning method for classification, regression, and other tasks that operates by constructing a multitude of decision trees at training time. It inherently handles missing values by considering all possible ways to split one or the other of the values that are missing.'}, '2': {'answer': 'c', 'reason': 'Lasso Regression (Least Absolute Shrinkage and Selection Operator) is a type of linear regression that uses shrinkage. Shrinkage basically means that data values are shrunk towards a central point, like the mean. The LASSO method performs regularization and variable selection by shrinking some of the regression coefficients to zero. This not only helps in reducing overfitting but also effectively selects a simpler model.'}, '3': {'answer': 'd', 'reason': 'In supervised learning, the algorithm is trained on a labeled dataset, which means a dataset where we already have the answers, and the goal is to learn a mapping fro

# Case 2: source text is available (loaded from a PDF)

In [28]:
from langchain.document_loaders import PyPDFLoader

# Location of the source PDF. The original machine-specific absolute path is
# kept as the default so existing behaviour is unchanged; set the
# MCQ_SOURCE_PDF environment variable to run the notebook on another machine
# without editing this cell.
pdf_path = os.getenv(
    'MCQ_SOURCE_PDF',
    '/Users/kaif/Desktop/Projects/testing_mcq_gen/experiment/machine_learning_intro.pdf',
)

loader = PyPDFLoader(pdf_path)
# load_and_split() returns the document as a list of chunks.
pages = loader.load_and_split()


In [29]:
# Use only the first chunk returned by the loader as the source text; this
# rebinds `text`, replacing the 'None' sentinel used in the topic-only run.
# NOTE(review): raises IndexError if the loader produced no chunks, and the
# remaining chunks are ignored -- confirm that is intended.
text = pages[0].page_content

In [30]:
from langchain.chains import SequentialChain
from langchain.chains import LLMChain

# Rebuild both stages for the PDF-backed run.
# Stage 1: generate the MCQs; the raw model text is exposed as "generated_mcqs".
chain1 = LLMChain(
    prompt=quiz_generate_prompt,
    llm=llm,
    output_key="generated_mcqs",
)

# Stage 2: produce the answer key for the MCQs from stage 1; exposed as
# "mcq_answers".
chain2 = LLMChain(
    prompt=quiz_answers_prompt,
    llm=llm,
    output_key="mcq_answers",
)


In [31]:
# Run generation and answer-key stages back to back. Every input either stage
# needs is declared here; both stage outputs are returned to the caller.
main_chain1 = SequentialChain(
    input_variables=[
        "Topic",
        "Text",
        "Count",
        "Mode",
        "json_formate",
        "answers_formate",
    ],
    output_variables=["generated_mcqs", "mcq_answers"],
    chains=[chain1, chain2],
)

In [32]:
# Run the two-stage chain with the PDF text as source material.
# The format examples are serialised with json.dumps before being placed in
# the prompt: interpolating the dicts directly would insert Python's repr
# (single-quoted keys/values), which is not valid JSON even though the prompt
# asks the model to answer in JSON.
response = main_chain1.invoke({
    "Topic": "ML",
    "Text": text,
    "Count": 3,
    "Mode": "Hard",
    "json_formate": json.dumps(response_formate, indent=2),
    "answers_formate": json.dumps(answers_formate, indent=2),
})

In [33]:
# Raw text returned by the generation stage (stored under chain1's output_key).
mcqs = response['generated_mcqs']

In [34]:
# Show the raw reply to inspect how the model wrapped its JSON.
print(mcqs)

```json
{
  "1": {
    "Question": "Which machine learning type relies on labeled data to train models for tasks such as classification and regression?",
    "options": {
      "a": "Reinforcement Learning",
      "b": "Supervised Learning",
      "c": "Unsupervised Learning",
      "d": "Semi-supervised Learning"
    }
  },
  "2": {
    "Question": "In reinforcement learning, what does the environment provide to the agent after each action to train the model?",
    "options": {
      "a": "Labels",
      "b": "Rewards",
      "c": "Features",
      "d": "Clusters"
    }
  },
  "3": {
    "Question": "Which algorithm is particularly known for finding patterns in unlabeled datasets by grouping data points into clusters?",
    "options": {
      "a": "Decision Trees",
      "b": "Linear Regression",
      "c": "K-Means Clustering",
      "d": "Neural Networks"
    }
  }
}
```


In [35]:
# Extract the JSON payload from the generation-stage reply.
# Whitespace is trimmed *before* looking for Markdown fences: the original
# checks ran on the untrimmed text, so a reply with a leading or trailing
# newline kept its fences and broke json.loads in the next cell.
raw_json = response['generated_mcqs'].strip()

if raw_json.startswith("```"):
    # Drop the opening fence and an optional "json" language tag.
    raw_json = raw_json[3:]
    if raw_json[:4].lower() == "json":
        raw_json = raw_json[4:]
if raw_json.endswith("```"):
    # Drop the closing fence.
    raw_json = raw_json[:-3]

raw_json = raw_json.strip()

In [36]:
# Parse the cleaned reply; `mcqs` is rebound from the raw string to the parsed
# object. json.loads raises json.JSONDecodeError if the text is not valid JSON.
mcqs = json.loads(raw_json.strip())

In [37]:
# Display the parsed MCQs using the notebook's rich repr.
mcqs

{'1': {'Question': 'Which machine learning type relies on labeled data to train models for tasks such as classification and regression?',
  'options': {'a': 'Reinforcement Learning',
   'b': 'Supervised Learning',
   'c': 'Unsupervised Learning',
   'd': 'Semi-supervised Learning'}},
 '2': {'Question': 'In reinforcement learning, what does the environment provide to the agent after each action to train the model?',
  'options': {'a': 'Labels',
   'b': 'Rewards',
   'c': 'Features',
   'd': 'Clusters'}},
 '3': {'Question': 'Which algorithm is particularly known for finding patterns in unlabeled datasets by grouping data points into clusters?',
  'options': {'a': 'Decision Trees',
   'b': 'Linear Regression',
   'c': 'K-Means Clustering',
   'd': 'Neural Networks'}}}

In [38]:
# Raw text returned by the answer-key stage (stored under chain2's output_key).
raw_answers_json = response['mcq_answers']

In [39]:
# Turn the answer-key reply into a Python object.
# - isinstance() replaces the exact-type comparison so str subclasses are
#   treated as text as well.
# - Whitespace is trimmed before the fence checks; previously a leading or
#   trailing newline left the Markdown fences in place and json.loads failed.
# - A bare ``` opener (no "json" tag) is also accepted.
# Non-string values are assumed to be parsed already and are used as-is.
if isinstance(raw_answers_json, str):
    raw_answers_json = raw_answers_json.strip()
    if raw_answers_json.startswith("```"):
        # Drop the opening fence and an optional "json" language tag.
        raw_answers_json = raw_answers_json[3:]
        if raw_answers_json[:4].lower() == "json":
            raw_answers_json = raw_answers_json[4:]
    if raw_answers_json.endswith("```"):
        # Drop the closing fence.
        raw_answers_json = raw_answers_json[:-3]
    raw_answers_json = raw_answers_json.strip()
    mcqs_answers = json.loads(raw_answers_json)
else:
    mcqs_answers = raw_answers_json

In [40]:
# Display the parsed answer key using the notebook's rich repr.
mcqs_answers

{'1': {'answer': 'b',
  'reason': 'Supervised Learning relies on labeled data for training its models to perform tasks such as classification and regression, where the dataset includes labels or known outcomes.'},
 '2': {'answer': 'b',
  'reason': 'In reinforcement learning, each action taken by the agent in the environment is followed by a reward, which informs the agent whether its action was positive or negative with respect to the goal.'},
 '3': {'answer': 'c',
  'reason': 'K-Means Clustering is particularly known for its ability to group data points into clusters in unlabeled datasets, effectively finding patterns without predefined labels.'}}