In [3]:
!pip install pypdf langchain langchain_google_genai langchain_community

Collecting langchain_community
  Downloading langchain_community-0.2.16-py3-none-any.whl.metadata (2.7 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.22.0-py3-none-any.whl.metadata (7.2 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)
Downloading langchain_community-0.2.16-py3-none-any.whl (2.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m29.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dataclasses_json-0.6.7-py3-none-any.whl (

In [18]:
import os
from typing import List, Union
from pypdf import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAI
from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field

In [6]:
# setting up api key
from google.colab import userdata
import os
os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')

# initialize google model \
llm = GoogleGenerativeAI(model="gemini-pro")

In [19]:
def get_pdf_text(pdf_docs):
    pdf_text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            pdf_text += page.extract_text()
    return pdf_text

#test
pdf_docs = ["/content/Ulm.pdf"]
get_pdf_text(pdf_docs)

'Universal Language Model Fine-tuning for Text Classiﬁcation\nJeremy Howard∗\nfast.ai\nUniversity of San Francisco\nj@fast.aiSebastian Ruder∗\nInsight Centre, NUI Galway\nAylien Ltd., Dublin\nsebastian@ruder.io\nAbstract\nInductive transfer learning has greatly im-\npacted computer vision, but existing ap-\nproaches in NLP still require task-speciﬁc\nmodiﬁcations and training from scratch.\nWe propose Universal Language Model\nFine-tuning (ULMFiT), an effective trans-\nfer learning method that can be applied to\nany task in NLP, and introduce techniques\nthat are key for ﬁne-tuning a language\nmodel. Our method signiﬁcantly outper-\nforms the state-of-the-art on six text clas-\nsiﬁcation tasks, reducing the error by 18-\n24% on the majority of datasets. Further-\nmore, with only 100labeled examples, it\nmatches the performance of training from\nscratch on 100×more data. We open-\nsource our pretrained models and code1.\n1 Introduction\nInductive transfer learning has had a large impact

In [20]:
def get_text_chunks(text):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=50
    )
    chunks = text_splitter.split_text(text)
    return chunks

#test
pdf_text = get_pdf_text(pdf_docs)
print(len(get_text_chunks(pdf_text)))

49


In [21]:
class QuizQuestion(BaseModel):
    question: str
    options: Union[List[str], None] = Field(description="List of options for multiple choice questions, None for True/False")
    correct_answer: str = Field(description="A, B, C, or D for multiple choice; A or B for true/false questions")
    explanation: str

class Quiz(BaseModel):
    questions: List[QuizQuestion]

In [23]:

def generate_questions_from_pdf(pdf_text, num_questions, quiz_type, quiz_context, difficulty_level):
    output_parser = PydanticOutputParser(pydantic_object=Quiz)

    prompt_template = PromptTemplate(
        template=f"""
You are an AI-powered quiz generator. Your task is to create a quiz based on the following parameters:

Number of questions: {{num_questions}}
Quiz type: {{quiz_type}}
Topic/Context: {{quiz_context}}
Difficulty level: {{difficulty_level}}

Guidelines:
1. Generate ONLY {{quiz_type}} questions based on the content of the uploaded PDF.
2. Ensure all questions are related to the specified topic/context: {{quiz_context}}
3. Adhere strictly to the specified difficulty level: {{difficulty_level}}

Difficulty Level Guidelines:
Hard:
Multiple Choice Questions:
   - Ensure distractors (wrong answers) are plausible and based on common misconceptions or errors in understanding.
   - Include answers that require higher-order thinking, such as application of concepts or analysis of information.
   - Consider using "All of the above" or "None of the above" options strategically.
   - For language or writing-related questions, include answers with subtle grammatical or stylistic differences.

True/False Questions:
   - Include statements that require deep understanding of nuances or exceptions to rules.
   - Use complex sentences that combine true and false elements to test careful reading and comprehension.
   - Incorporate statements that challenge common assumptions or misconceptions in the field.
   - Provide brief explanations for why each statement is true or false, focusing on key points of understanding.

Medium:
Multiple Choice Questions:
   - Include distractors that are plausible but distinguishable from the correct answer with careful thought.
   - Test application of concepts rather than just recall, but avoid overly complex scenarios.
   - Use clear, unambiguous language in both the question stem and answer choices.
   - Occasionally include "All of the above" or "None of the above" options, but not too frequently.

True/False Questions:
   - Create statements that require more than surface-level knowledge to evaluate.
   - Include some statements that have qualifiers (e.g., "always," "never," "sometimes") to test for exceptions.
   - Balance the number of true and false statements.
   - Provide brief explanations for the correct answer, focusing on the key determining factors.

Easy:
Multiple Choice Questions:
   - Use straightforward language in both the question stem and answer choices.
   - Test basic recall of key concepts, definitions, or facts.
   - Make the correct answer clearly distinguishable from the distractors.
   - Limit the number of answer choices to 3-4 options.

True/False Questions:
   - Create clear, unambiguous statements about fundamental course concepts.
   - Avoid using absolutes like "always" or "never" unless they are definitively true or false.
   - Focus on testing recall of key facts or basic understanding of concepts.
   - Provide a brief, straightforward explanation for the correct answer.



Use the following text as context for generating questions, but only if it's relevant to {{quiz_context}}:
{{pdf_text}}

{{format_instructions}}
        """,
        input_variables=["num_questions", "quiz_type", "quiz_context", "difficulty_level", "pdf_text"],
        partial_variables={"format_instructions": output_parser.get_format_instructions()}
    )

    # Create the LLMChain
    llm_chain = LLMChain(llm=llm, prompt=prompt_template)

    # Run the chain with all required inputs
    result = llm_chain.invoke({
        "num_questions": num_questions,
        "quiz_type": quiz_type,
        "quiz_context": quiz_context,
        "difficulty_level": difficulty_level,
        "pdf_text": pdf_text
    })

    # Parse the result
    return output_parser.parse(result)


In [24]:
def run_quiz(quiz: Quiz):
    score = 0
    total_questions = len(quiz.questions)

    for i, question in enumerate(quiz.questions, 1):
        print(f"\nQuestion {i}:")
        print(question.question)

        if question.options:  # Multiple choice question
            for j, option in enumerate(question.options):
                print(f"{chr(65 + j)}. {option}")
            # error handling
            while True:
                user_answer = input("Your answer (A, B, C, or D): ").strip().upper()
                if user_answer in ['A', 'B', 'C', 'D']:
                    break
                else:
                    print("Invalid input. Please enter A, B, C, or D.")
        else:  # True/False question
            print("A. True")
            print("B. False")
            while True:
                user_answer = input("Your answer (A or B): ").strip().upper()
                if user_answer in ['A', 'B']:
                    break
                else:
                    print("Invalid input. Please enter A or B.")

        # Determine correct answer for true/false
        if (user_answer == 'A' and question.correct_answer == 'True') or (user_answer == 'B' and question.correct_answer == 'False'):
            print("Correct!")
            score += 1
        else:
            print("Incorrect!")
            print(f"Correct answer: {question.correct_answer}")

        print(f"Explanation: {question.explanation}")

    print(f"\nQuiz completed! Your score: {score}/{total_questions}")
    return score, total_questions

In [25]:
def main():
    # Get PDF file path
    pdf_path = input("Enter the path to your PDF file: ")
    pdf_text = get_pdf_text([pdf_path])
    print(f"PDF text length: {len(pdf_text)} characters")

    # Get user inputs
    quiz_context = input("Enter the concept you want to be tested on: ")
    quiz_type = input("Choose quiz type (multiple choice or true/false): ").lower()
    num_questions = int(input("Enter the number of questions you want: "))
    difficulty_level = input("Choose difficulty level (easy, medium, or hard): ").lower()

    print("\nGenerating quiz with the following parameters:")
    print(f"Quiz context: {quiz_context}")
    print(f"Quiz type: {quiz_type}")
    print(f"Number of questions: {num_questions}")
    print(f"Difficulty level: {difficulty_level}")

    try:
        # Generate quiz
        quiz = generate_questions_from_pdf(pdf_text, num_questions, quiz_type, quiz_context, difficulty_level)

        # Run quiz
        score, total_questions = run_quiz(quiz)

        # Print final score
        print(f"\nFinal Score: {score}/{total_questions}")
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        print("Error details:")
        import traceback
        traceback.print_exc()
        print("Please try again or contact support if the problem persists.")

if __name__ == "__main__":
    main()

Enter the path to your PDF file: /content/Ulm.pdf
PDF text length: 45922 characters
Enter the concept you want to be tested on: slanted triangular learning rates
Choose quiz type (multiple choice or true/false): multiple choice
Enter the number of questions you want: 3
Choose difficulty level (easy, medium, or hard): easy

Generating quiz with the following parameters:
Quiz context: slanted triangular learning rates
Quiz type: multiple choice
Number of questions: 3
Difficulty level: easy


  llm_chain = LLMChain(llm=llm, prompt=prompt_template)


An error occurred: 1 validation error for Generation
text
  str type expected (type=type_error.str)
Error details:
Please try again or contact support if the problem persists.


Traceback (most recent call last):
  File "<ipython-input-25-260289d4d88a>", line 21, in main
    quiz = generate_questions_from_pdf(pdf_text, num_questions, quiz_type, quiz_context, difficulty_level)
  File "<ipython-input-23-b69d7426a3d5>", line 82, in generate_questions_from_pdf
    return output_parser.parse(result)
  File "/usr/local/lib/python3.10/dist-packages/langchain_core/output_parsers/pydantic.py", line 82, in parse
    return super().parse(text)
  File "/usr/local/lib/python3.10/dist-packages/langchain_core/output_parsers/json.py", line 98, in parse
    return self.parse_result([Generation(text=text)])
  File "/usr/local/lib/python3.10/dist-packages/langchain_core/load/serializable.py", line 113, in __init__
    super().__init__(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/pydantic/v1/main.py", line 341, in __init__
    raise validation_error
pydantic.v1.error_wrappers.ValidationError: 1 validation error for Generation
text
  str type expected (type=typ