## Parse pdf and generate question bank

In [1]:
from langchain.document_loaders import PyPDFLoader
loader = PyPDFLoader('100q.pdf')

data = loader.load()
data

[Document(page_content='-1-\n*  If you are 65 years old or older and have been a legal permanent resident of the United States for 20 or more years, you \nmay study just the questions that have been marked with an asterisk.\nwww.uscis.gov(rev. 01/19)\nCivics (History and Government) Questions for the Naturalization Test\nThe 100 civics (history and government) questions and answers for the naturalization test are listed below. The civics test \nis an oral test and the USCIS Officer will ask the applicant up to 10 of the 100 civics questions. An applicant must answer \n6 out of 10 questions correctly to pass the civics portion of the naturalization test. \nOn the naturalization test, some answers may change because of elections or appointments. As you study for the test, \nmake sure that you know the most current answers to these questions. Answer these questions with the name of the official \nwho is serving at the time of your eligibility interview with USCIS. The USCIS Officer will n

In [3]:
from pydantic import BaseModel, Field, validator
from langchain.output_parsers import PydanticOutputParser
from typing import List

class QA(BaseModel):
    question: str = Field(description="question")
    answer: str = Field(description="answer")
        
class QAs(BaseModel):
    items: List[QA]

parser = PydanticOutputParser(pydantic_object=QAs)

In [156]:
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo', max_tokens=1500)
prompt = PromptTemplate(
    input_variables=["document"],
    template="Parse the document and capture all questions and answers. Question will come with a number in front, and there might be multiple answers but you should parse it as one string. Do not return anything else.\n {format_instructions}. Document:\n {document}.\n ",
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [158]:
from langchain.callbacks import get_openai_callback
master = QAs(items=[])
for doc in data:
    _input = prompt.format_prompt(document=doc.page_content)
    with get_openai_callback() as cb:
        output = llm.predict(_input.to_string())
        current_qa = parser.parse(output)
    print(cb)
    print(output)
    master.items.extend(current_qa.items)


Tokens Used: 1024
	Prompt Tokens: 785
	Completion Tokens: 239
Successful Requests: 1
Total Cost (USD): $0.002048
{
  "items": [
    {
      "question": "What is the supreme law of the land?",
      "answer": "the Constitution"
    },
    {
      "question": "What does the Constitution do?",
      "answer": "sets up the government, defines the government, protects basic rights of Americans"
    },
    {
      "question": "The idea of self-government is in the first three words of the Constitution. What are these words?",
      "answer": "We the People"
    },
    {
      "question": "What is an amendment?",
      "answer": "a change (to the Constitution), an addition (to the Constitution)"
    },
    {
      "question": "What do we call the first ten amendments to the Constitution?",
      "answer": "the Bill of Rights"
    },
    {
      "question": "What is one right or freedom from the First Amendment?*",
      "answer": "speech, religion, assembly, press, petition the government"
  

Tokens Used: 1085
	Prompt Tokens: 721
	Completion Tokens: 364
Successful Requests: 1
Total Cost (USD): $0.00217
{
  "items": [
    {
      "question": "How old do citizens have to be to vote for President?*",
      "answer": "eighteen (18) and older"
    },
    {
      "question": "What are two ways that Americans can participate in their democracy?",
      "answer": "vote, join a political party, help with a campaign, join a civic group, join a community group, give an elected official your opinion on an issue, call Senators and Representatives, publicly support or oppose an issue or policy, run for office, write to a newspaper"
    },
    {
      "question": "When is the last day you can send in federal income tax forms?*",
      "answer": "April 15"
    },
    {
      "question": "When must all men register for the Selective Service?",
      "answer": "at age eighteen (18), between eighteen (18) and twenty-six (26)"
    },
    {
      "question": "What is one reason colonists came t

In [160]:
len(master.items)

99

In [169]:
import json
with open('data.json', 'w') as f:
    json.dump(master.dict(), f, indent=4)

## Actually Creating the Quiz

In [4]:
import json
with open('data.json', 'r') as f:
    json_data = json.load(f)
    
question_bank = QAs(**json_data)

In [5]:
import random
class Quiz:
    asked = set()
    question_bank: QAs
        
    def __init__(self, question_bank):
        self.question_bank = question_bank
        
    def quiz_and_give_answer(self):
        if len(self.asked) == 100:
            print("All questions asked. You're done!")
            return True
        random_index = random.randint(0, len(self.question_bank.items) - 1)
        while random_index in self.asked:
            random_index = random.randint(0, len(self.question_bank.items) - 1)
        self.asked.add(random_index)
        qa = self.question_bank.items[random_index]
        user_answer = input(qa.question + " Answer: ")
        check_answer_prompt = PromptTemplate(
            input_variables=["question", "answer", "user_answer"],
            template="You will be given a pair of question and answer, as well as a user's answer. Return if the user's answer is correct or not and explain why. Question: {question}\n Answer: {answer}\n User's answer: {user_answer}",
        )
        chat_input = check_answer_prompt.format_prompt(question=qa.question, answer=qa.answer, user_answer=user_answer)
        response = llm.predict(chat_input.to_string())
        print(response)
        return False
    
    def run_quiz(self):
        finished = False
        while not finished:
            finished = self.quiz_and_give_answer()
    


In [6]:
a = Quiz(question_bank=question_bank)
a.run_quiz()

In what month do we vote for President?* Answer: November


NameError: name 'PromptTemplate' is not defined