In [7]:
import os
from dotenv import load_dotenv

In [8]:
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

In [9]:
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [10]:
from langchain.document_loaders import PyPDFLoader

In [11]:
%cd ..

/home/abdul-moeez/Documents/Projects/interview-question-application


In [12]:
file_path = "data/285017-data-information-and-knowledge.pdf"
loader = PyPDFLoader(file_path)
data = loader.load()

In [13]:
question_gen=""

for page in data:
    question_gen+=page.page_content

In [14]:
question_gen

"Topic support guide \n \n \n \n \n \n \n \n \n \n \nCambridge International AS & A Level \nInformation Technology \n9626 \nFor examination from 2017 \n \nTopic 1.1 Data, information and \nknowledgeCambridge International Examinations retains the copyright on all its publications.  \nRegistered Centres are permitted to copy material from this booklet for their own  \ninternal use. However, we cannot give permission to Centres to photocopy any \nmaterial that is acknowledged to a third party even for internal use within a Centre. \n \n© Cambridge International Examinations 2015 \nVersion 1Contents \n \n \nIntroduction ................................................................................................................................... 2 \nHow to use this guide .................................................................................................................. 2 \nLearning objectives ..............................................................................

In [15]:
from langchain.text_splitter import TokenTextSplitter

In [16]:
splitter_ques_gen = TokenTextSplitter(
    chunk_size=10000,
    chunk_overlap=200
)

In [17]:
chunk_ques_gen=splitter_ques_gen.split_text(question_gen)

In [18]:
chunk_ques_gen

["Topic support guide \n \n \n \n \n \n \n \n \n \n \nCambridge International AS & A Level \nInformation Technology \n9626 \nFor examination from 2017 \n \nTopic 1.1 Data, information and \nknowledgeCambridge International Examinations retains the copyright on all its publications.  \nRegistered Centres are permitted to copy material from this booklet for their own  \ninternal use. However, we cannot give permission to Centres to photocopy any \nmaterial that is acknowledged to a third party even for internal use within a Centre. \n \n© Cambridge International Examinations 2015 \nVersion 1Contents \n \n \nIntroduction ................................................................................................................................... 2 \nHow to use this guide .................................................................................................................. 2 \nLearning objectives .............................................................................

In [19]:
len(chunk_ques_gen)

1

In [20]:
from langchain.docstore.document import Document

In [21]:
document_ques_gen=[Document(page_content=t) for t in chunk_ques_gen]

In [22]:
splitter_ques_gen = TokenTextSplitter(
    chunk_size=10000,
    chunk_overlap=200
)

In [23]:
document_answer_gen= splitter_ques_gen.split_documents(
    document_ques_gen
)

In [24]:
document_answer_gen

[Document(metadata={}, page_content="Topic support guide \n \n \n \n \n \n \n \n \n \n \nCambridge International AS & A Level \nInformation Technology \n9626 \nFor examination from 2017 \n \nTopic 1.1 Data, information and \nknowledgeCambridge International Examinations retains the copyright on all its publications.  \nRegistered Centres are permitted to copy material from this booklet for their own  \ninternal use. However, we cannot give permission to Centres to photocopy any \nmaterial that is acknowledged to a third party even for internal use within a Centre. \n \n© Cambridge International Examinations 2015 \nVersion 1Contents \n \n \nIntroduction ................................................................................................................................... 2 \nHow to use this guide .................................................................................................................. 2 \nLearning objectives ..........................................

In [None]:
from langchain_groq import ChatGroq

chat = ChatGroq(temperature=0.3, 
                model_name="llama-3.3-70b-versatile")


In [26]:
prompt_template = """
You are an expert at creating questions based on coding materials and documentation.
Your goal is to prepare  a coder or a programmer for there exam and coding tests.
Your do this by asking questions  about the text below:

--------------
{text}
--------------

Create questions that will prepare the coders or programmers for their tests.
Make sure not to lose any important information

Questions:
"""

In [27]:
from langchain.prompts import PromptTemplate

In [28]:
PROMPT_QUESTIONS = PromptTemplate(template=prompt_template, input_variables=['text'])

In [29]:
refine_template = (
    """
You are an expert at creating practice questions based on coding material and documentation
Your goal is to help a coder or programmer prepare for coding test.
We have received some practice questions to a certain extent: {existing_answer}.
We have the option to refine the exisiting questions or add new ones.
(Only if necessary) with some more context below.

----------------
{text}
----------------

Given the new context, refine the original questions in English.
If the context is not helpful, please provide the orignial questions.
QUESTIONS:
"""
)

In [30]:
REFINE_PROMPT_TEMPLATE = PromptTemplate(
    input_variables=["existing_answer", "TokenTextSplitter"],
    template=refine_template
)

In [31]:
from langchain.chains.summarize import load_summarize_chain

ques_gen_chain = load_summarize_chain(
    llm=chat,
    chain_type="refine",
    verbose=True,
    question_prompt=PROMPT_QUESTIONS,
    refine_prompt=REFINE_PROMPT_TEMPLATE
)

In [32]:
ques = ques_gen_chain.run(document_ques_gen)
ques

  ques = ques_gen_chain.run(document_ques_gen)




[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
You are an expert at creating questions based on coding materials and documentation.
Your goal is to prepare  a coder or a programmer for there exam and coding tests.
Your do this by asking questions  about the text below:

--------------
Topic support guide 
 
 
 
 
 
 
 
 
 
 
Cambridge International AS & A Level 
Information Technology 
9626 
For examination from 2017 
 
Topic 1.1 Data, information and 
knowledgeCambridge International Examinations retains the copyright on all its publications.  
Registered Centres are permitted to copy material from this booklet for their own  
internal use. However, we cannot give permission to Centres to photocopy any 
material that is acknowledged to a third party even for internal use within a Centre. 
 
© Cambridge International Examinations 2015 
Version 1Contents 
 
 
Introduction ..........................

'Here are some questions based on the provided text to help prepare coders or programmers for their tests:\n\n**Multiple Choice Questions**\n\n1. What is the primary difference between data and information?\na) Data is processed, while information is raw\nb) Data has meaning, while information does not\nc) Data has no meaning, while information has meaning\nd) Data is used for storage, while information is used for processing\n\nAnswer: c) Data has no meaning, while information has meaning\n\n2. What is the equation for converting data into information?\na) Data + Context = Information\nb) Data + Meaning = Information\nc) Data - Meaning = Information\nd) Information - Data = Knowledge\n\nAnswer: b) Data + Meaning = Information\n\n3. What is the term for knowledge that can be easily passed on to others?\na) Tacit knowledge\nb) Explicit knowledge\nc) Implicit knowledge\nd) Raw data\n\nAnswer: b) Explicit knowledge\n\n4. What is the term for knowledge that is difficult to pass on to other

In [33]:
from langchain.embeddings import HuggingFaceEmbeddings

In [34]:
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [35]:
from langchain.vectorstores import FAISS

In [36]:
vector_store = FAISS.from_documents(document_answer_gen, embeddings)

In [37]:
vector_store

<langchain_community.vectorstores.faiss.FAISS at 0x70a4993a1960>

In [38]:
chat = ChatGroq(temperature=0.3, 
                model_name="llama-3.3-70b-versatile")


In [39]:
quest_list = ques.split('\n')
quest_list

['Here are some questions based on the provided text to help prepare coders or programmers for their tests:',
 '',
 '**Multiple Choice Questions**',
 '',
 '1. What is the primary difference between data and information?',
 'a) Data is processed, while information is raw',
 'b) Data has meaning, while information does not',
 'c) Data has no meaning, while information has meaning',
 'd) Data is used for storage, while information is used for processing',
 '',
 'Answer: c) Data has no meaning, while information has meaning',
 '',
 '2. What is the equation for converting data into information?',
 'a) Data + Context = Information',
 'b) Data + Meaning = Information',
 'c) Data - Meaning = Information',
 'd) Information - Data = Knowledge',
 '',
 'Answer: b) Data + Meaning = Information',
 '',
 '3. What is the term for knowledge that can be easily passed on to others?',
 'a) Tacit knowledge',
 'b) Explicit knowledge',
 'c) Implicit knowledge',
 'd) Raw data',
 '',
 'Answer: b) Explicit knowl

In [40]:
from langchain.chains import RetrievalQA

In [41]:
answer_generation_chain = RetrievalQA.from_chain_type(
    llm = chat,
    chain_type="stuff",
    retriever=vector_store.as_retriever()
)

In [42]:
for question in quest_list:
    print("Question:", question)
    answer= answer_generation_chain.run(question)
    print("Answer", answer)
    print("------------------------------------\\n\\n")
    with open("answer.txt", "a") as f:
        f.write("Question:" + question + "\\n")
        f.write("Answer:" + answer + "\\n")
        f.write("-------------------------------\\n\\\n")
      

Question: Here are some questions based on the provided text to help prepare coders or programmers for their tests:
Answer I'm ready to help. What are the questions? Go ahead and ask away, and I'll do my best to provide answers based on the provided text about Cambridge International AS & A Level Information Technology, specifically topic 1.1 Data, information, and knowledge.
------------------------------------\n\n
Question: 
Answer It looks like you didn't ask a question. Please go ahead and ask your question about the topic "Data, information and knowledge" from the Cambridge International AS & A Level Information Technology syllabus, and I'll do my best to help.
------------------------------------\n\n
Question: **Multiple Choice Questions**


RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01jsrpqcbafzvt92krfss99aad` service tier `on_demand` on tokens per day (TPD): Limit 100000, Used 98571, Requested 4718. Please try again in 47m20.845s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}