In [None]:
!pip install -qqq langchain langchain_community PyPDF2 pypdf ctransformers sentence_transformers faiss-gpu

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import csv
import os

from langchain.chains import QAGenerationChain
from langchain.chains import RetrievalQA
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader

In [None]:
def load_llm():
    """Build the Mistral-7B-Instruct GGUF model wrapped as a LangChain LLM.

    Generation settings are supplied through ``config``. The LangChain
    ``CTransformers`` wrapper forwards only the ``config`` dict to the
    underlying ctransformers model, so ``max_new_tokens`` / ``temperature``
    passed as top-level keyword arguments do not reach generation and the
    library defaults are used instead.

    Returns:
        A ``CTransformers`` LLM configured with ``max_new_tokens=1048`` and
        ``temperature=0.3``.
    """
    generation_config = {
        "max_new_tokens": 1048,
        "temperature": 0.3,
    }
    llm = CTransformers(
        model="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
        model_type="mistral",
        config=generation_config,
    )
    return llm

In [None]:
def file_processing(file_path):
    """Load a PDF and split its text into two sets of documents.

    Args:
        file_path: Path of the PDF handed to ``PyPDFLoader``.

    Returns:
        A tuple ``(document_ques_gen, document_answer_gen)``:
        ``document_ques_gen`` is a list of ``Document`` objects built from
        the whole text split with ``chunk_size=1000, chunk_overlap=100``;
        ``document_answer_gen`` is the result of re-splitting those
        documents with ``chunk_size=300, chunk_overlap=30``.
    """
    pages = PyPDFLoader(file_path).load()

    # Concatenate every page's text (no separator) into one string.
    full_text = "".join(page.page_content for page in pages)

    coarse_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    document_ques_gen = [
        Document(page_content=chunk)
        for chunk in coarse_splitter.split_text(full_text)
    ]

    # The finer split starts from the coarse documents, not from the raw text.
    fine_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=30)
    document_answer_gen = fine_splitter.split_documents(document_ques_gen)

    return document_ques_gen, document_answer_gen

In [None]:
def _extract_questions(generated_text):
    """Return the whitespace-stripped lines of ``generated_text`` ending in '?' or '.'."""
    stripped_lines = (line.strip() for line in generated_text.split("\n"))
    return [line for line in stripped_lines if line.endswith(("?", "."))]


def llm_pipeline(file_path):
    """Generate questions from a PDF and build a retrieval chain to answer them.

    The PDF is split by ``file_processing``; a "refine" summarize chain is
    run over the larger chunks with question-writing prompts, and the
    smaller chunks are indexed in a FAISS vector store that backs a
    ``RetrievalQA`` chain.

    Args:
        file_path: Path of the PDF to process.

    Returns:
        A tuple ``(answer_generation_chain, filtered_ques_list)`` where
        ``answer_generation_chain`` is the ``RetrievalQA`` chain and
        ``filtered_ques_list`` is the list of generated lines (stripped of
        surrounding whitespace) that end with ``?`` or ``.``.
    """
    document_ques_gen, document_answer_gen = file_processing(file_path)

    # Load the model once and share it between question generation and
    # answering; both chains use the same settings, so a second load_llm()
    # call would only load a second copy of the model.
    llm = load_llm()

    prompt_template = """
    You are an expert at creating questions based on coding materials and documentation.
    Your goal is to prepare a coder or programmer for their exam and coding tests.
    You do this by asking questions about the text below:

    ------------
    {text}
    ------------

    Create questions that will prepare the coders or programmers for their tests.
    Make sure not to lose any important information.

    QUESTIONS:
    """

    PROMPT_QUESTIONS = PromptTemplate(template=prompt_template, input_variables=["text"])

    refine_template = ("""
    You are an expert at creating practice questions based on coding material and documentation.
    Your goal is to help a coder or programmer prepare for a coding test.
    We have received some practice questions to a certain extent: {existing_answer}.
    We have the option to refine the existing questions or add new ones.
    (only if necessary) with some more context below.
    ------------
    {text}
    ------------

    Given the new context, refine the original questions in English.
    If the context is not helpful, please provide the original questions.
    QUESTIONS:
    """
    )

    REFINE_PROMPT_QUESTIONS = PromptTemplate(
        input_variables=["existing_answer", "text"],
        template=refine_template,
    )

    ques_gen_chain = load_summarize_chain(llm=llm,
                                          chain_type="refine",
                                          verbose=True,
                                          question_prompt=PROMPT_QUESTIONS,
                                          refine_prompt=REFINE_PROMPT_QUESTIONS)

    ques = ques_gen_chain.run(document_ques_gen)

    # all-mpnet-base-v2 is not a BGE model, so use the plain wrapper:
    # HuggingFaceBgeEmbeddings prepends a BGE-specific instruction to every
    # query, which this model was not trained with.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    vector_store = FAISS.from_documents(document_answer_gen, embeddings)

    # Strip each line before filtering: the prompt's indentation shows up in
    # the model output, and trailing whitespace would otherwise hide a
    # terminating '?' or '.' from the check.
    filtered_ques_list = _extract_questions(ques)

    answer_generation_chain = RetrievalQA.from_chain_type(llm=llm,
                                                          chain_type="stuff",
                                                          retriever=vector_store.as_retriever())

    return answer_generation_chain, filtered_ques_list

In [None]:
def generate_questions_and_answers(file_path, output_file="QA.csv"):
    """Generate question/answer pairs for a PDF and save them as CSV.

    Args:
        file_path: Path of the PDF passed on to ``llm_pipeline``.
        output_file: Destination CSV path. Defaults to ``"QA.csv"``; an
            existing file at that path is overwritten.

    Returns:
        The path the CSV was written to (``output_file``).
    """
    answer_generation_chain, ques_list = llm_pipeline(file_path)

    # newline="" lets the csv module control line endings itself.
    with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(["Question", "Answer"])  # Writing the header row

        for question in ques_list:
            print("Question: ", question)
            answer = answer_generation_chain.run(question)
            print("Answer: ", answer)
            print("--------------------------------------------------\n\n")

            # Save answer to CSV file
            csv_writer.writerow([question, answer])

    return output_file

In [None]:
# PDF to turn into question/answer pairs. The call is left as the cell's last
# expression so the returned CSV path is displayed as the cell output.
file_path = "./PDF_PATH.pdf"
generate_questions_and_answers(file_path=file_path)


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]



[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    You are an expert at creating questions based on coding materials and documentation.
    Your goal is to prepare a coder or programmer for their exam and coding tests.
    You do this by asking questions about the text below:

    ------------
    grammarsaurus.co.ukReading Comprehension
Who is he?
Mohamed, often shortened to Mo, Salah is arguably one of the best football players 
in the world.  He was born on 15th June 1992 in Gharbia, Egypt. Currently, he plays 
his club football for Liverpool Football Club and internationally for his country of 
origin, Egypt.  Despite playing for Liverpool in a very competitive league full of 
rivalry, he is well liked by football fans all over England and across the world.  
Liverpool fans have dubbed him the Egyptian King. 
What does he do? 
Mo Salah plays football professionally. He plays in a forward posit




[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    You are an expert at creating practice questions based on coding material and documentation.
    Your goal is to help a coder or programmer prepare for a coding test.
    We have received some practice questions to a certain extent: 1. Who is Mohamed Salah?
    2. What are the hobbies of Mohamed Salah?
    3. What does he play in football?
    4. Where was Mo Salah born?
    5. Which country does Mo Salah play for internationally?
    6. How many thousands of pounds has Mo Salah given away to charity?
    7. In which position does Mo Salah play in football?
    8. What is the name of Mo Salah's charity foundation?
    9. Which country is Mo Salah from?
    END.
    We have the option to refine the existing questions or add new ones.
    (only if necessary) with some more context below.
    ------------
    Charity Foundation has helped build schools and mosques.  Also, 
he h




[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    You are an expert at creating practice questions based on coding material and documentation.
    Your goal is to help a coder or programmer prepare for a coding test.
    We have received some practice questions to a certain extent: 1..
    We have the option to refine the existing questions or add new ones.
    (only if necessary) with some more context below.
    ------------
    a regular place in the first team. Subsequently, he was moved to Fiorentina on loan, 
then moved to Roma before signing for Liverpool in 2017. At Liverpool, he has 
found and sustained world-class form.  This form and success has propelled him to 
new heights and gained him several accolades, including the Golden Boot, which is 
an award for scoring the most goals in a competition.  Salah won this award for 
both the 17/18 and 18/19 Premier League seasons.
Why is he inspirational? 
Mo’s journey to




[1m> Finished chain.[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    You are an expert at creating practice questions based on coding material and documentation.
    Your goal is to help a coder or programmer prepare for a coding test.
    We have received some practice questions to a certain extent: 1. Which club did Mohamed Salah begin his footballing journey at?
    2. What was Mohamed Salahmed Salahmed Salahmed Salahmed Salad.
    We have the option to refine the existing questions or add new ones.
    (only if necessary) with some more context below.
    ------------
    In 2018, he became the first Egyptian footballer to be named the Professional 
Footballers' Association Player of the Year. He is only the second African footballer 
to win the award, after Leicester City's Algerian midfielder Riyad Mahrez in 2016. grammarsaurus.co.uk
Reading Comprehension
This became a great source of pride for Egyptians, who had the news splashed over 

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Question:  1. Which football club did Mohamed Salah begin his professional career with?
Answer:   Mohamed Salah began his professional career with El Mokawloon in Egypt.
--------------------------------------------------


Question:      2. What was Mohamed Salah's nickname in his childhood?
Answer:    1. In his childhood, Mohamed Salah was known as Mo.
--------------------------------------------------


Question:      3. How does Mohamed Salah observe Ramadan?
Answer:     Mohamed Salah observes Ramadan by fasting from dawn to sunset during the ninth month of the Muslim year.
--------------------------------------------------


Question:      4. What is the significance of Zakat in Islam, and how has Mohamed Salah promoted it through his charity work?
Answer:   The practice of Zakat is significant in Islam as it is an obligation for Muslims to donate a percentage of their income to benefit people who are less fortunate than themselves. Mohamed Salah has promoted Zakat through his char

'QA.csv'