In [None]:
import os
from dotenv import load_dotenv

In [None]:
# Read variables from a local .env file, then look up the OpenAI key.
# The lookup yields None when OPENAI_API_KEY is not defined anywhere.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [None]:
# Export the key for the OpenAI client libraries used below.
# os.environ only accepts strings, so a missing key would otherwise surface as
# an opaque TypeError; fail early with an actionable message instead.
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in your .env file or shell environment."
    )
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [None]:
from langchain.document_loaders import PyPDFLoader

In [None]:
%pwd

In [None]:
# NOTE(review): not idempotent — every re-run of this cell moves one more directory up,
# so relative paths used later (e.g. "data/SDG.pdf") only resolve after exactly one run.
%cd ..

In [None]:
%pwd

In [None]:
# Load the source PDF; the path is relative to the current working directory.
file_path = "data/SDG.pdf"
loader = PyPDFLoader(file_path)
# presumably one Document per PDF page (a later cell iterates it as `page`) — confirm against PyPDFLoader docs
data = loader.load()

In [None]:
data

In [None]:
len(data)

In [None]:
# Merge the text of every loaded page into one string (no separator between
# pages) so it can be re-split by token count below.
question_gen = "".join(page.page_content for page in data)

In [None]:
question_gen

In [None]:
from langchain.text_splitter import TokenTextSplitter

In [None]:
# Token-based splitter for the question-generation pass: 10,000-token chunks
# with a 200-token overlap, counted with the gpt-3.5-turbo tokenizer.
# NOTE(review): each chunk is later placed inside a prompt sent to gpt-3.5-turbo —
# confirm that 10,000 tokens plus the prompt fits the deployed model's context window.
splitter_ques_gen = TokenTextSplitter(
    model_name="gpt-3.5-turbo",
    chunk_size=10000,
    chunk_overlap=200,
)

In [None]:
# Split the full document text into the large chunks used for question generation.
chunk_ques_gen = splitter_ques_gen.split_text(question_gen)

In [None]:
chunk_ques_gen

In [None]:
len(chunk_ques_gen)

In [None]:
type(chunk_ques_gen[0])

In [None]:
from langchain.docstore.document import Document

In [None]:
# Wrap each raw text chunk in a Document so the chains below can consume it.
document_ques_gen = [
    Document(page_content=chunk_text)
    for chunk_text in chunk_ques_gen
]

In [None]:
document_ques_gen

In [None]:
type(document_ques_gen[0])

In [None]:
# Finer-grained splitter for the answer-generation pass: 1,000-token chunks
# with a 100-token overlap, counted with the gpt-3.5-turbo tokenizer.
splitter_ans_gen = TokenTextSplitter(
    model_name="gpt-3.5-turbo",
    chunk_size=1000,
    chunk_overlap=100,
)

In [None]:
# Re-split the large question-generation documents into the smaller chunks
# that are indexed for retrieval further down.
document_answer_gen = splitter_ans_gen.split_documents(document_ques_gen)

In [None]:
document_answer_gen

In [None]:
len(document_answer_gen)

In [None]:
from langchain.chat_models import ChatOpenAI

In [None]:
# Chat model used to draft the questions (gpt-3.5-turbo, temperature 0.3).
llm_ques_gen_pipeline = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)

In [None]:
# First-pass prompt for question generation. The single placeholder {text}
# is filled with the document text the questions should be based on.
prompt_template = """
You are an expert at creating questions based on coding materials and documentation.
Your goal is to prepare a coder or programmer for their exam and coding tests.
You do this by asking questions about the text below:

------------
{text}
------------

Create questions that will prepare the coders or programmers for their tests.
Make sure not to lose any important information.

QUESTIONS:
"""

In [None]:
from langchain.prompts import PromptTemplate

In [None]:
# Prompt object for the first pass; its only input variable is "text".
PROMPT_QUESTIONS = PromptTemplate(
    input_variables=["text"],
    template=prompt_template,
)

In [None]:
# Follow-up prompt for the refine step. {existing_answer} receives the
# questions produced so far and {text} receives the next piece of context.
refine_template = """
You are an expert at creating practice questions based on coding material and documentation.
Your goal is to help a coder or programmer prepare for a coding test.
We have received some practice questions to a certain extent: {existing_answer}.
We have the option to refine the existing questions or add new ones.
(only if necessary) with some more context below.
------------
{text}
------------

Given the new context, refine the original questions in English.
If the context is not helpful, please provide the original questions.
QUESTIONS:
"""


In [None]:
# Prompt object for the refine step; it takes both the running set of
# questions ("existing_answer") and the new context ("text").
REFINE_PROMPT_QUESTIONS = PromptTemplate(
    template=refine_template,
    input_variables=["existing_answer", "text"],
)

In [None]:
from langchain.chains.summarize import load_summarize_chain

In [None]:
# Question-generation chain: chain_type "refine", wired with the first-pass
# prompt and the refine prompt defined above. verbose=True is enabled.
ques_gen_chain = load_summarize_chain(
    llm=llm_ques_gen_pipeline,
    chain_type="refine",
    verbose=True,
    question_prompt=PROMPT_QUESTIONS,
    refine_prompt=REFINE_PROMPT_QUESTIONS,
)

In [None]:
# Run the question-generation chain over the large chunks; the result is kept
# in `ques` (treated as newline-separated text by a later cell) and printed.
ques = ques_gen_chain.run(document_ques_gen)

print(ques)

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings

In [None]:
# Embedding model used to index the answer-generation chunks (library defaults).
embeddings = OpenAIEmbeddings()

In [None]:
from langchain.vectorstores import FAISS

In [None]:
# Embed the smaller answer-generation chunks and build a FAISS vector store from them.
vector_store = FAISS.from_documents(document_answer_gen, embeddings)

In [None]:
# Chat model used to answer the generated questions (lower temperature than the question model).
llm_answer_gen = ChatOpenAI(temperature=0.1, model="gpt-3.5-turbo")

In [None]:
ques

In [None]:
# One question per line of the model output. Blank lines and surrounding
# whitespace are dropped so that no empty query reaches the answer chain.
ques_list = [line.strip() for line in ques.split("\n") if line.strip()]

In [None]:
ques_list

In [None]:
from langchain.chains import RetrievalQA

In [None]:
# Retrieval QA chain (chain_type "stuff") that answers a question using the
# vector store's retriever and the answer model.
answer_generation_chain = RetrievalQA.from_chain_type(
    llm=llm_answer_gen,
    chain_type="stuff",
    retriever=vector_store.as_retriever(),
)


In [None]:
# Answer every generated question with the retrieval chain, echo the result,
# and append each question/answer pair to answers.txt.
# The file is opened once, still in append mode, with an explicit UTF-8
# encoding so non-ASCII model output does not depend on the platform default.
separator = "--------------------------------------------------\n\n"
with open("answers.txt", "a", encoding="utf-8") as f:
    for question in ques_list:
        # Splitting model output on newlines can leave blank entries; skip
        # them instead of spending a retrieval/LLM call on an empty query.
        if not question.strip():
            continue
        print("Question: ", question)
        answer = answer_generation_chain.run(question)
        print("Answer: ", answer)
        print(separator)
        # Use "\n" (not "\\n") so real line breaks are written rather than
        # the two literal characters backslash and n.
        f.write("Question: " + question + "\n")
        f.write("Answer: " + answer + "\n")
        f.write(separator)
        # Flush per record so completed answers are on disk even if a later
        # question fails or the kernel is interrupted.
        f.flush()


