In [47]:
from dotenv import load_dotenv

from langchain import chains, document_loaders, vectorstores
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.chat_models import ChatOllama
from langchain.document_loaders import TextLoader

import re
import PyPDF2

In [52]:
# Read the whole source text into memory, decoding it as UTF-8.
# Both `text_content` and `pdf` refer to the same string.
with open("./materials/chekhov.txt", encoding="utf-8") as file:
    pdf = text_content = file.read()

In [56]:
class PdfQA:
    """
    Retrieval-augmented question answering over a single document.

    Constructing an instance runs the whole pipeline: it creates the chat
    model, loads the document, splits it into chunks, embeds the chunks into
    a Chroma vector store, builds a retriever over that store and finally
    assembles a ``RetrievalQA`` chain, which is stored in ``self.chain``.

    :param model: The name or path of the model to be loaded.
    :param pdf_document: The path to the document to be loaded. Despite the
        name, the file is read with ``TextLoader``, i.e. as plain text.
    :param chunk_size: The desired size of each chunk.
    :param chunk_overlap: The specified overlap between chunks.
    :param search_type: The type of search to be performed.
    :param n_documents: The number of documents to be retrieved.
    :param chain_type: The type of chain to create.
    """

    def __init__(self, model, pdf_document, chunk_size, chunk_overlap,
                 search_type, n_documents, chain_type):
        # Each step stores its result on the instance; later steps read the
        # attributes set by earlier ones, so the order below matters.
        self.init_chat_model(model)
        self.load_text(pdf_document)
        self.split_documents(chunk_size, chunk_overlap)
        # Embeddings are produced by an Ollama model rather than by OpenAI.
        self.select_embedding = OllamaEmbeddings(model='mxbai-embed-large:latest')
        self.create_vectorstore()
        self.create_retriever(search_type, n_documents)
        self.chain = self.create_chain(chain_type)

    def init_chat_model(self, model):
        """
        Initialize the chat model and store it in ``self.llm``.

        :param model: The name or path of the model to be loaded.
        :return: None
        """
        print("Loading model")
        # temperature=0 is passed to the chat model for every request.
        self.llm = ChatOpenAI(model_name=model, temperature=0)

    def load_text(self, text, encoding="utf-8"):
        """
        Load the source document from a plain-text file into ``self.documents``.

        :param text: The path to the text file to be loaded.
        :param encoding: The encoding used to decode the file. Defaults to
            UTF-8 so that loading does not depend on the platform's default
            encoding.
        :return: None
        """
        loader = TextLoader(text, encoding=encoding)
        self.documents = loader.load()

    def split_documents(self, chunk_size, chunk_overlap):
        """
        Split the documents into chunks of a given size with a specified overlap.

        The chunks are stored in ``self.texts``.

        :param chunk_size: The desired size of each chunk.
        :type chunk_size: int
        :param chunk_overlap: The specified overlap between chunks.
        :type chunk_overlap: int
        :return: None
        """
        print("Splitting documents")
        text_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        self.texts = text_splitter.split_documents(self.documents)

    def create_vectorstore(self):
        """
        Create the vector store.

        Embeds the chunks in ``self.texts`` with ``self.select_embedding`` and
        stores them in a Chroma vector store, kept in ``self.db``.

        :return: None
        """
        print("Creating document embeddings")
        self.db = vectorstores.Chroma.from_documents(self.texts, self.select_embedding)

    def create_retriever(self, search_type, n_documents):
        """
        Generate a chunk retriever for the given search type and number of documents.

        The retriever is stored in ``self.retriever``.

        :param search_type: The type of search to be performed.
        :param n_documents: The number of documents to be retrieved.
        :return: None
        """
        print("Generating chunk retriever")
        self.retriever = self.db.as_retriever(search_type=search_type, search_kwargs={"k": n_documents})

    def create_chain(self, chain_type):
        """
        Build a ``RetrievalQA`` chain from ``self.llm`` and ``self.retriever``.

        The chain is created with ``return_source_documents=True``.

        :param chain_type: The type of chain to create.
        :return: The created chain.
        """
        qa = chains.RetrievalQA.from_chain_type(llm=self.llm,
                                                chain_type=chain_type,
                                                retriever=self.retriever,
                                                return_source_documents=True)
        return qa

    def query_chain(self):
        """
        Returns the chain of the object.

        :return: The chain of the object.
        """
        return self.chain

## Levers in the RAG pipeline
RAG is quite tricky to get right, especially if you need it to be efficient. There are many levers we can pull in our pipeline, which influence the following things:
* How fast we can get our answers;
* How relevant our answers are (and related, how likely we are to get a hallucination);
* How complete our answers are.

Let's instantiate our PDF questioner with the following values:
* `model`: the LLM used to generate answers using information from the document. In this case, `gpt-3.5-turbo`.
* `pdf_document`: the document we want to "chat with". In our case, we've selected a plain-text file about Chekhov, `./materials/chekhov.txt`.
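* `chunk_size`: the maximum number of characters to include in each chunk (`CharacterTextSplitter` measures length in characters by default). We've selected 1000. Because the splitter only breaks the text at its separator, some chunks can end up longer than this, as the warnings below show.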
* `chunk_overlap`: the number of characters that should overlap between adjacent chunks. We've selected 0, so no overlapping characters.
* `search_type`: the metric by which chunks are selected. In this case, we've selected "similarity", so those chunks with the highest (cosine) similarity to the content of the question we're asking. However, you can also use "mmr" (if supported by your document store) which tries to maximise for relevancy and diversity of results.
* `n_documents`: the maximum number of chunks to use to generate the answer. In this case, we've used 5.
* `chain_type`: this controls how the content is passed into the LLM. In the case of "stuff" it passes all gathered context chunks into the context window at once. Other options are "refine", which feeds in the chunks in batches, plus the answer generated so far, and "map_rerank", which feeds in each chunk and assigns a score based on how well it answered the question.

Other levers I've chosen not to make arguments in this class are the model used for embeddings (`OllamaEmbeddings` with the `mxbai-embed-large:latest` model is used) and which vector database we use to store the document embeddings (in this case, the `Chroma` vector store is used).

In [57]:
# Build the whole pipeline (model, chunks, embeddings, retriever, chain)
# and keep a handle on the resulting chain for querying.
pdf_qa = PdfQA(
    model="gpt-3.5-turbo",
    pdf_document="./materials/chekhov.txt",
    chunk_size=1000,
    chunk_overlap=0,
    search_type="similarity",
    n_documents=5,
    chain_type="stuff",
)
pdf_qa_chain = pdf_qa.query_chain()

Created a chunk of size 1071, which is longer than the specified 1000
Created a chunk of size 1157, which is longer than the specified 1000
Created a chunk of size 2322, which is longer than the specified 1000
Created a chunk of size 1054, which is longer than the specified 1000
Created a chunk of size 1728, which is longer than the specified 1000
Created a chunk of size 1183, which is longer than the specified 1000
Created a chunk of size 1175, which is longer than the specified 1000
Created a chunk of size 1512, which is longer than the specified 1000
Created a chunk of size 1544, which is longer than the specified 1000
Created a chunk of size 1073, which is longer than the specified 1000
Created a chunk of size 1440, which is longer than the specified 1000
Created a chunk of size 1059, which is longer than the specified 1000
Created a chunk of size 1100, which is longer than the specified 1000
Created a chunk of size 1465, which is longer than the specified 1000
Created a chunk of s

Loading model
Splitting documents
Creating document embeddings
Generating chunk retriever


Let's try it out by asking what a really good day is according to Chekhov.

In [64]:
# Ask the first question; the chain expects it under the "query" key.
question1 = {"query": "what's a really good day according to Chekhov"}
answer1 = pdf_qa_chain.invoke(question1)

In [65]:
# Show the value stored under the "result" key of the chain's response.
answer1["result"]

"A really good day for Chekhov seems to involve celebrating St. Tatyana's Day with his literary friends, making speeches, and keeping the holiday. He also enjoyed spending time with his family and friends, having visitors over, singing, playing the piano, and laughing. Additionally, he found joy in working on his favorite activities like planting and pruning trees, looking after his garden, and helping invalids who appealed to him for assistance."

We can see the answer is very comprehensive. Let's have a look at the information it was based on from the source document.

In [66]:
# Print every retrieved chunk with a 1-based position label.
# enumerate() numbers the chunks by position, so the label stays correct even
# if two retrieved chunks compare equal (list.index() would return the first
# match for both) and avoids rescanning the list on every iteration.
for index_n, document in enumerate(answer1["source_documents"], start=1):
    print(f"\nDOCUMENT {index_n}")
    # Collapse all runs of whitespace so each chunk prints as a single line.
    print(re.sub(r"\s+", " ", document.page_content.strip()))


DOCUMENT 1
"I have written sixty kopecks' worth," he would say with a smile. Braga's "Serenade" was the fashion at that time, and Chekhov was fond of hearing Potapenko play it on the violin while Miss Mizinov sang it. Having been a student at the Moscow University, Chekhov liked to celebrate St. Tatyana's Day. He never missed making a holiday of it when he lived in Moscow. That winter, for the first time, he chanced to be in Petersburg on the 12th of January. He did not forget "St. Tatyana," and assembled all his literary friends on that day in a Petersburg restaurant. They made speeches and kept the holiday, and this festivity initiated by him was so successful that the authors went on meeting regularly afterwards.

DOCUMENT 2
I have the honour to congratulate you and the hero of the name-day; [Footnote: It was the name-day of Chekhov himself.] I wish you and him health and prosperity, and above all that the mongoose should not break the crockery or tear the wall-paper. I shall celeb

We can see that the first three chunks are the most relevant, while the last three don't really add that much to the answer.

If we'd like, we can go a bit deeper with our answer. We can set up a memory for the last answer the LLM gave us so we can ask follow-up questions. In this case, let's see if the LLM left out anything about PyCharm's debugging.

In [8]:
# Pair the previous question with its answer and send it along with the
# follow-up question.
# NOTE(review): the chain was built with RetrievalQA and is queried through
# the "query" key; it is unclear from here whether the extra "chat_history"
# entry influences the answer - confirm, or switch to a conversational chain.
previous_turn = (answer1["query"], answer1["result"])
chat_history1 = [previous_turn]
follow_up = {
    "query": "Have you left out any other types of debugging?",
    "chat_history": chat_history1,
}
answer2 = pdf_qa_chain.invoke(follow_up)

In [9]:
# Show the value stored under the "result" key of the follow-up response.
answer2["result"]

'Yes, there are other types of debugging mentioned in the context provided:\n\n1. Debugging JavaScript: This is mentioned as the next step intended for Professional edition users.\n2. Debugging Django templates: It is mentioned that you have learned how to step through your template, evaluate expressions, and add watches in the context of a Django project.\n3. Working in the Threads and Variables tab: It is mentioned that you can observe the variables used in the application by stepping through all the set breakpoints.\n4. Working in the Console tab: It is mentioned that you can use the Console tab to see error messages or perform calculations not related to the current application.\n\nThese are the additional types of debugging mentioned in the context provided.'

If our model is capable of it, we can even enter queries in a different language to the source documentation, and get relevant answers back in this language. Here we question our English-language documentation in German ...