In [1]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import LlamaCpp

import gradio as gr

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
class ChatPDF:
    """Conversational question answering over a set of PDF files.

    The methods form a pipeline and are meant to be called in this order:
    ``load`` -> ``split`` -> ``get_embeddings`` -> ``store`` ->
    ``create_llm`` -> ``create_memory`` -> ``create_retriever`` ->
    ``create_chat_session``. Each step saves its result on the instance so
    that the following steps can use it.
    """

    # Defaults kept identical to the values that used to be hard-coded.
    DEFAULT_EMBEDDING_MODEL = "all-MiniLM-L6-v2"
    DEFAULT_LLM_PATH = "../models/llama-2-7b-chat.ggmlv3.q4_0.bin"

    def __init__(self, files: list, vectordb_path: str):
        """Remember the input files and where the vector store is persisted.

        Args:
            files: Paths of the PDF files to load.
            vectordb_path: Directory passed to Chroma as ``persist_directory``.
        """
        self.files = files
        self.pages = []
        self.documents = []
        self.vectordb_path = vectordb_path

    def load(self) -> tuple:
        """Load every file in ``self.files`` into ``self.pages``.

        ``self.pages`` is rebuilt from scratch on every call, so re-running
        the notebook cell does not duplicate pages (and therefore does not
        duplicate chunks in the vector store later on).

        Returns:
            ``(number_of_files, number_of_pages)``.
        """
        self.pages = []

        for file in self.files:
            # Announce the file first so a failing load is easy to attribute.
            print(f"Loading file {file}")
            self.pages.extend(PyPDFLoader(file).load())

        return len(self.files), len(self.pages)

    def split(self, chunk_size: int = 1500, chunk_overlap: int = 150) -> int:
        """Split the loaded pages into chunks stored in ``self.documents``.

        Args:
            chunk_size: ``chunk_size`` given to the text splitter.
            chunk_overlap: ``chunk_overlap`` given to the text splitter.

        Returns:
            The number of chunks produced.
        """
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )

        self.documents = text_splitter.split_documents(self.pages)

        return len(self.documents)

    def get_embeddings(self, model_name: str = DEFAULT_EMBEDDING_MODEL) -> None:
        """Create the embedding function and keep it in ``self.embeddings``.

        Args:
            model_name: Sentence-transformer model name to use.
        """
        self.embeddings = SentenceTransformerEmbeddings(model_name=model_name)

    def store(self) -> None:
        """Embed ``self.documents`` into a Chroma store and persist it.

        Requires ``split`` and ``get_embeddings`` to have been called. The
        resulting store is kept in ``self.vectordb``.
        """
        vectordb = Chroma.from_documents(
            documents=self.documents,
            embedding=self.embeddings,
            persist_directory=self.vectordb_path
        )

        vectordb.persist()

        self.vectordb = vectordb

    def create_llm(
        self,
        temperature: float = 0.4,
        model_path: str = DEFAULT_LLM_PATH,
        n_ctx: int = 2048,
    ) -> None:
        """Instantiate the LlamaCpp model and keep it in ``self.llm``.

        Args:
            temperature: Sampling temperature forwarded to ``LlamaCpp``.
            model_path: Path of the model file forwarded to ``LlamaCpp``.
            n_ctx: Context size forwarded to ``LlamaCpp``.
        """
        self.llm = LlamaCpp(
            model_path=model_path,
            verbose=True,
            n_ctx=n_ctx,
            temperature=temperature,
        )

    def create_memory(self) -> None:
        """Create the conversation memory and keep it in ``self.memory``."""
        self.memory = ConversationBufferMemory(
            # Stored under the "chat_history" key.
            memory_key="chat_history",
            return_messages=True
        )

    def create_retriever(self) -> None:
        """Expose the vector store as a retriever in ``self.retriever``.

        Requires ``store`` to have been called.
        """
        self.retriever = self.vectordb.as_retriever()

    def create_chat_session(self) -> None:
        """Assemble the conversational retrieval chain in ``self.qa``.

        Requires ``create_llm``, ``create_retriever`` and ``create_memory``
        to have been called.
        """
        # The template is sent verbatim to the model, so wording mistakes
        # here degrade the answers ("make up", balanced quotes).
        PROMPT_TEMPLATE = """
        Use the following pieces of context to answer the question at the end. 
        If you don't know the answer, just say that you don't know, don't try to make up an answer. 
        Use three sentences maximum. Keep answer as concise as possible. 
        Always say "thanks for asking!" at the end of the answer.
        {context}
        Question: {question}
        Helpful Answer:"""

        QA_CHAIN_PROMPT = PromptTemplate.from_template(PROMPT_TEMPLATE)

        self.qa = ConversationalRetrievalChain.from_llm(
            self.llm,
            retriever=self.retriever,
            memory=self.memory,
            combine_docs_chain_kwargs={'prompt': QA_CHAIN_PROMPT}
        )

In [None]:
# Where the Chroma vector store is persisted on disk.
vectordb_path = "../docs/chroma"

# PDF documents to index for the chat session.
files = [
    '../docs/Anexo_2_Descricao_Desafios_caracteristicas_e_especificidades_dos_desafios',
]

chat = ChatPDF(files=files, vectordb_path=vectordb_path)

In [None]:
# Build the chat session step by step. The order matters: each step stores
# on `chat` what the following steps read.
pipeline_steps = (
    chat.load,                 # PDFs -> pages
    chat.split,                # pages -> chunks
    chat.get_embeddings,       # embedding function
    chat.store,                # chunks + embeddings -> Chroma store
    chat.create_llm,           # LlamaCpp model
    chat.create_memory,        # conversation memory
    chat.create_retriever,     # retriever over the Chroma store
    chat.create_chat_session,  # conversational retrieval chain
)

for run_step in pipeline_steps:
    run_step()

In [None]:
# Front end web app
with gr.Blocks() as demo:

    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, chat_history):
        """Answer a submitted message and add the exchange to the chat.

        Args:
            user_message: Text submitted through the textbox.
            chat_history: Current value of the chatbot component.

        Returns:
            An update that empties the textbox, and the new chat history.
        """
        # Work on a copy so the list handed over by Gradio is not mutated;
        # a missing history is treated as an empty one.
        history = list(chat_history or [])

        # Blank submission: there is no question, so skip the LLM call.
        if not user_message or not user_message.strip():
            return gr.update(value=""), history

        # Get result from QA chain
        result = chat.qa({"question": user_message, "chat_history": history})

        # Append user message and response to chat history
        history.append((user_message, result["answer"]))

        return gr.update(value=""), history

    def reset_conversation():
        """Empty both the chat widget and the chain's conversation memory.

        Clearing only the widget would leave the previous exchanges in
        `chat.memory`, so the model would keep using a conversation the
        user believes was cleared.
        """
        chat.memory.clear()
        return None

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
    clear.click(reset_conversation, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch(debug=True)