# Chat over documents with open source LLM

This Colab notebook lets you have a conversation with an AI assistant that answers questions about a PDF you upload. The assistant is powered by the open-source LLM `Mistral-7b-instruct`; you can switch to another model if you prefer.

## How it works

- Upload a PDF via the Gradio interface.
- The PDF is converted into text and then split into chunks, which are stored in an embedding database.
- The end user's query is used to retrieve the related chunks from the PDF, which are then used as context for answering that query.

In [None]:
!pip install gradio==3.48 --quiet
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --quiet
!pip install langchain llama_index --quiet
!pip install huggingface-hub sentence_transformers --quiet
!pip install faiss-cpu pdfminer.six --quiet

### download the model

In [None]:
!huggingface-cli download TheBloke/Mistral-7B-Instruct-v0.1-GGUF mistral-7b-instruct-v0.1.Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False

### load the model using the langchain llama.cpp wrapper

In [3]:
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [None]:
# Settings and construction of the local LLM used for answering questions.
n_gpu_layers = 50  # forwarded to LlamaCpp below; presumably the number of layers offloaded to the GPU — TODO confirm
n_batch = 512  # NOTE(review): not used while the n_batch argument below stays commented out
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])  # NOTE(review): not used while the callback_manager argument below stays commented out

llm = LlamaCpp(
    model_path="mistral-7b-instruct-v0.1.Q4_K_M.gguf",  # same file name the huggingface-cli cell downloads into the working directory
    n_gpu_layers=n_gpu_layers,
    temperature=0,  # presumably makes answers as deterministic as possible — TODO confirm
    # n_batch=n_batch,
    # callback_manager=callback_manager,
    n_ctx=4096,  # presumably the context window size in tokens — TODO confirm against the wrapper docs
    streaming = True,
    verbose=True,
)

### open source model for embedding

In [None]:
from langchain.embeddings import HuggingFaceBgeEmbeddings
# Embedding model used both to index the PDF chunks and to embed user queries.
model_name = "BAAI/bge-large-en-v1.5"
model_kwargs = {'device': 'cuda'}
encode_kwargs = {'normalize_embeddings': True}  # set True to compute cosine similarity
model = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
    # The wrapper concatenates this instruction directly with the query text,
    # so it must end with a space to keep the two from running together.
    query_instruction="Represent this sentence for searching relevant passages: ",
)

### Use Gradio as the interface; the retrieval QA functionality is implemented with the LangChain library

In [6]:
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import PDFMinerLoader
from langchain.chains import RetrievalQA

In [None]:
import gradio as gr
import os
import time


# Shared state between the Gradio callbacks: "llm" stays None until a PDF has
# been processed, after which it holds the object used to answer questions.
config = {"llm": None}


def add_text(history, text):
    """Queue the user's message in the chat and lock the input box.

    Args:
        history: Current chat history, a list of (user, bot) message pairs.
        text: The message that was just submitted.

    Returns:
        A pair ``(new_history, textbox)``: a new history list ending with
        ``(text, None)`` (the bot reply is still pending) and a ``gr.Textbox``
        with an empty value and ``interactive=False``.
    """
    pending_turn = (text, None)
    extended_history = history + [pending_turn]
    return extended_history, gr.Textbox(value="", interactive=False)


def add_file(history, file):
    """Show the uploaded file's name in the chat as a user message.

    Args:
        history: Current chat history, a list of (user, bot) message pairs.
        file: Uploaded file object; only its ``name`` attribute (a path) is read.

    Returns:
        A new history list ending with ``(file_name, None)``; the input list
        is not modified.
    """
    # basename understands the platform's path separators, unlike a manual
    # split on "/" which returns the whole path for backslash-separated paths.
    short_name = os.path.basename(file.name)
    return history + [(short_name, None)]


def bot(history, config=config):
    """Fill in the bot's reply for the most recent chat turn.

    Args:
        history: Chat history whose last entry holds the pending user message
            at index 0; index 1 of that entry is overwritten with the reply.
        config: State dict whose ``"llm"`` entry is either ``None`` or an
            object with a ``run(question)`` method. Defaults to the
            module-level ``config``.

    Returns:
        The same ``history`` object, with the last entry's reply set.
    """
    qa_chain = config["llm"]
    if qa_chain is None:
        # No PDF has been processed yet, so there is nothing to query.
        reply = "Please upload pdf to start"
    else:
        question = history[-1][0]
        reply = qa_chain.run(question)
    history[-1][1] = reply
    return history

def file_bot(history, file, config=config):
    """Index an uploaded PDF and install a retrieval-QA chain for later questions.

    The PDF is loaded with ``PDFMinerLoader``, split into chunks, embedded with
    the module-level ``model`` into a FAISS index, and wrapped in a
    ``RetrievalQA`` chain that uses the module-level ``llm``. The chain is
    stored in ``config["llm"]``. If the PDF yields no text chunks,
    ``config["llm"]`` is left untouched and the user is told so.

    Args:
        history: Chat history; the reply slot of its last entry is overwritten
            with the status message.
        file: Uploaded file object; only its ``name`` attribute (a path) is read.
        config: State dict that receives the chain under ``"llm"``. Defaults
            to the module-level ``config``.

    Yields:
        ``history`` after each character of the status message is appended,
        which produces a typing effect in the chat window.
    """
    template = """
    <s>[INST]Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

    {context}

    Question: {question}
    Helpful Answer:[/INST]
    """
    prompt = PromptTemplate(
        template=template, input_variables=["context", "question"]
    )

    data = PDFMinerLoader(file.name).load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    documents = text_splitter.split_documents(data)

    if documents:
        db = FAISS.from_documents(documents, model)
        retriever = db.as_retriever(search_kwargs={"k": 2})
        # Pass the prompt through the supported constructor argument instead of
        # overwriting an attribute deep inside the chain after it is built.
        qa = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            chain_type_kwargs={"prompt": prompt},
        )
        config["llm"] = qa
        response = "PDF uploaded"
    else:
        # Nothing to index (e.g. a scanned PDF without a text layer); report it
        # rather than handing an empty document list to the vector store.
        response = "No text could be extracted from this PDF"

    history[-1][1] = ""
    for character in response:
        history[-1][1] += character
        time.sleep(0.05)
        yield history


# Build the chat UI: a chat window, a text input and a PDF upload button.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        bubble_full_width=False,
    )

    with gr.Row():
        txt = gr.Textbox(
            scale=4,
            show_label=False,
            # The upload button below only accepts PDF files.
            placeholder="Enter text and press enter, or upload a PDF",
            container=False,
        )
        btn = gr.UploadButton("📁", file_types=[".pdf"])

    # Text flow: add the message to the chat (and lock the textbox), then let
    # the bot fill in the reply, then unlock the textbox again.
    txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
        bot, chatbot, chatbot
    )
    txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
    # Upload flow: show the file name in the chat, then index the PDF and
    # stream the status message into the chat window.
    file_msg = btn.upload(add_file, [chatbot, btn], [chatbot], queue=False).then(
        file_bot, [chatbot, btn], chatbot
    )

# file_bot is a generator, so queuing is enabled before launching.
demo.queue()
demo.launch(debug=True)
