In [None]:
! pip install -qU langchain chromadb huggingface_hub sentence_transformers pypdf openai tiktoken langchain-community

In [None]:
! pip install llama-cpp-python

Loading Data

In [None]:
import os
from re import search

import chromadb
from google.colab import userdata
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.document_transformers import (
    EmbeddingsClusteringFilter,
    EmbeddingsRedundantFilter,
    LongContextReorder,
)
from langchain.embeddings import (
    HuggingFaceBgeEmbeddings,
    HuggingFaceEmbeddings,
    OpenAIEmbeddings,
)
from langchain.llms import LlamaCpp
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain.retrievers.merger_retriever import MergerRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma


In [None]:
# Build a PDF loader for the Harry Potter book stored under /content.
harrypotter_loader = PyPDFLoader("/content/harry_potter_book.pdf")

In [None]:
# Read the PDF through the loader; the result is kept for chunking below.
harrypotter_documents = harrypotter_loader.load()

In [None]:
# Sanity check: how many documents the loader returned.
print(len(harrypotter_documents))

In [None]:
# Build a PDF loader for the second book (got_book.pdf) stored under /content.
got_loader = PyPDFLoader("/content/got_book.pdf")

In [None]:
# Read the PDF through the loader; the result is kept for chunking below.
got_documents = got_loader.load()

In [None]:
# Sanity check: how many documents the loader returned.
print(len(got_documents))

Creating Text Splitter for Chunking

In [None]:
# One splitter is shared by both books: chunk_size=500 with an overlap of 100
# between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)

In [None]:
# Chunk the Harry Potter documents with the shared splitter.
harrypotter_text = text_splitter.split_documents(harrypotter_documents)

In [None]:
# Chunk the second book's documents with the shared splitter.
got_text = text_splitter.split_documents(got_documents)

In [None]:
# Report the number of chunks per book. Two separate statements are used because
# `print(...), print(...)` builds a (None, None) tuple that the notebook then
# echoes as the cell's output.
print(len(harrypotter_text))
print(len(got_text))

Load Embedding Models to Convert the Data into Vectors

In [None]:
# all-MiniLM-L6-v2 is a general sentence-transformers model, so load it through the
# plain HuggingFaceEmbeddings wrapper. The BGE wrapper is meant for BAAI/bge-* models
# and applies a BGE-specific query instruction that does not fit this model.
hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [None]:
# BGE embedding model used for both vector stores below.
# The repo id is spelled exactly as published ("bge-large-en", lowercase) so the
# download and the local cache refer to the canonical name.
hf_bge_embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-large-en")

In [None]:
# Fetch the OpenAI key from Colab's `userdata` store instead of hardcoding it in the notebook.
OPENAI_API_KEY = userdata.get("OPENAI_API_KEY")

In [None]:
# Pass the secret as the API *key*. The original passed it as `openai_api_type`,
# which selects the API flavour (e.g. Azure) and leaves the key unset.
openai_embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

Ingest Data into Chroma Database

In [None]:
# Display the kernel's current working directory (the cell's last expression is echoed).
os.getcwd()

In [None]:
# Parent directory of the kernel's working directory: take the absolute form of
# the current directory and keep the head of the (head, tail) split.
CURRENT_DIR = os.path.split(os.path.abspath(os.curdir))[0]

In [None]:
# Directory for the Chroma database, located under CURRENT_DIR.
# The components must be relative: os.path.join discards everything before an
# absolute component, so joining with "/content/db" silently ignored CURRENT_DIR.
# On Colab (working directory /content, so CURRENT_DIR is "/") this still resolves to /content/db.
DB_DIR = os.path.join(CURRENT_DIR, "content", "db")

In [None]:
# Chroma client configuration shared by both vector stores: persistent storage
# rooted at DB_DIR, with anonymized telemetry switched off.
client_settings = chromadb.config.Settings(
    anonymized_telemetry=False,
    is_persistent=True,
    persist_directory=DB_DIR,
)

In [None]:
# Embed the Harry Potter chunks with the BGE model and store them in their own
# Chroma collection.
# Chroma reads the distance function from the "hnsw:space" metadata key; the
# original key "hnsw" is not recognised, so the collection did not use cosine distance.
# NOTE(review): a persist directory is given both here and in client_settings
# (DB_DIR) — confirm which location is intended.
harrypotter_vectorstore = Chroma.from_documents(
    harrypotter_text,
    hf_bge_embeddings,
    client_settings=client_settings,
    collection_name="Harrypotter",
    collection_metadata={"hnsw:space": "cosine"},
    persist_directory="/store/harrypotter",
)

In [None]:
# Embed the second book's chunks with the BGE model and store them in their own
# Chroma collection.
# Chroma reads the distance function from the "hnsw:space" metadata key; the
# original key "hnsw" is not recognised, so the collection did not use cosine distance.
# NOTE(review): a persist directory is given both here and in client_settings
# (DB_DIR) — confirm which location is intended.
got_vectorstore = Chroma.from_documents(
    got_text,
    hf_bge_embeddings,
    client_settings=client_settings,
    collection_name="got",
    collection_metadata={"hnsw:space": "cosine"},
    persist_directory="/store/got",
)

Now Create the Retrievers

In [None]:
# Retriever over the Harry Potter collection, using MMR search with k=5.
harrypotter_retriever = harrypotter_vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5, "include_metadata": True},
)

In [None]:
# Retriever over the "got" collection, using MMR search with k=5.
got_retriever = got_vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5, "include_metadata": True},
)

Merge Both Retrievers
(This is also called the Lord of the Retrievers (LOTR))

In [None]:
# Combine the two per-book retrievers into a single retriever.
# The field is spelled `retrievers`; the original `retrivers` keyword left the
# required field unset, so construction failed.
lotr = MergerRetriever(retrievers=[harrypotter_retriever, got_retriever])

In [None]:
# Print the text of every chunk the merged retriever returns for a question about the second book.
for chunk in lotr.get_relevant_documents("Who was jon snow?"):
    print(chunk.page_content)

In [None]:
# Print the text of every chunk the merged retriever returns for a Harry Potter question.
for chunk in lotr.get_relevant_documents("Who is Harry potter?"):
    print(chunk.page_content)

Now that each step is understood, it is time to create a retrieval pipeline for the LLM

In [None]:
# Post-process the merged retrieval results in two stages:
#   1. drop near-duplicate chunks, compared through the BGE embeddings;
#   2. reorder the remaining chunks with LongContextReorder.
_filter = EmbeddingsRedundantFilter(embeddings=hf_bge_embeddings)
reordering = LongContextReorder()
pipeline = DocumentCompressorPipeline(transformers=[_filter, reordering])

# ContextualCompressionRetriever is configured only by its compressor and base
# retriever. The original also passed `search_kwargs`, which is not one of its
# fields and had no effect — the number of hits (k) is set on the per-book
# retrievers that `lotr` merges.
compression_retriever_reordered = ContextualCompressionRetriever(
    base_compressor=pipeline,
    base_retriever=lotr,
)

In [None]:
# Smoke-test the compression retriever: run one query, report how many documents
# came back, and show the text of the first one.
# NOTE(review): the query looks unrelated to the two books indexed above — confirm it is intentional.
docs = compression_retriever_reordered.get_relevant_documents("What is esops?")
print(len(docs))

# Indexing [0] assumes at least one document was returned.
print(docs[0].page_content)

Loading Model from HuggingFace

In [None]:
# Local llama.cpp model (quantised Zephyr 7B GGUF file on Google Drive), run on CPU.
llms = LlamaCpp(
    model_path="/content/drive/MyDrive/zephyr-7b-beta.Q4_K_M.gguf",
    streaming=True,  # `streaming` is the wrapper's flag; the extra `stream=True` was not a LlamaCpp field
    max_tokens=1500,
    temperature=0.75,
    top_p=1,
    n_gpu_layers=0,  # the wrapper's field is `n_gpu_layers`; `gpu_layers` was not recognised
    verbose=True,
    # Use half of the available cores, but never fewer than one thread;
    # os.cpu_count() may return None, and a single-core host would otherwise yield 0.
    n_threads=max(1, (os.cpu_count() or 2) // 2),
    n_ctx=4096,
)

In [None]:
# Question-answering chain: retrieved chunks are stuffed into one prompt for the
# local LLM, and the source documents are returned alongside the answer.
# The keyword is `retriever`; the original `retriver` typo meant the chain was
# built without its required retriever.
qa = RetrievalQA.from_chain_type(
    llm=llms,
    chain_type="stuff",
    retriever=compression_retriever_reordered,
    return_source_documents=True,
)

In [None]:
# Ask a question about the second book. `invoke` is used instead of calling the
# chain object directly, matching the later cells and avoiding the deprecated
# `__call__` entry point.
query = "Who is jon Snow?"
results = qa.invoke(query)
print(results["result"])
print(results["source_documents"])

In [None]:
# Ask a Harry Potter question. `invoke` is used instead of calling the chain
# object directly, matching the later cells and avoiding the deprecated
# `__call__` entry point.
results_1 = qa.invoke("Who is Harry Potter?")
print(results_1["result"])

print(results_1["source_documents"])

# Show the metadata of each source document used for the answer.
for source in results_1["source_documents"]:
    print(source.metadata)

In [None]:
# Run the QA chain on a third question; the returned mapping is inspected in the cells below.
results_2 = qa.invoke("How does Jon Snow's relationship with Stark family?")

In [None]:
# Display the full chain output (the cell's last expression is echoed).
results_2

In [None]:
# Print only the entry stored under "result".
print(results_2["result"])

In [None]:
# Print the entry stored under "source_documents".
print(results_2["source_documents"])

In [None]:
# Show the metadata attached to each source document of the last answer.
for source_doc in results_2["source_documents"]:
    print(source_doc.metadata)