<a href="https://colab.research.google.com/github/sunnysavita10/Generative-AI-Indepth-Basic-to-Advance/blob/main/RAG_Fusion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RAG Fusion

In [None]:
!pip -q install langchain huggingftiktace_hub oken pypdf
!pip -q install google-generativeai chromadb
!pip -q install sentence_transformers

In [None]:
!pip install -U langchain-community

### Download the Data & Utils

In [None]:
import textwrap
def wrap_text(text, width=90):
    """Wrap ``text`` to ``width`` columns while keeping its existing newlines.

    Each newline-separated line is wrapped on its own with ``textwrap.fill``,
    and the wrapped lines are joined back together with newline characters.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )


In [None]:
import os
from google.colab import userdata

GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

In [None]:
%pip install --upgrade --quiet  langchain-google-genai

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

In [None]:
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro")
result = llm.invoke("Write a ballad about LangChain")
print(result.content)

## Google

## Imports

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.chroma import Chroma
import langchain

## Load in Docs

In [None]:
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
data_path="/content/drive/MyDrive/English"

In [None]:
!pip install unstructured

In [None]:
%%time
loader = DirectoryLoader(data_path, glob="*.txt", show_progress=True)
docs = loader.load()

In [None]:
len(docs)

In [None]:
docs = docs[:50]
len(docs)

In [None]:
docs[0]

In [None]:
print(docs[2].page_content)

In [None]:
print(docs[1].page_content)

In [None]:
# Concatenate the text of every loaded document into one string, skipping
# documents whose page_content is empty. A single join avoids the repeated
# string copies of `+=` in a loop and the unused enumerate index.
raw_text = ''.join(doc.page_content for doc in docs if doc.page_content)

In [None]:
print(raw_text)

In [None]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap  = 100,
    length_function = len,
    is_separator_regex = False,
)

In [None]:
texts = text_splitter.split_text(raw_text)

In [None]:
len(texts)

In [None]:
print(texts[4])

## BGE Embeddings

In [None]:
from langchain.embeddings import HuggingFaceBgeEmbeddings

In [None]:
model_name = "BAAI/bge-small-en-v1.5"
encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity

In [None]:
embedding_function = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    #model_kwargs={'device': 'cuda'},
    encode_kwargs=encode_kwargs,
)

## Vector DB

In [None]:
%%time
### Build the Chroma vector store from the text chunks and persist it to disk
db = Chroma.from_texts(texts,embedding_function,persist_directory="./chroma_db")

In [None]:
query = "Tell me about Universal Studios Singapore?"

db.similarity_search(query, k=5)

## Setup a Retriever

In [None]:
# Wrap the vector store as a retriever.
# (MMR can be enabled with search_type="mmr" and a fetch_k of 20.)
retriever = db.as_retriever()

# The retriever is used as a Runnable throughout this notebook, so call it
# through `invoke` rather than the deprecated `get_relevant_documents`.
retriever.invoke(query)

## Chat chain

In [None]:
from operator import itemgetter
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough

In [None]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

In [None]:
prompt = ChatPromptTemplate.from_template(template)

In [None]:
prompt

In [None]:
llm

In [None]:
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
text_reply = chain.invoke("Tell me about Universal Studio Singapore")

print(wrap_text(text_reply))

## With RAG Fusion

In [None]:
from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.prompts import ChatMessagePromptTemplate, PromptTemplate

In [None]:
# Prompt that asks the LLM to expand a single user query into several search
# queries. The declared input variable must match the `{question}` placeholder
# in the human message, which is also the key the chain is invoked with.
prompt = ChatPromptTemplate(
    input_variables=['question'],
    messages=[
        SystemMessagePromptTemplate(
            prompt=PromptTemplate(
                input_variables=[],
                template='You are a helpful assistant that generates multiple search queries based on a single input query.',
            )
        ),
        HumanMessagePromptTemplate(
            prompt=PromptTemplate(
                input_variables=['question'],
                template='Generate multiple search queries related to: {question} \n OUTPUT (4 queries):',
            )
        ),
    ],
)

In [None]:
original_query = "universal studios Singapore"

In [None]:
# Query-generation chain: prompt -> LLM -> plain string -> list of queries.
# Blank lines in the model output are dropped (and surrounding whitespace
# trimmed) so that empty strings are never passed on as search queries.
generate_queries = (
    prompt
    | llm
    | StrOutputParser()
    | (lambda x: [line.strip() for line in x.split("\n") if line.strip()])
)

In [None]:
generate_queries

In [None]:
from langchain.load import dumps, loads


def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked result lists into one ranking.

    Every document contributes ``1 / (rank + k)`` for each list it appears
    in, where ``rank`` is its zero-based position in that list. The
    contributions are summed per document.

    Args:
        results: One list of documents per query. Each inner list is assumed
            to be ordered from most to least relevant.
        k: Constant added to the rank before taking the reciprocal.

    Returns:
        A list of ``(document, fused_score)`` tuples sorted by descending
        fused score. Empty input yields an empty list.
    """
    fused_scores = {}
    for ranked_docs in results:
        for rank, doc in enumerate(ranked_docs):
            # Serialize the document so it can serve as a dictionary key and
            # so the same document returned for different queries is merged.
            doc_key = dumps(doc)
            fused_scores[doc_key] = fused_scores.get(doc_key, 0) + 1 / (rank + k)

    ordered = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)
    # Deserialize the keys back into document objects for the caller.
    return [(loads(doc_key), score) for doc_key, score in ordered]

In [None]:
ragfusion_chain = generate_queries | retriever.map() | reciprocal_rank_fusion

In [None]:
# Enable debug output for subsequent chain runs through the supported global
# setter; assigning to `langchain.debug` directly is deprecated.
from langchain.globals import set_debug

set_debug(True)

In [None]:
ragfusion_chain.input_schema.schema()

In [None]:
ragfusion_chain.invoke({"question": original_query})

In [None]:
from operator import itemgetter
from langchain.schema.runnable import RunnablePassthrough
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# End-to-end RAG Fusion chain: the fused, re-ranked retrieval results become
# the context, and the LLM answers the question from that context only.
full_rag_fusion_chain = (
    {
        "context": ragfusion_chain,
        # The chain is invoked with {"question": ...}. Extract the question
        # string so the prompt shows the text itself instead of the repr of
        # the whole input dict, which RunnablePassthrough() would forward.
        "question": itemgetter("question"),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
full_rag_fusion_chain.input_schema.schema()

In [None]:
full_rag_fusion_chain.invoke({"question": "Tell me about Singapore’s nightlife scene?"})

In [None]:
Singapore’s nightlife scene is incredibly diverse, offering a blend of high-energy clubs and more relaxed options for a night out. You can dance to music from world-renowned DJs at a megaclub, savor a unique drink at a low-key cocktail bar, or enjoy live music before laughing the night away at a comedy club.


Singapore’s nightlife scene is incredibly diverse, offering a blend of high-energy clubs and more relaxed options for a night out. You can dance to music from world-renowned DJs at a megaclub, savor a unique drink at a low-key cocktail bar, or enjoy live music before laughing the night away at a comedy club.
