<a href="https://colab.research.google.com/github/sunnysavita10/Indepth-GENAI/blob/main/Hybrid_Search_and_reranking_in_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

https://s4ds.org/

https://www.icdmai.org/


In [None]:
!pip install weaviate-client

In [None]:
!pip install langchain

In [None]:
!pip install -U langchain-community

In [None]:
import weaviate

In [None]:
import os  # local import: this cell runs before the notebook's own `import os` cell

# Endpoint of the Weaviate cluster used throughout the notebook.
WEAVIATE_CLUSTER = "https://hybridsearch-ewd5zpr1.weaviate.network"
# Read the key from the environment so it never has to be pasted into the notebook;
# falls back to the original empty placeholder when the variable is not set.
WEAVIATE_API_KEY = os.environ.get("WEAVIATE_API_KEY", "")

In [None]:
# Alias consumed by the client constructor below. The original second line
# (`WEAVIATE_API_KEY = WEAVIATE_API_KEY`) assigned the name to itself and is dropped.
WEAVIATE_URL = WEAVIATE_CLUSTER

In [None]:
import os  # local import: this cell runs before the notebook's own `import os` cell

# Hugging Face API token, read from the environment instead of being pasted here;
# falls back to the original empty placeholder when the variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN", "")

In [None]:
import os

In [None]:
# Connect to the Weaviate cluster with API-key authentication.
# The Hugging Face token travels as an extra request header (presumably consumed by
# the text2vec-huggingface vectorizer named in the schema below).
client = weaviate.Client(
    url=WEAVIATE_URL,
    auth_client_secret=weaviate.AuthApiKey(WEAVIATE_API_KEY),
    additional_headers={"X-HuggingFace-Api-Key": HF_TOKEN},
)

In [None]:
# Readiness check on the client; the returned value is shown as the cell output.
client.is_ready()

In [None]:
# Display the schema the cluster currently reports (before the changes made below).
client.schema.get()

In [None]:
# NOTE(review): judging by the name, this removes every class from the cluster's schema,
# not only the ones this notebook creates — confirm before running on a shared cluster.
client.schema.delete_all()

In [None]:
# Weaviate schema with a single class, "RAG", that holds one text property ("content").
# Both the class and the property carry a "text2vec-huggingface" moduleConfig entry.
schema = {
    "classes": [
        {
            "class": "RAG",
            "description": "Documents for RAG",
            "vectorizer": "text2vec-huggingface",
            "moduleConfig": {
                "text2vec-huggingface": {
                    "model": "sentence-transformers/all-MiniLM-L6-v2",
                    "type": "text",
                }
            },
            "properties": [
                {
                    "dataType": ["text"],
                    "description": "The content of the paragraph",
                    "moduleConfig": {
                        "text2vec-huggingface": {"skip": False, "vectorizePropertyName": False}
                    },
                    "name": "content",
                },
            ],
        },
    ]
}


In [None]:
# Submit the `schema` dict defined above to the cluster.
client.schema.create(schema)

In [None]:
# Display the schema again, now that `schema` has been submitted.
client.schema.get()

In [None]:
from langchain.retrievers.weaviate_hybrid_search import WeaviateHybridSearchRetriever

In [None]:
# Hybrid (keyword + semantic) retriever over the "RAG" class.
retriever = WeaviateHybridSearchRetriever(
    client=client,          # the weaviate.Client created earlier in the notebook
    index_name="RAG",       # the name of the index to use
    text_key="content",     # the name of the text key to use
    alpha=0.5,              # 0.5 weights keyword and semantic search equally
    attributes=[],          # the attributes to return in the results
    create_schema_if_missing=True,
)

In [None]:
# Model id passed to both the tokenizer and the quantized-model loaders below.
model_name = "HuggingFaceH4/zephyr-7b-beta"

In [None]:
!pip install bitsandbytes

In [None]:
!pip install accelerate

In [None]:
import torch
from transformers import ( AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline, )
from langchain import HuggingFacePipeline

In [None]:
# function for loading 4-bit quantized model
def load_quantized_model(model_name: str):
    """
    Load a causal language model with 4-bit NF4 quantization.

    model_name: Name or path of the model to be loaded.
    return: Loaded quantized model.
    """
    # Quantization settings only; loader options do not belong in this config.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        quantization_config=bnb_config,
        # `low_cpu_mem_usage` is a `from_pretrained` option; the original passed it to
        # BitsAndBytesConfig, where it is not one of the quantization settings.
        low_cpu_mem_usage=True,
    )
    return model

In [None]:
# initializing tokenizer
def initialize_tokenizer(model_name: str):
    """
    Build the tokenizer that pairs with the given model.

    model_name: Name or path of the model for tokenizer initialization.
    return: Tokenizer whose bos_token_id has been set to 1.
    """
    tokenizer_options = {"return_token_type_ids": False}
    tok = AutoTokenizer.from_pretrained(model_name, **tokenizer_options)
    # Pin the beginning-of-sentence token id.
    tok.bos_token_id = 1
    return tok

In [None]:
# Tokenizer for `model_name`, built by initialize_tokenizer (which also sets bos_token_id to 1).
tokenizer = initialize_tokenizer(model_name)

In [None]:
# Load the 4-bit quantized model for `model_name`.
# NOTE(review): presumably downloads the weights and needs a GPU runtime — confirm.
model = load_quantized_model(model_name)

In [None]:
# Import the factory under an alias. The original `pipeline = pipeline(...)` rebound the
# imported function to its own result, so re-running the cell would call the previously
# built pipeline object instead of the factory.
from transformers import pipeline as hf_pipeline

# The result is still bound to `pipeline` because the next cell passes `pipeline=pipeline`.
pipeline = hf_pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    use_cache=True,
    device_map="auto",
    do_sample=True,          # sampling is on, so generations vary between runs
    top_k=5,
    max_new_tokens=100,      # cap on generated tokens per call
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)

In [None]:
# Wrap the transformers pipeline object so the LangChain chains below can use it as `llm`.
llm = HuggingFacePipeline(pipeline=pipeline)

In [None]:
# Path of the source PDF.
# NOTE(review): the /content/ prefix looks like a Colab upload location — the file
# must already exist there before the loader cell runs.
doc_path="/content/Retrieval-Augmented-Generation-for-NLP.pdf"

In [None]:
!pip install pypdf

In [None]:
!pip install langchain_community

In [None]:
from langchain_community.document_loaders import PyPDFLoader

In [None]:
# PDF loader bound to `doc_path`.
loader = PyPDFLoader(doc_path)

In [None]:
# Read the PDF into `docs` (presumably one Document per page — confirm).
docs = loader.load()

In [None]:
# Display everything that was loaded.
# NOTE(review): this renders the full contents; consider slicing if the output is too long.
docs

In [None]:
# Inspect the element at index 6 (assumes at least seven items were loaded).
docs[6]

In [None]:
# Hand the loaded documents to the retriever so they are stored for hybrid search.
retriever.add_documents(docs)

In [None]:
# Smoke test: print the text of the first hit for a sample question
# (index 0 assumes the retriever returned at least one document).
print(retriever.invoke("what is RAG token?")[0].page_content)

In [None]:
# Same question, with score=True forwarded to the retriever.
# NOTE(review): presumably this attaches the hybrid-search score to each result —
# confirm against the retriever documentation.
retriever.invoke(
    "what is RAG token?",
    score=True
)

In [None]:
from langchain.chains import RetrievalQA

In [None]:
from langchain_core.prompts import ChatPromptTemplate

In [None]:
# System instructions for the chat prompt; `{context}` is left as a template placeholder.
system_prompt = "".join(
    (
        "Use the given context to answer the question. ",
        "If you don't know the answer, say you don't know. ",
        "Use three sentence maximum and keep the answer concise. ",
        "Context: {context}",
    )
)

In [None]:
# Chat-style prompt: a system message built from `system_prompt` and a human message
# filled from the `query` input variable.
# NOTE(review): `prompt` is reassigned by a later cell (PromptTemplate.from_template),
# so this version is replaced before the chains further down are built.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{query}"),
    ]
)

In [None]:
from langchain.prompts import PromptTemplate
# Plain-text QA prompt with two placeholders: {context} and {question}.
template = """
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you do not have the relevant information needed to provide a verified answer, don't try to make up an answer.
When providing an answer, aim for clarity and precision. Position yourself as a knowledgeable authority on the topic, but also be mindful to explain the information in a manner that is accessible and comprehensible to those without a technical background.
Always say "Do you have any more questions pertaining to this instrument?" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""

# Rebinds `prompt`; any earlier value held under that name is replaced.
prompt = PromptTemplate.from_template(template)

In [None]:
from langchain.chains.combine_documents import create_stuff_documents_chain

In [None]:
# Build a stuff-documents chain from `llm` and `prompt`.
# NOTE(review): `question_answer_chain` is not referenced by any later visible cell.
question_answer_chain = create_stuff_documents_chain(llm, prompt)

In [None]:
# Baseline QA chain: "stuff" chain type on top of the hybrid retriever.
hybrid_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)


In [None]:
# Run the baseline chain on a sample question and print the whole returned object.
result1 = hybrid_chain.invoke("what is natural language processing?")
print(result1)

In [None]:
# Print only the value stored under the 'result' key.
print(result1['result'])

In [None]:
# Question passed to the chain in the next cell.
query="What is Abstractive Question Answering?"

In [None]:
# Invoke the chain with an explicit input dict keyed by "query".
response = hybrid_chain.invoke({"query":query})

In [None]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

In [None]:
def _format_docs(documents):
    """Join the retrieved documents' page_content into one context string."""
    return "\n\n".join(doc.page_content for doc in documents)


# Set up the RAG chain
# The retriever output is piped through `_format_docs` so that `{context}` receives the
# documents' text rather than the string form of a list of Document objects.
rag_chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
)

In [None]:
# Question for the `rag_chain` run below.
query="what is RAG token?"

In [None]:
# Reuse the `query` variable set in the previous cell instead of repeating its literal.
response = rag_chain.invoke(query)

In [None]:
# Print the chain output as plain text.
print(response)

In [None]:
# Show the same value through the notebook's display (repr) instead of print.
response

In [None]:
# `response` was last assigned from `rag_chain`, whose final step is the LLM, so it is
# expected to be a plain string; indexing it with "result" would raise TypeError.
# Handle both shapes: a RetrievalQA-style dict and a bare string.
print(response["result"] if isinstance(response, dict) else response)

In [None]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank

In [None]:
!pip install cohere

In [None]:
# Cohere API key read from the environment (`os` is imported near the top of the notebook)
# instead of being pasted here; falls back to the original empty placeholder when unset.
compressor = CohereRerank(cohere_api_key=os.environ.get("COHERE_API_KEY", ""))

In [None]:
# Wrap the hybrid retriever so that its results pass through the Cohere reranker.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

In [None]:
# `user_query` is not defined in any visible cell (NameError); use the `query` variable
# set earlier. `.invoke` matches how the base retriever is called elsewhere in this notebook.
compressed_docs = compression_retriever.invoke(query)
# Print the relevant documents from using the embeddings and reranker
print(compressed_docs)


In [None]:
# Rebuild the QA chain on top of the reranking retriever; this rebinds `hybrid_chain`.
hybrid_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=compression_retriever,
)

In [None]:
# Ask the reranked chain a sample question; the result replaces the earlier `response`.
response = hybrid_chain.invoke("What is Abstractive Question Answering?")

In [None]:
# Print the value under "result" (`.get` yields None rather than raising if the key is absent).
print(response.get("result"))

In [None]:
# NOTE(review): identical to the previous cell; prints the same value again.
print(response.get("result"))