In [None]:
! pip install weaviate-client langchain langchain_community bitsandbytes accelerate pypdf cohere transformers torch 

In [None]:
import os

import torch
import transformers
import weaviate
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank
from langchain.retrievers.weaviate_hybrid_search import WeaviateHybridSearchRetriever
from langchain_community.document_loaders import PyPDFLoader
from transformers import (AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline)


In [None]:
# Connection settings are read from environment variables so that real secrets
# never have to be typed into (and saved with) the notebook. When a variable is
# not set, the original placeholder string is kept as the value.
WEAVIATE_CLUSTER = os.environ.get("WEAVIATE_CLUSTER", "WEAVIATE_CLUSTER")
WEAVIATE_API_KEY = os.environ.get("WEAVIATE_API_KEY", "WEAVIATE_API_KEY")
HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN", "HUGGINGFACE_TOKEN")

In [None]:
# Open the Weaviate connection: API-key authentication for the cluster itself,
# plus the Hugging Face token sent along as an extra request header.
weaviate_auth = weaviate.AuthApiKey(WEAVIATE_API_KEY)
extra_headers = {"X-HuggingFace-Api-Key": HF_TOKEN}

client = weaviate.Client(
    url=WEAVIATE_CLUSTER,
    auth_client_secret=weaviate_auth,
    additional_headers=extra_headers,
)

In [None]:
# Readiness probe for the connection created above. As the cell's last
# expression, its return value is what the notebook displays.
client.is_ready()

In [None]:
# Schema definition, assembled bottom-up: one text property ("content") inside
# one class ("RAG") that is vectorized by the text2vec-huggingface module.

# The single property that stores the paragraph text.
content_property = {
    "dataType": ["text"],
    "description": "The content of the paragraph",
    "moduleConfig": {
        "text2vec-huggingface": {"skip": False, "vectorizePropertyName": False},
    },
    "name": "content",
}

# The document class and its vectorizer configuration.
rag_class = {
    "class": "RAG",
    "description": "Documents for RAG",
    "vectorizer": "text2vec-huggingface",
    "moduleConfig": {
        "text2vec-huggingface": {
            "model": "sentence-transformers/all-MiniLM-L6-v2",
            "type": "text",
            "vectorizeClassName": True,
        },
    },
    "properties": [content_property],
}

schema = {"classes": [rag_class]}

In [None]:
# Create each class only when the cluster does not already have it, so this
# cell can be re-run (e.g. Restart & Run All) against a cluster that was set up
# by an earlier run instead of failing on the already-existing class.
for class_definition in schema["classes"]:
    if not client.schema.exists(class_definition["class"]):
        client.schema.create_class(class_definition)

In [None]:
# Fetch the schema back from the cluster; as the last expression of the cell,
# the returned value is displayed so the class created above can be inspected.
client.schema.get()

In [None]:
# Hybrid-search retriever over the "RAG" class, reading and writing document
# text through the "content" property defined in the schema.
retriever = WeaviateHybridSearchRetriever(
    client=client,                  # the connected Weaviate client created earlier
    index_name="RAG",               # class name used as the index
    text_key="content",             # property that holds the document text
    attributes=[],
    alpha=0.5,                      # hybrid weighting; presumably keyword vs. vector balance, confirm in the retriever docs
    create_schema_if_missing=True,
)

In [None]:
# Model identifier shared by the tokenizer and model loading cells below.
model_name = "HuggingFaceH4/zephyr-7b-beta"

In [None]:
# Function for loading 4-bit quantized model
def load_quantized_model(model_name:str):

    """
    Load a causal language model with 4-bit bitsandbytes quantization.

    model_name: Name or path of the model to be loaded.
    return: Loaded quantized model.

    """

    # Quantization settings: 4-bit NF4 weights with double quantization,
    # computing in bfloat16.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    # low_cpu_mem_usage is a model-loading option, so it is passed to
    # from_pretrained here rather than to the quantization config.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        quantization_config=bnb_config,
        low_cpu_mem_usage=True,
    )

    return model

In [None]:
# Initialize_tokenizer
def initialize_tokenizer(model_name:str):
    """
    Load the tokenizer that matches the given model.

    model_name: Name or path of the model for tokenizer initialization.
    return: Initialized tokenizer.

    """

    # The keyword is spelled `return_token_type_ids` (plural), the name the
    # transformers tokenizers use; the misspelled singular form was not a
    # recognised option.
    tokenizer = AutoTokenizer.from_pretrained(model_name, return_token_type_ids=False)
    tokenizer.bos_token_id = 1 # Set beginning of sentence token id
    return tokenizer

In [None]:
# Build the tokenizer for the model selected in `model_name`.
tokenizer = initialize_tokenizer(model_name=model_name)

In [None]:
# Load the 4-bit quantized model for the same `model_name` as the tokenizer.
model = load_quantized_model(model_name=model_name)

In [None]:
# Build the text-generation pipeline from the quantized model and tokenizer.
# The factory is called as `transformers.pipeline` because this cell binds the
# result to the name `pipeline` (read by the next cell); calling the bare
# imported `pipeline` would make the cell fail when it is run a second time,
# since the name would then refer to the pipeline object instead of the factory.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    use_cache=True,
    device_map="auto",
    max_length=3000,
    do_sample=True,
    top_k=5,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)

In [None]:
# Wrap the generation pipeline built in the previous cell so it can be passed
# as the `llm` of the RetrievalQA chains below.
llm = HuggingFacePipeline(pipeline=pipeline)

In [None]:
# Location of the PDF to index. NOTE(review): this is an absolute path (looks
# like a Colab upload directory); adjust it when running elsewhere.
doc_path = "/content/Retrieval-Augmented-Generation-for-NLP.pdf"

In [None]:
# PDF loader bound to the file at `doc_path`.
loader = PyPDFLoader(doc_path)

In [None]:
# Read the PDF into `docs`, the collection handed to the retriever next.
docs = loader.load()

In [None]:
# Store the loaded documents through the retriever.
# NOTE(review): re-running this cell presumably inserts the same documents
# again; confirm before running it more than once against the same cluster.
retriever.add_documents(docs)

In [None]:
# Sanity-check retrieval on a sample question; the result is shown as the cell
# output. `score=True` is passed along with the query; presumably it requests
# relevance scores with each hit (confirm against the retriever docs).
retriever.invoke("What is RAG Token?",
                 score=True)

In [None]:
# Question-answering chain that combines the LLM with the plain hybrid-search
# retriever, using the "stuff" chain type.
hybrid_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

In [None]:
# Ask the sample question through the hybrid-search chain. The answer is only
# stored in `result`; nothing is displayed by this cell.
result = hybrid_chain.invoke("What is RAG Token?")

In [4]:
# Read the Cohere key from the environment so the real secret is not saved in
# the notebook; the original placeholder string is kept when it is not set.
COHERE_API_KEY = os.environ.get("COHERE_API_KEY", "COHERE_API_KEY")

In [None]:
# Cohere reranker, used below as the document compressor.
compressor = CohereRerank(cohere_api_key=COHERE_API_KEY)

In [None]:
# Retriever that fetches candidates with the hybrid-search retriever and then
# passes them through the Cohere compressor.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

In [None]:
# Query through the reranking retriever. `.invoke` is used, matching the plain
# retriever call earlier in the notebook, instead of the deprecated
# `get_relevant_documents`. The variable name is kept because the next cell
# reads it.
compressod_docs = compression_retriever.invoke("What is RAG Token?")

In [None]:
# Show the first returned document. NOTE(review): indexing [0] assumes at
# least one document came back.
print(compressod_docs[0])

In [None]:
# Same "stuff" question-answering chain as before, but retrieving through the
# reranking retriever instead of the plain hybrid-search one.
hybrid_chain_1 = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=compression_retriever,
)

In [None]:
# Ask a question through the reranking chain. The answer is only stored in
# `response`; nothing is displayed by this cell.
response = hybrid_chain_1.invoke("What is Abstractive Question Answering?")