In [1]:
! pip install --quiet -r requirements.txt

In [53]:
import os
from dotenv import load_dotenv
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")
WEAVIATE_CLUSTER = os.getenv("WEAVIATE_CLUSTER")
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY")

# Prepare Vector DB
import weaviate
from weaviate.classes.init import Auth

# Langchain Libraries
from langchain.retrievers.weaviate_hybrid_search import WeaviateHybridSearchRetriever
from langchain import HuggingFacePipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# torch and transformers framework
import torch
print("Is CUDA available: ", torch.cuda.is_available())
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

Is CUDA available:  True


In [3]:
# Create Weaviate Client to access Cluster
client = weaviate.Client(
    url = WEAVIATE_CLUSTER,
    auth_client_secret= weaviate.AuthApiKey(WEAVIATE_API_KEY),
    additional_headers={
        "X-HuggingFace-Api-Key": HF_TOKEN
    },
)

# Connect to Weaviate Cloud
# client = weaviate.connect_to_weaviate_cloud(
#     cluster_url=WEAVIATE_CLUSTER,                       # `weaviate_url`: your Weaviate URL
#     auth_credentials=Auth.api_key(WEAVIATE_API_KEY),    # `weaviate_key`: your Weaviate API key
# )
print("Is VectorDB ready: ",client.is_ready())

            your code to use Python client v4 `weaviate.WeaviateClient` connections and methods.

            For Python Client v4 usage, see: https://weaviate.io/developers/weaviate/client-libraries/python
            For code migration, see: https://weaviate.io/developers/weaviate/client-libraries/python/v3_v4_migration
            


Is VectorDB ready:  True


In [4]:
# Create the schema
schema = client.schema.get()
if len(schema["classes"]) ==0:
    schema = {
        "classes":[
            {
                "class": "RAG",
                "description": "Documents for RAG",
                "vectorizer": "text2vec-huggingface",
                "moduleConfig": {"text2vec-huggingface":{"model":"sentence-transformers/all-MiniLM-L6-v2"}},
                "properties":[
                    {
                        "name": "content",                    
                        "dataType":["text"],
                        "description": "The content of the paragraph",
                        "moduleConfig":{
                            "text2vec-huggingface":{
                                "skip": False,
                                "vectorizerPropertyName": False,
                            }
                        },
                    }
                ]
            }
        ]
    }
    client.schema.create(schema)

In [5]:
client.schema.get()

{'classes': [{'class': 'RAG',
   'description': 'Documents for RAG',
   'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2},
    'cleanupIntervalSeconds': 60,
    'stopwords': {'additions': None, 'preset': 'en', 'removals': None}},
   'moduleConfig': {'text2vec-huggingface': {'model': 'sentence-transformers/all-MiniLM-L6-v2',
     'vectorizeClassName': True}},
   'multiTenancyConfig': {'autoTenantActivation': False,
    'autoTenantCreation': False,
    'enabled': False},
   'properties': [{'dataType': ['text'],
     'description': 'The content of the paragraph',
     'indexFilterable': True,
     'indexSearchable': True,
     'moduleConfig': {'text2vec-huggingface': {'skip': False,
       'vectorizePropertyName': False,
       'vectorizerPropertyName': False}},
     'name': 'content',
     'tokenization': 'word'}],
   'replicationConfig': {'factor': 1},
   'shardingConfig': {'actualCount': 1,
    'actualVirtualCount': 128,
    'desiredCount': 1,
    'desiredVirtualCount': 128,
    '

In [6]:
retriever = WeaviateHybridSearchRetriever(
    alpha = 0.5,
    client = client,
    index_name = "RAG",
    text_key = "content",
    attributes = [],
    create_schema_if_missing = True
)

In [7]:
# Function to load 4-bit quantized model and create tokenizer
model_name = "HuggingFaceH4/zephyr-7b-beta"
def load_quantized_model(model_name: str):
    '''
    model_name: Name or Path of Model to be loaded.
    Return: Loaded Quantized Model
    '''
    bnb_config = BitsAndBytesConfig(
        load_in_4bit= True,
        bnb_4bit_use_double_quant= True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        low_cpu_mem_usage = True
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype = torch.bfloat16,
        quantization_config = bnb_config,
        device_map = "cuda"
    )
    return model



In [8]:
# Initialize Tokenizer
def initialize_tokenizer(model_name: str):
    '''
    model_name: Name or Path of the model for tokenizer initialization.
    return: Initialized tokenizer.
    '''
    tokenizer = AutoTokenizer.from_pretrained(model_name, return_token_type_ids = False)
    tokenizer.bos_token_id = 1 # Set Begining of Sentence Token Id
    return tokenizer


In [9]:
# Load the Tokenizer and the model
model_name = "HuggingFaceH4/zephyr-7b-beta"
tokenizer = initialize_tokenizer(model_name)
model = load_quantized_model(model_name)

Unused kwargs: ['low_cpu_mem_usage']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [10]:
# Create the Hugging Pipeline
pipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    use_cache = True,
    device_map = "cuda",
    do_sample = True,
    top_k = 5,
    max_new_tokens = 100,
    num_return_sequences = 1,
    eos_token_id = tokenizer.eos_token_id,
    pad_token_id = tokenizer.pad_token_id
)

In [11]:
# Create LLM wrapper
llm = HuggingFacePipeline(pipeline = pipeline)

  warn_deprecated(


In [17]:
# Get Document
doc_path = os.path.join("document","LifeInsuranceAndAnnuities-2.pdf")
loader = PyPDFLoader(doc_path)
docs = loader.load()
docs[:3]

  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4


[Document(metadata={'source': 'document\\LifeInsuranceAndAnnuities-2.pdf', 'page': 0}, page_content='CALIFORNIA\nLIFE \nINSURANCE\nand\nANNUITIES\n800-927-HELP (4357)\nwww.insurance.ca.gov\nTDD  800.482.4833\nDEPARTMENT \nOF INSURANCE\nCALIFORNIA DEP ARTMENT OF INSURANCE\nConsumer Education and Outreach Bureau\n300 South Spring Street, South Tower\nLos Angeles, CA  90013\nMaterials presented in this borchure are a collective effort of the staff of the \nCalifornia Department of Insurance.\n'),
 Document(metadata={'source': 'document\\LifeInsuranceAndAnnuities-2.pdf', 'page': 1}, page_content='Life Insurance and Annuities  \x18Table of Contents\nLife Insurance and \nAnnuities\n  \nIntroduction to Life Insurance and Annuities  .  .  .  .  .  .  .  .  .  .  .  .  .  .  . 2\nDefining Your Needs   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  . 3\nLife Insurance   .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  

In [23]:
# Load the documents into Weaviate Vector DB
retriever.add_documents(docs)
# check a search
print(retriever.invoke("What is a period certain")[1].page_content)

24 State of California Department of InsuranceFace Amount  — The amount to be paid to the beneficiary 
when the insured dies  .  It will be reduced by any unpaid 
policy loans and interest on those loans, and may be 
increased by any dividends  .
Free Look  — The right of the policyowner to have a period 
of ten or more days to examine an insurance policy and, if 
not satisfied, return it to the company for a full refund of all amounts paid  .
Grace Period  — A period of time (usually 3 days) after the 
premium due date when an overdue premium may be paid without penalty  .  The policy remains in force throughout the 
period  .
Guaranteed Insurability  — An option that permits the 
policyholder to buy additional stated amounts of life 
insurance at certain times in the future, without having to provide new evidence of insurability  .
Illustration  — A document used in life insurance sales 
presentations showing year-by-year numbers indicating how a policy will work  .  Usually it assu

In [86]:
# Create prompt
system_prompt = (
    '''
Use the below Context to answer the question in a consice and easy way. If you do not know the answer say that not enough Context have been provided.
Always follow up the answer with a question to the user if he has any other questions to ask.
Context: {context}
Answer:
'''
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{query}")
    ]
)
template = '''
Use the below Context to answer the question in a consice and easy way. If you do not know the answer say that not enough Context have been provided.
Always follow up the answer with a question to the user if he has any other questions to ask. Do not show the comtext in the answer.
Context: {context}
Question: {query}
Helpful Answer: 
'''
prompt = PromptTemplate.from_template(template)


In [87]:
# Create the Retrieval Chain with stuff_documents
hybrid_chain = RetrievalQA.from_chain_type(llm = llm, chain_type="stuff", retriever = retriever)

In [88]:
# Test the chain

results = hybrid_chain.invoke("what is period certain annuity")
answer = results["result"][results["result"].find("Question:"):]

In [89]:
print(answer)

Question: what is period certain annuity
Helpful Answer: A Period Certain Annuity is a type of annuity that provides the annuitant with an income stream for a specific period of time, such as 5, 10, or 20 years, regardless of whether the annuitant lives or dies during that time. This type of annuity guarantees that the annuity payments will continue to be made to the annuitant or their beneficiary for the entire period, providing financial security and peace of mind. After the


In [90]:
# Setup the RAG Chain
rag_chain = (
    {"context": retriever, "query": RunnablePassthrough()} | prompt | llm
)

query = "What is Period Certain Annuity ?"
response = rag_chain.invoke(query)
print(response)


Use the below Context to answer the question in a consice and easy way. If you do not know the answer say that not enough Context have been provided.
Always follow up the answer with a question to the user if he has any other questions to ask. Do not show the comtext in the answer.
Context: [Document(page_content='Annuities \n \nWhile life insurance proceeds are paid at the time of \ndeath of the insured, the proceeds of an annuity can \nprovide you with an income for as long as you live  .  \nThere are two types of annuities:  \n \n• The first is when you pay a lump sum to a life  \ninsurance company, and they pay it out to you  \nright away in periodic installments  .  This type is  \nknown as an immediate annuity — the payments  \nto you start immediately  . \n \n• The second, and more common, is where money paid  \nby you accumulates with interest over a period of time  .   \nIf you choose, the accumulated amounts will then be paid \nout to you in periodic installments, usually wh