In [1]:
!nvidia-smi

Sat Aug 10 17:53:32 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.70                 Driver Version: 560.70         CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3050 ...  WDDM  |   00000000:01:00.0  On |                  N/A |
| N/A   50C    P8              6W /   75W |     522MiB /   6144MiB |     20%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
!pip install -r requirements.txt



In [3]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.llms import HuggingFaceLLM, HuggingFaceInferenceAPI
from llama_index.prompts.prompts import SimpleInputPrompt
import os

In [4]:
documents = SimpleDirectoryReader("D:\PracticeProjects\RAG_LLAMA_INDEX\Data").load_data()

In [5]:
documents

[Document(id_='c365c015-2b34-4905-895d-6562345be462', embedding=None, metadata={'page_label': '1', 'file_name': 'life_insurance_guide.pdf', 'file_path': 'D:\\PracticeProjects\\RAG_LLAMA_INDEX\\Data\\life_insurance_guide.pdf', 'file_type': 'application/pdf', 'file_size': 1031179, 'creation_date': '2024-08-08', 'last_modified_date': '2024-08-08', 'last_accessed_date': '2024-08-09'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='A consumer’s guide to:\nLife\nInsurance \nWhat you should know about shopping  \nfor life insurance\n', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'),
 Document(id_='cd86c6a2-d98e-424a-b50f-efe7e5aec720', embedding=None, metadata={'

In [6]:
system_prompt = '''
You are a Question and Answer assistant. Your goal is to answer questions as \
accurately as possible based on the instructions and context provided.
'''
query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")


In [15]:
from dotenv import load_dotenv
load_dotenv()
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

In [23]:
#Loading the Mistral Model
import torch

llm = HuggingFaceInferenceAPI(
    token=HF_TOKEN,
    context_window = 4096,
    max_new_tokens = 256,
    generate_kwargs = {"temperature" : 0, "do_sample": False},
    system_prompt = system_prompt,
    query_wrapper_prompt = query_wrapper_prompt,
    tokenizer_name = "mistralai/Mistral-7B-Instruct-v0.2",
    model_name = "mistralai/Mistral-7B-Instruct-v0.2",
    device_map = "auto",
    model_kwargs = {"torch_dtype": torch.float16, "load_in_8bit":True}
    )

# If running locally with downloaded model. Replace Token with HuggingFacehub CLI Login
# llm = HuggingFaceLLM(
#     context_window = 4096,
#     max_new_tokens = 256,
#     generate_kwargs = {"temperature" : 0, "do_sample": False},
#     system_prompt = system_prompt,
#     query_wrapper_prompt = query_wrapper_prompt,
#     tokenizer_name = "mistralai/Mistral-7B-Instruct-v0.2",
#     model_name = "mistralai/Mistral-7B-Instruct-v0.2",
#     device_map = "auto",
#     model_kwargs = {"torch_dtype": torch.float16, "load_in_8bit":True}
#     )

In [24]:
# Get the embedding model
from langchain.embeddings.huggingface import HuggingFaceBgeEmbeddings
from llama_index import ServiceContext
from llama_index.embeddings import LangchainEmbedding


In [25]:
embed_model = LangchainEmbedding(
    HuggingFaceBgeEmbeddings(model_name = "sentence-transformers/all-mpnet-base-v2")
)

In [26]:
service_context = ServiceContext.from_defaults(
    chunk_size = 1024,
    llm = llm,
    embed_model= embed_model
)

In [27]:
index = VectorStoreIndex.from_documents(documents, service_context= service_context)

In [28]:
query_engine = index.as_query_engine()

In [29]:
query = "What is Term Insurance ?"
response = query_engine.query(query)
print(response)

 Term Insurance is a type of life insurance that provides coverage for a specific term or period. It does not have a cash value and the policy expires once the term ends. The premiums for term insurance increase as the policyholder ages due to the increased risk of death. It is well suited to fill temporary insurance needs, such as bridging a gap in coverage between jobs.
