## Llama2 powered RAG

In [1]:
!pip install pypdf



In [2]:
!pip install -q transformers einops accelerate langchain bitsandbytes

In [3]:
## Embedding
!pip install sentence-transformers



In [None]:
!pip install llama_index



In [4]:
import llama_index
print(dir(llama_index))

['__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__']


In [5]:
from llama_index.core import VectorStoreIndex,SimpleDirectoryReader,ServiceContext
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.core.prompts.prompts import SimpleInputPrompt








In [6]:
documents=SimpleDirectoryReader("/content/data").load_data()
documents

[Document(id_='267ac72e-e14e-4233-a8e8-cb8d998d4133', embedding=None, metadata={'page_label': '1', 'file_name': 'Attention is all you Need.pdf', 'file_path': '/content/data/Attention is all you Need.pdf', 'file_type': 'application/pdf', 'file_size': 2215244, 'creation_date': '2024-10-21', 'last_modified_date': '2024-10-21'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@googl

In [7]:
system_prompt="""
You are a Q&A assistant. Your goal is to answer questions as
accurately as possible based on the instructions and context provided.
"""
## Default format supportable by LLama2
query_wrapper_prompt=SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")

In [9]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: fineGr

In [8]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(llm_int8_threshold=200.0)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    torch_dtype=torch.float16,
    device_map="auto",
    quantization_config=quantization_config,
)




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [10]:
!pip install -U langchain-community
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.core import ServiceContext
!pip install llama-index-embeddings-langchain
from llama_index.embeddings.langchain import LangchainEmbedding

embed_model=LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2"))



  HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2"))


In [11]:
import torch
from llama_index.core import Settings

# Ensure CUDA is used if available
device = "cuda" if torch.cuda.is_available() else "cpu"

llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.7, "do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
    model_name="meta-llama/Llama-2-7b-chat-hf",
    device_map="auto",  # Uses all available devices (CPU/GPU)
    stopping_ids=[50278, 50279, 50277, 1, 0],
    tokenizer_kwargs={"max_length": 4096},
    model_kwargs={"torch_dtype": torch.float16} if torch.cuda.is_available() else {},
)

Settings.llm = llm
Settings.chunk_size = 1024
Settings.embed_model=embed_model

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [12]:
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

In [None]:
index

In [13]:
query_engine=index.as_query_engine()

In [14]:
response=query_engine.query("what is solar system?")



In [16]:
from IPython.display import HTML
display(HTML('''
<style>
  pre {
      white-space: normal;
  }
</style>
'''))

print(response)

The Solar System is a gravitationally bound system of celestial objects that orbit around the Sun, including eight planets, dwarf planets, asteroids, comets, and other smaller bodies. The Sun is the center of the Solar System, and the objects in the system are held together by its gravitational pull. The Solar System formed around 4.6 billion years ago from a dense region of a molecular cloud, and it is estimated that the Sun and the Solar System's mass make up about 99.86% of the system's total mass. The Solar System's largest objects are the eight planets, which are divided into two categories: the terrestrial planets (Mercury, Venus, Earth, and Mars) and the gas giants (Jupiter and Saturn). In addition to the planets, the Solar System also includes dwarf planets, which are objects that are large enough to be rounded by their own gravity but have not cleared their orbits of other objects. The Solar System also contains many smaller bodies, such as asteroids, comets, and meteoroids.
