In [42]:
from IPython.display import display, Markdown

## RAG System Using Llama2 With Hugging Face

In [1]:
from llama_index.core import VectorStoreIndex,SimpleDirectoryReader,ServiceContext,PromptTemplate
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core import PromptTemplate

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(


In [None]:
# Directory holding the source papers to index; change this to your own corpus location.
PAPERS_DIR = "/home/achuthchandrasekhar/Documents/AMGPT/papers_tex"

documents = SimpleDirectoryReader(PAPERS_DIR).load_data()

# Show only how many documents were loaded: echoing the list itself would dump the
# full text of every paper into the cell output.
len(documents)

In [2]:
# Edit the rules in this prompt to match your own specifications.
SYSTEM_PROMPT = """You are an AI assistant that answers questions in a friendly manner, based on the given source documents. Here are some rules you always follow:
- Generate human readable output, avoid creating output with gibberish text.
- Generate only the requested output, don't include any other language before or after the requested output.
- Never say thank you, that you are happy to help, that you are an AI agent, etc. Just answer directly.
- Generate professional language.
- Never generate offensive or foul language.
-Never use references in square brackets or otherwise in the output, but provide material examples if possible.
"""

# The template places the system prompt between <<SYS>> tags inside the [INST] block
# and leaves a {query_str} placeholder for the query text.
query_wrapper_prompt = PromptTemplate(
    "".join(("[INST]<<SYS>>\n", SYSTEM_PROMPT, "<</SYS>>\n\n{query_str}[/INST] "))
)

In [3]:
!huggingface-cli login --token # paste your token here (without quotes), then run this cell to log in to Hugging Face

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/achuthchandrasekhar/.cache/huggingface/token
Login successful


In [4]:
import os

# Take the Hugging Face access token from the HF_TOKEN environment variable so the
# secret is never typed into (or saved with) the notebook.
# When the variable is unset or blank, use None: the Hugging Face loaders then fall
# back to the token stored by `huggingface-cli login`.
hf_token = (os.environ.get("HF_TOKEN") or "").strip() or None

In [6]:
import torch
from transformers import BitsAndBytesConfig

# 8-bit weight quantization through bitsandbytes.
# Only `load_in_8bit` is passed: the `bnb_*` tuning options of BitsAndBytesConfig exist
# for 4-bit loading only (`bnb_4bit_compute_dtype`, `bnb_4bit_quant_type`,
# `bnb_4bit_use_double_quant`). The `bnb_8bit_*` keywords used previously are not
# parameters of the class, so they did not configure anything.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

# Any Hugging Face causal language model can be used here; set both `tokenizer_name`
# and `model_name` to its exact Hub repository id.
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=512,
    # Sampling is disabled so repeated runs of the same query give the same answer.
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    # SYSTEM_PROMPT is already embedded in `query_wrapper_prompt` (inside its <<SYS>>
    # section), so it is deliberately NOT passed again as `system_prompt`: doing both
    # inserts the rules into every prompt twice and wastes context-window tokens.
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
    model_name="meta-llama/Llama-2-7b-chat-hf",
    # Pins the model to the second GPU; use "cuda:0" (or "auto") on machines with a
    # different GPU layout.
    device_map="cuda:1",
    model_kwargs={"token": hf_token, "quantization_config": quantization_config},
)

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
Loading checkpoint shards: 100%|██████████| 2/2 [00:11<00:00,  5.85s/it]


In [8]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
#from llama_index.core import ServiceContext
from llama_index.embeddings.langchain import LangchainEmbedding

# Use the Hugging Face embedding model of your choice here.
_hf_embeddings = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")

# Wrap the LangChain embedding object in the llama_index adapter.
embed_model = LangchainEmbedding(_hf_embeddings)

  _torch_pytree._register_pytree_node(


In [9]:
from llama_index.core import Settings

# Register the embedding model and the LLM on the global llama_index Settings object.
Settings.embed_model = embed_model
Settings.llm = llm

In [None]:
from pathlib import Path

# Directory where the vector index is persisted; paste your directory path here.
# Use the same path as the `persist_dir` in the index-loading cell that follows.
PERSIST_DIR = " "

# Building the index embeds every document, so it is only done when no persisted
# index exists yet. `docstore.json` is one of the files written by
# `storage_context.persist`, so its presence marks an index that was already built.
if (Path(PERSIST_DIR) / "docstore.json").exists():
    print(f"Index already persisted in {PERSIST_DIR!r}; skipping rebuild.")
else:
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)

In [10]:
from llama_index.core import StorageContext, load_index_from_storage
# Recreate the storage context from the directory of your ALREADY CREATED index.
storage_context = StorageContext.from_defaults(
    persist_dir=" ",  # paste the directory path of the existing index here
)

# Load the index back from that storage context.
index = load_index_from_storage(storage_context)

In [11]:
# Echo the index object as a quick check that `index` is defined after loading.
index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x7f402c48c850>

In [12]:
# Create a query engine from the index, using the default options.
query_engine = index.as_query_engine()

In [None]:
# Paste your query here.
response = query_engine.query(
    "What is the relation between meltpool dimensions and density?"
)

# Put the answer under a Markdown heading and render it in the notebook.
markdown_response = f"\n### Query Response:\n\n{response}\n"
display(Markdown(markdown_response))