#Build a RAG agent that can run on llama

%md
![Llama RAG implementation.png](./Llama RAG implementation.png "Llama RAG implementation.png")

In [0]:
from langchain_core.messages import HumanMessage, SystemMessage
from databricks_langchain import ChatDatabricks

In [0]:
from  dotenv import load_dotenv
# Load variables from a .env file into the process environment before any
# client is created (presumably the Databricks credentials that
# ChatDatabricks needs -- TODO confirm which variables are expected).
# The return value is bound to `_`, presumably so the notebook cell does not
# echo it as output.
_ = load_dotenv()

In [0]:
# Two handles on the same serving endpoint, both with temperature=0: one for
# free-text answers and one for prompts that ask for a JSON reply.
_llm_common_kwargs = dict(endpoint='otc-lama-poc', temperature=0)

# Free-text model, with max_tokens set to 1000.
chat_model = ChatDatabricks(max_tokens=1000, **_llm_common_kwargs)

# JSON model. Unlike chat_model, no max_tokens value is passed here.
# NOTE(review): confirm that ChatDatabricks actually honours `return_json`;
# if the option is silently ignored, JSON output relies only on the prompt
# wording and would have to be requested another way -- TODO verify against
# the databricks_langchain documentation.
chat_model_json = ChatDatabricks(return_json=True, **_llm_common_kwargs)

In [0]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_nomic.embeddings import NomicEmbeddings

In [0]:
# Source pages for the knowledge base, one named constant per blog post.
AGENT_POST_URL = "https://lilianweng.github.io/posts/2023-06-23-agent/"
PROMPT_ENGINEERING_POST_URL = "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/"
ADVERSARIAL_ATTACK_POST_URL = "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/"

# Order matters only insofar as documents are loaded in this order.
urls = [
    AGENT_POST_URL,
    PROMPT_ENGINEERING_POST_URL,
    ADVERSARIAL_ATTACK_POST_URL,
]

In [0]:
#load documents
# Fetch every page separately; `documents` keeps one load() result per URL,
# in the same order as `urls`.
documents = []
for page_url in urls:
    documents.append(WebBaseLoader(page_url).load())

# Flatten the per-URL results into a single list, preserving that order.
doc_list = []
for page_docs in documents:
    doc_list.extend(page_docs)

In [0]:
#split documents
# Splitter settings: chunk_size=1000 with chunk_overlap=200 between
# neighbouring chunks.
_splitter_settings = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**_splitter_settings)

# Break every loaded document into chunks ready for embedding.
doc_splits = text_splitter.split_documents(doc_list)

In [0]:
#load vector store
# Embedding model used to vectorise the chunks; inference_mode="local" is
# passed through to NomicEmbeddings unchanged.
embedding_model = NomicEmbeddings(
    model="nomic-embed-text-v1.5",
    inference_mode="local",
)

# Build the vector store from the split documents.
vectorstore = SKLearnVectorStore.from_documents(
    documents=doc_splits,
    embedding=embedding_model,
)

In [0]:
# ROUTER 
import json
from langchain_core.messages import HumanMessage, SystemMessage

#Prompt
# System prompt for the router: the model must answer with a JSON object
# whose single key, `datasource`, is either 'websearch' or 'vectorstore'.
router_instructions = '''You are an expert at routing a user question to a vectorstore or web search.
The vectorstore contains documents related to agents, prompt engineering, and adversarial attacks.
Use the vectorstore for questions on these topics. For all else, and especially for current events, use web-search.
Return JSON format with single key, datasource, that is 'websearch' or 'vectorstore' depending on the question. 
No formatting or comments required. Pure json format.'''

#Test
# Smoke-test the router with one question per expected route.
test_websearch = chat_model_json.invoke([
    SystemMessage(content=router_instructions),
    HumanMessage(content="who won champions tropy 2025?"),
])
test_vectorstore = chat_model_json.invoke([
    SystemMessage(content=router_instructions),
    HumanMessage(content="What is prompt engineering?"),
])

# invoke() returns a message object, not a file: json.load() would fail on it
# because it expects something with .read(). Parse the message text with
# json.loads() instead and print the decoded routing decision.
print(json.loads(test_websearch.content))
print(json.loads(test_vectorstore.content))

In [0]:
#RETRIEVE DOCUMENTS
# The number of results must be passed through `search_kwargs`; a bare `k=3`
# keyword is not a retriever setting and does not limit the results to three.
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# Quick sanity check: the last expression is displayed by the notebook cell.
retriever.invoke("Agent memory")

In [0]:
#GRADE DOCUMENTS
#Doc Grade Instructions
# System prompt describing the grader's job.
doc_grade_instructions = """You are a grader assessing relevance of a retrieved document to a user question.
If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant."""

#Doc Grade Prompt
# Human-message template; `{document}` and `{question}` are filled in with
# str.format(). (A stray fourth quote used to put a literal `"` at the very
# start of this prompt.)
doc_grade_prompt = """Here is the retrieved document: \n\n 

{document}
 \n\n Here is the user question: \n\n 
 
 {question}. 
This carefully and objectively assess whether the document contains at least some information that is relevant to the question.
Return JSON with single key, binary_score, that is 'yes' or 'no' score to indicate whether the document contains at least some information that is relevant to the question.
"""

#Test
# Grade a real retrieved chunk. Formatting the template with the placeholder
# names themselves would send the literal text "{document}" / "{question}"
# to the model instead of an actual document and question.
sample_question = "Agent memory"
sample_docs = retriever.invoke(sample_question)
# Fall back to an empty document if nothing was retrieved, rather than
# failing on an index lookup.
sample_doc_text = sample_docs[0].page_content if sample_docs else ""
doc_grade_prompt_formatted = doc_grade_prompt.format(
    document=sample_doc_text,
    question=sample_question,
)
results = chat_model_json.invoke([
    SystemMessage(content=doc_grade_instructions),
    HumanMessage(content=doc_grade_prompt_formatted),
])

# invoke() returns a message object, so parse its text with json.loads();
# json.load() expects a file-like object and would fail here.
print(json.loads(results.content))






In [0]:
#GENERATE ANSWER
# Answer-generation template with two str.format() placeholders:
# `{context}` (the retrieved text) and `{question}` (the user question).
# It is written line by line so that the trailing spaces that are part of
# the prompt text stay visible; the resulting string is unchanged.
rag_prompt = (
    "You are an assistant for question-answering tasks. \n"
    "Here is the context to use to answer the question:\n"
    "\n"
    "{context} \n"
    "\n"
    "Think carefully about the above context. \n"
    "Now, review the user question:\n"
    "\n"
    "{question}\n"
    "\n"
    "Provide an answer to this questions using only the above context. \n"
    "Use three sentences maximum and keep the answer concise.\n"
    "\n"
    "Answer:"
)

In [0]:
# HALLUCINATION CHECKER

In [0]:
# GRADE THE ANSWER