In [1]:
import langchain

from langchain.schema import format_document
from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough, RunnableParallel, chain

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import Together

from langchain.prompts.prompt import PromptTemplate
from operator import itemgetter

import os
import config
import json
import datetime
from io import StringIO

In [2]:
os.environ['LANGCHAIN_TRACING_V2'] = "true"
os.environ['LANGCHAIN_API_KEY'] = 'xxx'
os.environ['GROQ_API_KEY'] = 'xxx'

In [14]:
embeddings = HuggingFaceEmbeddings(model_name='hkunlp/instructor-large', # somehow only this works and the default HuggingFaceEmbeddings() 
                                   # i.e. sentence-transformers/all-mpnet-base-v2 and 'sentence-transformers/all-MiniLM-L6-v2' don't 
                                   # return any for retriever.invoke
                                           model_kwargs={'device': 'cpu', }) # cuda, cpu

db = FAISS.load_local('repo.db2', embeddings, allow_dangerous_deserialization=True)

retriever = db.as_retriever(
                search_type="similarity_score_threshold", # mmr # similarity_score_threshold
                search_kwargs={"k": 3, "score_threshold": 0.5}
)

In [35]:
from langchain_groq import ChatGroq
llm = ChatGroq(temperature=0, model_name="llama3-70b-8192")

# _template = """
# You're an AI assistant trained on the content of a web site.

# Given an optional Chat History and a New Question, rephrase the new question to be a standalone question. No pre-amble.

# If the New Question is a general greeting, set the standalone question to be the same as the New Question. No pre-amble.

# Chat History:
# {chat_history}

# New Question: {question}

# Standalone question:
# """

# CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

# search_template = """
# You are a helpful, respectful and honest assistant.
# Given a user question, your task is to enrich the user question with more relevant information.
# You can also summarize the chat history to add relevant context to the user question.
# Do not ask any follow up question to the users.

# Chat history:
# {chat_history}
# User question:
# {question}
# """
# SEARCH_PROMPT = PromptTemplate.from_template(search_template)

answer_template = """
You are a helpful, respectful and honest assistant.
If question is related to the context, you should ONLY use the provided CONTEXT to answer the question. DO NOT USE INFORMATION NOT IN THE CONTEXT.
If question is not related to the context, respond like a general AI assistant.

<CONTEXT>:
{context}
</CONTEXT>

Question: {question}
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(answer_template)

DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{source}: {page_content}")

def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    print(f"{doc_strings=}")
    return document_separator.join(doc_strings)

# _inputs = RunnableParallel(
#     standalone_question=RunnablePassthrough.assign(
#         chat_history=lambda x: get_buffer_string(x['chat_history'][-6:])
#     )
#     | SEARCH_PROMPT
#     | llm
#     | StrOutputParser(),
# )

# _context = {
#     "context": itemgetter("standalone_question") | retriever | _combine_documents,
#     "question": lambda x: x["standalone_question"],
# }

# chat_chain = _inputs | _context | ANSWER_PROMPT | llm

chat_chain = (ANSWER_PROMPT | llm)

@chain
def custom_chain(inputs):
    # refined_question = _inputs.invoke(inputs)
    
    # search_query = """
    # Question: {question}
    # {context_from_llm}
    # """

    # query_format = {"question": prompt, "context_from_llm": refined_question['standalone_question']}
    # search_query = search_query.format(**query_format)
    
    #docs = retriever.invoke(search_query)
    docs = retriever.invoke(inputs['question'])
    for doc in docs:
        if "url" in doc.metadata:
            doc.metadata['source'] = doc.metadata['url']

    context = _combine_documents(docs)

    answer = chat_chain.invoke({"context": context, "question": inputs['question']})
    print(f"{answer.content=}")
    return {'response': answer.content, 'source_document': docs}


In [15]:
docs_scores = db.similarity_search_with_score(query="What does LLM agents use?",
                                              k=4,
                                              score_threshold=0.2)
[doc for doc, score in docs_scores]


[]

In [13]:
HuggingFaceEmbeddings()

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-mpnet-base-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [36]:
#prompt = "what is RAG Agent?"
prompt = "What does LLM agents use?"
answer = custom_chain.invoke(
    {
        "question": prompt,
        "chat_history": []
    }
)

response = answer['response']
response

doc_strings=['https://api.github.com/meta-llama/llama-recipes/blob/main/recipes/use_cases/agents/langchain/README.md: # LangChain <> Llama3 Cookbooks\n\n### `Agents`\n\nLLM agents use [planning, memory, and tools](https://lilianweng.github.io/posts/2023-06-23-agent/) to accomplish tasks. Here, we show how to build agents capable of [tool-calling](https://python.langchain.com/docs/integrations/chat/) using [LangGraph](https://python.langchain.com/docs/langgraph) with Llama 3.', 'https://api.github.com/meta-llama/llama-recipes/blob/main/recipes/use_cases/agents/langchain/README.md: Tool-calling agents with LangGraph use two nodes: (1) a LLM node decides which tool to invoke based upon the user input. It outputs the tool name and tool arguments to use based upon the input. (2) the tool name and arguments are passed to a tool node, which calls the tool with the specified arguments and returns the result back to the LLM.\n\n![Screenshot 2024-06-06 at 12 36 57 PM](https://github.com/rlancema

'According to the context, LLM agents use planning, memory, and tools to accomplish tasks.'

In [33]:
docs = retriever.invoke(prompt)    

In [34]:
docs

[Document(page_content='# LangChain <> Llama3 Cookbooks\n\n### `Agents`\n\nLLM agents use [planning, memory, and tools](https://lilianweng.github.io/posts/2023-06-23-agent/) to accomplish tasks. Here, we show how to build agents capable of [tool-calling](https://python.langchain.com/docs/integrations/chat/) using [LangGraph](https://python.langchain.com/docs/langgraph) with Llama 3.', metadata={'path': 'recipes/use_cases/agents/langchain/README.md', 'sha': '3040ff1be9deab4bcdd56f79d312a894b8ee46dd', 'source': 'https://api.github.com/meta-llama/llama-recipes/blob/main/recipes/use_cases/agents/langchain/README.md'}),
 Document(page_content='Tool-calling agents with LangGraph use two nodes: (1) a LLM node decides which tool to invoke based upon the user input. It outputs the tool name and tool arguments to use based upon the input. (2) the tool name and arguments are passed to a tool node, which calls the tool with the specified arguments and returns the result back to the LLM.\n\n![Scree

In [17]:
answer

{'response': 'According to the context, a RAG Agent is a custom Llama 3 powered agent that uses ideas from 3 papers and is built using LangGraph. It involves a sequence of steps (planning), memory to pass information between steps, and tool use for tasks such as retrieval.',
 'source_document': [Document(page_content='Our first notebook, `langgraph-tool-calling-agent`, shows how to build our agent mentioned above using LangGraph.\n\nSee this [video overview](https://youtu.be/j2OAeeujQ9M) for more detail on the design of this agent.\n\n--- \n\n### `RAG Agent`\n\nOur second notebook, `langgraph-rag-agent`, shows how to apply LangGraph to build a custom Llama 3 powered RAG agent that uses ideas from 3 papers:', metadata={'path': 'recipes/use_cases/agents/langchain/README.md', 'sha': '3040ff1be9deab4bcdd56f79d312a894b8ee46dd', 'source': 'https://api.github.com/meta-llama/llama-recipes/blob/main/recipes/use_cases/agents/langchain/README.md'}),
  Document(page_content='We implement each appr

In [18]:
docs = answer['source_document']

if len(docs) > 0:
    response += "\n> source:"

    for doc in docs:
        response += doc.metadata['source']
else:
   response = "Sorry, I can't find any relevant documents for your question. Please rephrase your question."


In [19]:
response

'According to the context, a RAG Agent is a custom Llama 3 powered agent that uses ideas from 3 papers and is built using LangGraph. It involves a sequence of steps (planning), memory to pass information between steps, and tool use for tasks such as retrieval.\n> source:https://api.github.com/meta-llama/llama-recipes/blob/main/recipes/use_cases/agents/langchain/README.mdhttps://api.github.com/meta-llama/llama-recipes/blob/main/recipes/use_cases/agents/langchain/README.mdhttps://api.github.com/meta-llama/llama-recipes/blob/main/recipes/use_cases/agents/langchain/README.md'

In [None]:
prompt = "what is RAG Agent?"
#prompt = "What does LLM agents use?"
answer = custom_chain.invoke(
    {
        "question": prompt,
        "chat_history": []
    }
)

response = answer['response']
response

In [25]:
len(docs)

3

In [31]:
docs

[Document(page_content='Our first notebook, `langgraph-tool-calling-agent`, shows how to build our agent mentioned above using LangGraph.\n\nSee this [video overview](https://youtu.be/j2OAeeujQ9M) for more detail on the design of this agent.\n\n--- \n\n### `RAG Agent`\n\nOur second notebook, `langgraph-rag-agent`, shows how to apply LangGraph to build a custom Llama 3 powered RAG agent that uses ideas from 3 papers:', metadata={'path': 'recipes/use_cases/agents/langchain/README.md', 'sha': '3040ff1be9deab4bcdd56f79d312a894b8ee46dd', 'source': 'https://api.github.com/meta-llama/llama-recipes/blob/main/recipes/use_cases/agents/langchain/README.md'}),
 Document(page_content='We implement each approach as a control flow in LangGraph:\n- **Planning:** The sequence of RAG steps (e.g., retrieval, grading, and generation) that we want the agent to take.\n- **Memory:** All the RAG-related information (input question, retrieved documents, etc) that we want to pass between steps.\n- **Tool use:**

In [29]:
docs[2].metadata['url']=docs[2].metadata['source']

In [30]:
docs[2].metadata

{'path': 'recipes/use_cases/agents/langchain/README.md',
 'sha': '3040ff1be9deab4bcdd56f79d312a894b8ee46dd',
 'source': 'https://api.github.com/meta-llama/llama-recipes/blob/main/recipes/use_cases/agents/langchain/README.md',
 'url': 'https://api.github.com/meta-llama/llama-recipes/blob/main/recipes/use_cases/agents/langchain/README.md'}