In [2]:
%pip install -q langchain unstructured[all-docs] langchain-community arxiv wikipedia langgraph langchainhub langchain-nvidia-ai-endpoints==0.0.11

Note: you may need to restart the kernel to use updated packages.


In [3]:
import getpass
import os

## API Key can be found by going to NVIDIA NGC -> AI Foundation Models -> (some model) -> Get API Code or similar.
## 10K free queries to any endpoint (which is a lot actually).

# del os.environ['NVIDIA_API_KEY']  ## delete key and reset
if os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
    # A plausible key is already exported; do not prompt again.
    print("Valid NVIDIA_API_KEY already in environment. Delete to reset")
else:
    # Strip stray whitespace/newlines that can come along with a pasted key.
    nvapi_key = getpass.getpass("NVAPI Key (starts with nvapi-): ").strip()
    # Validate with an explicit raise: `assert` is skipped under `python -O`,
    # and the message deliberately does not echo any part of the entered secret.
    if not nvapi_key.startswith("nvapi-"):
        raise ValueError("Invalid NVIDIA API key: it must start with 'nvapi-'")
    os.environ["NVIDIA_API_KEY"] = nvapi_key

In [4]:
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

# Fetch the models ChatNVIDIA reports as available; the bare name on the
# last line makes the notebook display the returned list.
available_models = ChatNVIDIA.get_available_models()
available_models

[Model(id='ai-arctic', model_type='chat', api_type=None, model_name='snowflake/arctic', client='ChatNVIDIA', path='7408b6b5-09e7-4ae5-a3fe-2db063e4e609'),
 Model(id='ai-codegemma-7b', model_type='chat', api_type=None, model_name='google/codegemma-7b', client='ChatNVIDIA', path='7dfc10a8-3cc4-448e-97c1-2213308dc222'),
 Model(id='ai-codellama-70b', model_type='chat', api_type=None, model_name='meta/codellama-70b', client='ChatNVIDIA', path='f6b06895-d073-4714-8bb2-26c09e9f6597'),
 Model(id='ai-fuyu-8b', model_type='image_in', api_type=None, model_name=None, client='ChatNVIDIA', path='e598bfc1-b058-41af-869d-556d3c7e1b48'),
 Model(id='ai-gemma-2b', model_type='chat', api_type=None, model_name='google/gemma-2b', client='ChatNVIDIA', path='04174188-f742-4069-9e72-d77c2b77d3cb'),
 Model(id='ai-gemma-7b', model_type='chat', api_type=None, model_name='google/gemma-7b', client='ChatNVIDIA', path='a13e3bed-ca42-48f8-b3f1-fbc47b9675f9'),
 Model(id='ai-google-deplot', model_type='image_in', api_ty

In [5]:
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

# Model identifiers are held in named constants so they are easy to swap.
# Alternative chat models noted by the author: mistralai/mixtral-8x22b-instruct-v0.1, ai-mistral-large
CHAT_MODEL_ID = 'ai-llama2-70b'
EMBEDDING_MODEL_ID = "ai-embed-qa-4"

# Chat model used by the ReAct agent later in the notebook.
llm = ChatNVIDIA(model=CHAT_MODEL_ID)

# Embedding model used to index the document chunks.
embedder = NVIDIAEmbeddings(model=EMBEDDING_MODEL_ID)

In [6]:
%pip install faiss-cpu

from langchain_community.document_loaders import OnlinePDFLoader
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
# load document from arxiv
loader = OnlinePDFLoader("https://arxiv.org/pdf/2312.10997")
docs = loader.load()
documents = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200
).split_documents(docs)
vector = FAISS.from_documents(documents, embedder)
retriever = vector.as_retriever()

Note: you may need to restart the kernel to use updated packages.


In [7]:
# Sanity-check the retriever: run one query and display only the first hit.
rag_hits = retriever.invoke("What is RAG?")
rag_hits[0]

Document(page_content='The development of the RAG ecosystem is greatly impacted by the progression of its technical stack. Key tools like LangChain and LLamaIndex have quickly gained popularity with the emergence of ChatGPT, providing extensive RAG- related APIs and becoming essential in the realm of LLMs.The emerging technology stack, while not as rich in features as LangChain and LLamaIndex, stands out through its specialized products. For example, Flowise AI prioritizes a low-code approach, allowing users to deploy AI applications, including RAG, through a user-friendly drag-and-drop interface. Other technologies like HayStack, Meltano, and Cohere Coral are also gaining attention for their unique contributions to the field. In addition to AI-focused vendors, traditional software and cloud service providers are expanding their offerings to include RAG-centric services. Weaviate’s Verba 11 is designed for personal assistant applications, while Amazon’s Kendra 12 offers intelligent ent

In [8]:
# Prompt only when the key is missing, and name the key being requested:
# a bare getpass() shows just "Password: ", which is ambiguous next to the
# other API-key prompts in this notebook.
if not os.environ.get("TAVILY_API_KEY"):
    os.environ["TAVILY_API_KEY"] = getpass.getpass("Tavily API key: ")

In [9]:
from langchain.tools.retriever import create_retriever_tool

from langchain_community.utilities import ArxivAPIWrapper
from langchain_community.tools.wikipedia.tool import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools.tavily_search import TavilySearchResults

# Expose the document retriever as a LangChain tool.
retriever_search = create_retriever_tool(
    retriever=retriever,
    name="RAG doc",
    description="Search for info on RAG",
)

# Wrapper around the arXiv API.
arxiv = ArxivAPIWrapper()

# Tavily search-results tool.
tavily_search = TavilySearchResults()

# Wikipedia query tool, backed by the Wikipedia API wrapper.
wikipedia_api = WikipediaAPIWrapper()
wikipedia = WikipediaQueryRun(api_wrapper=wikipedia_api)


In [10]:
# Call the retriever tool directly and display the text it returns.
rag_question = "what is RAG?"
response = retriever_search.run(rag_question)
response

'The development of the RAG ecosystem is greatly impacted by the progression of its technical stack. Key tools like LangChain and LLamaIndex have quickly gained popularity with the emergence of ChatGPT, providing extensive RAG- related APIs and becoming essential in the realm of LLMs.The emerging technology stack, while not as rich in features as LangChain and LLamaIndex, stands out through its specialized products. For example, Flowise AI prioritizes a low-code approach, allowing users to deploy AI applications, including RAG, through a user-friendly drag-and-drop interface. Other technologies like HayStack, Meltano, and Cohere Coral are also gaining attention for their unique contributions to the field. In addition to AI-focused vendors, traditional software and cloud service providers are expanding their offerings to include RAG-centric services. Weaviate’s Verba 11 is designed for personal assistant applications, while Amazon’s Kendra 12 offers intelligent enterprise search\n\n2) N

In [17]:
from langchain.tools import Tool

# Wrap each underlying tool's `invoke` method in a plain `Tool` with its own
# name and description.
retriever_tool = Tool.from_function(
    name='retriever_docs',
    description='Tool to retrieve document',
    func=retriever_search.invoke,
)
tavily_tool = Tool.from_function(
    name='tavily',
    description='search engine',
    func=tavily_search.invoke,
)

# Tools handed to the models/agents below (same order as before).
tool_list = [tavily_tool, retriever_tool]

In [19]:
# Prompt only when the key is missing, and name the key being requested:
# a bare getpass() shows just "Password: ", which is ambiguous next to the
# other API-key prompts in this notebook.
if not os.environ.get("SERPAPI_API_KEY"):
    os.environ["SERPAPI_API_KEY"] = getpass.getpass("SerpAPI API key: ")

In [28]:
%pip install langchain-openai
os.environ["OPENAI_API_KEY"] = getpass.getpass()

from langchain_openai import ChatOpenAI

model = ChatOpenAI(model="gpt-4")

Collecting langchain-openai
  Using cached langchain_openai-0.1.7-py3-none-any.whl.metadata (2.5 kB)
Collecting openai<2.0.0,>=1.24.0 (from langchain-openai)
  Using cached openai-1.30.3-py3-none-any.whl.metadata (21 kB)
Collecting tiktoken<1,>=0.7 (from langchain-openai)
  Using cached tiktoken-0.7.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (6.6 kB)
Collecting distro<2,>=1.7.0 (from openai<2.0.0,>=1.24.0->langchain-openai)
  Using cached distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Using cached langchain_openai-0.1.7-py3-none-any.whl (34 kB)
Using cached openai-1.30.3-py3-none-any.whl (320 kB)
Using cached tiktoken-0.7.0-cp310-cp310-macosx_11_0_arm64.whl (906 kB)
Using cached distro-1.9.0-py3-none-any.whl (20 kB)
Installing collected packages: distro, tiktoken, openai, langchain-openai
Successfully installed distro-1.9.0 langchain-openai-0.1.7 openai-1.30.3 tiktoken-0.7.0
Note: you may need to restart the kernel to use updated packages.


In [32]:
# Bind the tool list to the chat model; replies from the bound model are
# inspected for `tool_calls` in the next cell.
model_with_tools = model.bind_tools(tools=tool_list)

In [33]:
from langchain_core.messages import HumanMessage

# Send a plain greeting through the tool-bound model, then print the reply
# text and any tool calls it requested.
greeting = HumanMessage(content="Hi!")
response = model_with_tools.invoke([greeting])

for label, value in (
    ("ContentString", response.content),
    ("ToolCalls", response.tool_calls),
):
    print(f"{label}: {value}")

ContentString: Hello! How can I assist you today?
ToolCalls: []


In [34]:
# create agent
from langgraph.prebuilt import chat_agent_executor

# Build the prebuilt tool-calling executor from the chat model and the
# wrapped tools.
build_tool_agent = chat_agent_executor.create_tool_calling_executor
agent_executor = build_tool_agent(model, tool_list)

In [35]:
# Run the agent on a greeting; the bare expression displays the resulting
# message list.
greeting_state = {"messages": [HumanMessage(content="hi!")]}
response = agent_executor.invoke(greeting_state)

response["messages"]

[HumanMessage(content='hi!', id='bb164567-9b08-4f74-83b4-f375c410e1a1'),
 AIMessage(content='Hello! How can I assist you today?', response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 70, 'total_tokens': 80}, 'model_name': 'gpt-4', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-8dd0b254-1db2-42e9-861c-05ec7f8559b9-0')]

In [36]:
# Ask the agent a question about RAG and display the full message list.
rag_prompt = HumanMessage(content="explain Retrieval Augmented Generation?")
response = agent_executor.invoke({"messages": [rag_prompt]})
response["messages"]

[HumanMessage(content='explain Retrieval Augmented Generation?', id='1c4fd8d9-f5b7-4c44-a6da-d3a5707e03fa'),
 AIMessage(content="Retrieval Augmented Generation (RAG) is a method that combines the benefits of both the retrieval-based and generation-based models for open-domain question answering tasks. The method leverages the advantages of extractive methods that retrieve relevant documents and generative methods that generate responses in a fine-grained manner. \n\nHere's how it works:\n\n1. **Document Retrieval**: Given an input query, RAG retrieves a set of documents that are relevant to the query. This is similar to the process used in a search engine, where the system retrieves the most relevant documents or articles related to a search query.\n\n2. **Contextual Understanding**: The retrieved documents are then provided as context to a sequence-to-sequence model. This model understands the context from these documents and uses it to generate a response.\n\n3. **Response Generation

In [37]:
%%capture --no-stderr # what is this?
%pip install -U langgraph langchain langchain-openai

In [38]:
from langchain_openai import ChatOpenAI

# Rebind `model` to GPT-4o; cells below that reference `model` pick up this
# instance when run in order.
OPENAI_MODEL_NAME = "gpt-4o"
model = ChatOpenAI(model=OPENAI_MODEL_NAME)

In [40]:
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.prompts import ChatPromptTemplate

# Prompt layout: system instruction, the user's input, then the agent
# scratchpad.
prompt_messages = [
    ("system", "You are a helpful assistant"),
    ("human", "{input}"),
    # Placeholders fill up a **list** of messages
    ("placeholder", "{agent_scratchpad}"),
]
prompt = ChatPromptTemplate.from_messages(prompt_messages)

# Build the tool-calling agent and wrap it in an executor that runs it with
# the same tool list.
agent = create_tool_calling_agent(model, tool_list, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tool_list)

In [41]:
# Ask the AgentExecutor the RAG question; the returned dict is displayed as
# the cell output.
query = "explain Retrieval Augmented Generation?"
agent_input = {"input": query}
agent_executor.invoke(agent_input)

{'input': 'explain Retrieval Augmented Generation?',
 'output': "Retrieval-Augmented Generation (RAG) is an advanced technique in natural language processing (NLP) that combines retrieval mechanisms with generative models to improve the quality and accuracy of generated text. Here's a breakdown of the concept:\n\n### 1. **Retrieval Mechanism**\n- **Purpose**: The retrieval component fetches relevant documents or pieces of information from a large corpus or database that are related to the input query or context.\n- **Technology**: Often, this involves using search engines, similarity measures, or trained retrieval models.\n\n### 2. **Generative Model**\n- **Purpose**: The generative model, typically a variant of Transformer models like GPT-3, uses the retrieved information and the input query to generate coherent and contextually accurate text.\n- **Technology**: These models are trained on large datasets and can produce human-like text based on the input they receive.\n\n### 3. **Comb

In [29]:
%pip install google-search-results
from langchain_community.utilities import SerpAPIWrapper, WikipediaAPIWrapper
from langchain.agents import AgentExecutor, load_tools, create_tool_calling_agent
from langgraph.prebuilt import chat_agent_executor

tools = load_tools(["serpapi","wikipedia"],llm=llm)

agent_executor = chat_agent_executor.create_tool_calling_executor(model, tools) # the issue is does not work with open-source model !!!

Note: you may need to restart the kernel to use updated packages.


In [25]:

from langchain_core.prompts import ChatPromptTemplate
from langchain import hub
from langchain.agents import create_tool_calling_agent, AgentExecutor, load_tools
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents.output_parsers import (
    ReActJsonSingleInputOutputParser,
)
from langchain.tools.render import render_text_description

# Pull the ReAct-style JSON prompt from the hub and pre-fill the tool
# descriptions and the comma-separated tool names.
react_prompt = hub.pull("hwchase17/react-json")
prompt = react_prompt.partial(
    tools=render_text_description(tools),
    tool_names=", ".join(t.name for t in tools),
)

# Bind a stop sequence so generation halts at "\nObservation".
chat_model_with_stop = llm.bind(stop=["\nObservation"])

# Map the executor's payload onto the prompt variables: the raw user input
# and the intermediate steps rendered as a string.
agent_inputs = {
    "input": lambda payload: payload["input"],
    "agent_scratchpad": lambda payload: format_log_to_str(
        payload["intermediate_steps"]
    ),
}

# Agent pipeline: inputs -> prompt -> chat model -> ReAct JSON output parser.
agent = (
    agent_inputs
    | prompt
    | chat_model_with_stop
    | ReActJsonSingleInputOutputParser()
)

# Executor that runs the agent with the loaded tools, with verbose output.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [None]:
# Run the ReAct agent on a sample question; the result is the cell output.
question = "what is Generative AI?"
agent_executor.invoke({"input": question})