## Install libraries and set up keys

In [1]:
%%capture

# Note: This could take up to 5 minutes

!pip install langchain              # Base LangChain
!pip install langchainhub           # Hub for prompt templates
!pip install langchain-core         # Core LangChain components
!pip install langchain-community    # Extenteded list of LangChain components
!pip install langchain-anthropic    # LangChain bindings for Anthropic LLM
!pip install langchain-openai       # LangChain bindings for OpenAI LLM
!pip install google-search-results  # Serp API
!pip install pypdf                  # Parse PDF docs
!pip install sentence_transformers  # Embedding
!pip install chromadb               # Vector DB

In [35]:
import os
from google.colab import userdata

os.environ['HF_TOKEN'] = userdata.get('HF_TOKEN')
os.environ['SERPAPI_API_KEY'] = userdata.get('SERPAPI_API_KEY')
os.environ['ANTHROPIC_API_KEY'] = userdata.get('ANTHROPIC_API_KEY')
os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')

In [4]:
!rm -rf ./repo
!git clone --depth 1 https://github.com/adeshmukh/gaiip-intro-orchestration.git ./repo

Cloning into './repo'...
remote: Enumerating objects: 14, done.[K
remote: Counting objects: 100% (14/14), done.[K
remote: Compressing objects: 100% (13/13), done.[K
remote: Total 14 (delta 1), reused 12 (delta 1), pack-reused 0[K
Receiving objects: 100% (14/14), 622.08 KiB | 2.46 MiB/s, done.
Resolving deltas: 100% (1/1), done.


## Load PDFs in Vector DB

In [6]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
import logging

# Ignore pypdf warnings
logging.getLogger("pypdf").setLevel(logging.ERROR)


base_path = "./repo/pdfs"
pdf_documents = os.listdir(base_path)
docs = []

# Load docs as Chroma Document objects
for pdf_document in pdf_documents:
  pdf_loader = PyPDFLoader(f"{base_path}/{pdf_document}")
  splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
  pages = pdf_loader.load_and_split(text_splitter=splitter)
  print(f"Loaded {len(pages)} pages from {pdf_document}")
  docs.extend(pages)

Loaded 31 pages from us-national-budget-health-fy24.pdf
Loaded 6 pages from us-national-budget-nasa-fy24.pdf
Loaded 7 pages from us-national-budget-civil-fy24.pdf
Loaded 7 pages from us-national-budget-nsf-fy24.pdf
Loaded 7 pages from us-national-budget-treasury-fy24.pdf
Loaded 6 pages from us-national-budget-ssa-fy24.pdf


In [7]:
%%capture

from langchain.vectorstores import Chroma
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings
)

embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
vector_db = Chroma.from_documents(docs, embedding_function)

In [9]:

# Maximal Marginal Relevance (MMR) optimizes for similarity to query and diversity among selected documents.
retriever = vector_db.as_retriever(search_type='mmr')
hits = retriever.invoke("What is the total requested budget for NASA and NSF?")

for hit in hits[:10]: # Peek at the top 10 hits
  print('-------------------------------------------')
  print(hit.page_content, hit.metadata)


-------------------------------------------
125NATIONAL AERONAUTICS AND 
SPACE ADMINISTRATIONThe National Aeronautics and Space Administration (NASA) is responsible for sending astronauts 
and robotic missions to explore the solar system, advancing the Nation’s understanding of 
the Earth and space, and developing new technologies and approaches to improve aviation 
and space activities.  The President’s 2024 Budget for NASA:  supports human and robotic 
exploration of the Moon; invests in new technologies to improve the Nation’s space capabilities; 
and promotes cutting-edge Earth-observing satellites and green aviation research to help address 
pressing environmental challenges.
The Budget requests $27.2 billion in discretionary budget authority for 2024, a $1.8 billion or 
7-percent increase from the 2023 enacted level. 
The President’s 2024 Budget:  
• Supports the Artemis Program’s Next Great Achievements.   Following the successful {'page': 0, 'source': './repo/pdfs/us-national-b

In [None]:
from langchain.chains import RetrievalQA
vector_db_chain =

In [None]:
# =====================================================

## Construct the Agent

In [45]:
from langchain import hub
from langchain.agents import (
    create_react_agent,
    load_tools,
    AgentType,
    AgentExecutor,
  )
from langchain_openai import OpenAI
from langchain_anthropic import ChatAnthropic

from langchain.callbacks.tracers import ConsoleCallbackHandler


# llm = OpenAI(model="gpt-3.5-turbo", temperature=0) # temperature = 0 results in fewer hallucinations
llm = ChatAnthropic(model='claude-3-opus-20240229')

# Tools are functions that agents can use to interact with the world.
tools = load_tools(["serpapi", "llm-math"], llm=llm)



# Initialize agent
prompt = hub.pull("hwchase17/react")
agent = create_react_agent(llm, tools, prompt)
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools)


TypeError: unhashable type: 'VectorStoreRetriever'

In [40]:
agent_executor.invoke({"input": "What is the total requested budget for NASA and NSF for Fiscal Year 2024?"}, config={'callbacks': [ConsoleCallbackHandler()]})

[32;1m[1;3m[chain/start][0m [1m[chain:AgentExecutor] Entering Chain run with input:
[0m{
  "input": "What is the total requested budget for NASA and NSF?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:AgentExecutor > chain:RunnableSequence] Entering Chain run with input:
[0m{
  "input": ""
}
[32;1m[1;3m[chain/start][0m [1m[chain:AgentExecutor > chain:RunnableSequence > chain:RunnableAssign<agent_scratchpad>] Entering Chain run with input:
[0m{
  "input": ""
}
[32;1m[1;3m[chain/start][0m [1m[chain:AgentExecutor > chain:RunnableSequence > chain:RunnableAssign<agent_scratchpad> > chain:RunnableParallel<agent_scratchpad>] Entering Chain run with input:
[0m{
  "input": ""
}
[32;1m[1;3m[chain/start][0m [1m[chain:AgentExecutor > chain:RunnableSequence > chain:RunnableAssign<agent_scratchpad> > chain:RunnableParallel<agent_scratchpad> > chain:RunnableLambda] Entering Chain run with input:
[0m{
  "input": ""
}
[36;1m[1;3m[chain/end][0m [1m[chain:AgentExecutor > chain:Runn

{'input': 'What is the total requested budget for NASA and NSF?',
 'output': 'The total requested budget for NASA and NSF in fiscal year 2023 is $36.492 billion.'}

In [46]:
agent_executor.invoke({"input": "Which of the following cities has the least forecasted temperature on June 20th: Chicago, Houston, Jacksonville?"})

[32;1m[1;3m[chain/start][0m [1m[chain:AgentExecutor] Entering Chain run with input:
[0m{
  "input": "Which of the following cities has the least forecasted temperature on June 20th: Chicago, Houston, Jacksonville?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:AgentExecutor > chain:RunnableSequence] Entering Chain run with input:
[0m{
  "input": ""
}
[32;1m[1;3m[chain/start][0m [1m[chain:AgentExecutor > chain:RunnableSequence > chain:RunnableAssign<agent_scratchpad>] Entering Chain run with input:
[0m{
  "input": ""
}
[32;1m[1;3m[chain/start][0m [1m[chain:AgentExecutor > chain:RunnableSequence > chain:RunnableAssign<agent_scratchpad> > chain:RunnableParallel<agent_scratchpad>] Entering Chain run with input:
[0m{
  "input": ""
}
[32;1m[1;3m[chain/start][0m [1m[chain:AgentExecutor > chain:RunnableSequence > chain:RunnableAssign<agent_scratchpad> > chain:RunnableParallel<agent_scratchpad> > chain:RunnableLambda] Entering Chain run with input:
[0m{
  "input": ""
}
[36;1

{'input': 'Which of the following cities has the least forecasted temperature on June 20th: Chicago, Houston, Jacksonville?',
 'output': 'Chicago is forecasted to have the lowest temperature of 78°F on June 20th compared to Houston and Jacksonville, which are both forecasted to reach 88°F.'}

In [None]:
agent_executor.invoke({"input": "Which of the following cities has the least forecasted temperature on June 20th: Chicago, Houston, Jacksonville?"},
                      config={'callbacks': [ConsoleCallbackHandler()]})