## Install libraries and set up keys

In [1]:
%%capture

!pip install llama-index                         # llama-index core
!pip install llama-index-llms-openai             # OpenAI bindings
!pip install llama-index-llms-anthropic          # Anthropic bindings
!pip install llama-index-embeddings-huggingface  # HuggingFace Embeddings
!pip install llama-index-tools-google            # Google Search
!pip install llama-index-tools-bing-search       # Bing Search
!pip install llama-index-tools-wikipedia         # Wikipedia
!pip install pip install llama-index-tools-tavily-research # Tavily research API


!pip install google-search-results  # Serp API
!pip install pypdf                  # Parse PDF docs
!pip install sentence_transformers  # Embedding
!pip install chromadb               # Vector DB

In [44]:
import os
from google.colab import userdata

# Copy the secrets stored in Colab into environment variables so later cells
# can read them via os.environ.
# Currently disabled secrets (add them to the tuple below to enable):
#   'HF_TOKEN', 'SERPAPI_API_KEY', 'ANTHROPIC_API_KEY'
for secret_name in ('OPENAI_API_KEY', 'BING_API_KEY', 'TAVILY_API_KEY'):
    os.environ[secret_name] = userdata.get(secret_name)

In [3]:
# Remove any checkout left over from a previous run so the clone below starts clean.
!rm -rf ./repo
# Shallow clone (latest commit only) into ./repo; the PDFs are read from ./repo/pdfs later.
!git clone --depth 1 https://github.com/adeshmukh/gaiip-intro-orchestration.git ./repo

Cloning into './repo'...
remote: Enumerating objects: 16, done.[K
remote: Counting objects: 100% (16/16), done.[K
remote: Compressing objects: 100% (15/15), done.[K
remote: Total 16 (delta 1), reused 13 (delta 1), pack-reused 0[K
Receiving objects: 100% (16/16), 635.09 KiB | 2.04 MiB/s, done.
Resolving deltas: 100% (1/1), done.


## Load PDFs in Vector DB

In [4]:
%%capture

from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)
from llama_index.core.tools import QueryEngineTool, ToolMetadata

# Ignore pypdf warnings (only errors from the "pypdf" logger are shown)
import logging
logging.getLogger("pypdf").setLevel(logging.ERROR)

# Load data from PDFs.
# os.listdir() returns entries in arbitrary order and includes every directory
# entry (hidden files, sub-directories, ...). Keep only regular *.pdf files and
# sort them so the set and order of loaded documents is the same on every run.
base_path = "./repo/pdfs"
pdf_files = sorted(
    file
    for file in os.listdir(base_path)
    if file.lower().endswith(".pdf")
    and os.path.isfile(os.path.join(base_path, file))
)
pdf_file_paths = [os.path.join(base_path, file) for file in pdf_files]

# Fail early with an explicit message instead of handing the reader an empty list.
if not pdf_file_paths:
    raise FileNotFoundError(f"No PDF files found in {base_path}")

docs = SimpleDirectoryReader(
    input_files=pdf_file_paths
).load_data()


In [5]:
%%capture

# Build the index
from llama_index.core import Settings
from llama_index.embeddings.openai import OpenAIEmbedding

# Embedding model used to vectorise the PDF documents.
embed_model = OpenAIEmbedding()

# To use a HuggingFace embedding instead, enable the import and ONE of the
# model lines below (original note: needs HF_API_TOKEN; the secrets cell names
# the key HF_TOKEN — TODO confirm which one applies).
# from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
# embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Register the model in the global Settings, then build the vector index
# from the loaded documents.
Settings.embed_model = embed_model
pdf_index = VectorStoreIndex.from_documents(
    documents=docs,
    show_progress=True,
)

# Save the index on local disk.
pdf_index.storage_context.persist(persist_dir="./storage/pdfs")


Parsing nodes:   0%|          | 0/17 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/17 [00:00<?, ?it/s]

In [6]:
# Expose the PDF index to the agent as a named query tool.
# The query engine retrieves the 3 most similar chunks per question.
pdf_query_engine = pdf_index.as_query_engine(similarity_top_k=3)

# Name and description under which the tool is registered.
budget_tool_metadata = ToolMetadata(
    name="us_national_budget_fy24",
    description=(
        "Provides information about US national budget for the fiscal year 2024. "
        "Use a detailed plain text question as input to the tool."
    ),
)

vector_db_tool = QueryEngineTool(
    query_engine=pdf_query_engine,
    metadata=budget_tool_metadata,
)

## Zero-shot ReAct agent

In [46]:
from llama_index.core.agent import ReActAgent
from llama_index.llms.openai import OpenAI
from llama_index.llms.anthropic import Anthropic
from llama_index.core.tools import BaseTool, FunctionTool
from llama_index.tools.bing_search import BingSearchToolSpec
from llama_index.tools.tavily_research import TavilyToolSpec

Simple functions that will be used as tools

In [52]:
from typing import List

def multiply(a: float, b: float) -> float:
    """Multiply two numbers and return the resulting product."""
    product = a * b
    return product

def add(a: float, b: float) -> float:
    """Add two numbers and return the resulting sum."""
    total = a + b
    return total

def average(a: List[float]) -> float:
    """Compute and return the arithmetic mean of the given list of numbers.

    An empty list raises ZeroDivisionError, because the sum is divided by the
    list length.
    """
    total = sum(a)
    count = len(a)
    return total / count

In [54]:
# Wrap the arithmetic helpers as agent tools.
multiply_tool = FunctionTool.from_defaults(fn=multiply)
add_tool = FunctionTool.from_defaults(fn=add)

# Each search spec yields a list of tools; only the first entry of each is used.
bing_spec = BingSearchToolSpec(api_key=os.environ['BING_API_KEY'])
web_search_tool = bing_spec.to_tool_list()[0]
tavily_spec = TavilyToolSpec(api_key=os.environ['TAVILY_API_KEY'])
tavily_search = tavily_spec.to_tool_list()[0]

# LLM that drives the agent (temperature 0). Alternatives kept for reference:
# llm = OpenAI(model="gpt-3.5-turbo-instruct")
# llm = Anthropic(model='claude-3-opus-20240229')
llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0)

# ReAct agent with access to the arithmetic, PDF-index and search tools;
# verbose=True prints the agent's intermediate steps.
agent_tools = [multiply_tool, add_tool, vector_db_tool, web_search_tool, tavily_search]
agent = ReActAgent.from_tools(agent_tools, llm=llm, verbose=True)

In [24]:
# Budget question; in the recorded run the agent answered it with the
# us_national_budget_fy24 tool.
response = agent.chat(
    "What is the total requested budget for NASA and NSF for Fiscal Year 2024?"
)

[1;3;38;5;200mThought: The user is asking for specific budget information for NASA and NSF for Fiscal Year 2024. I can use the "us_national_budget_fy24" tool to retrieve this information.
Action: us_national_budget_fy24
Action Input: {'input': 'What is the total requested budget for NASA and NSF for Fiscal Year 2024?'}
[0m[1;3;34mObservation: The total requested budget for NASA and NSF for Fiscal Year 2024 is $38.5 billion.
[0m[1;3;38;5;200mThought: I can answer without using any more tools. I'll use the user's language to answer
Answer: The total requested budget for NASA and NSF for Fiscal Year 2024 is $38.5 billion.
[0m

In [49]:
# Weather question; in the recorded run the agent called the `search` tool
# to look up the forecasts.
response = agent.chat(
    "Which of the following cities has the least forecasted temperature tomorrow: Chicago, Houston, Jacksonville?"
)

[1;3;38;5;200mThought: I need to find the forecasted temperatures for Chicago, Houston, and Jacksonville.
Action: search
Action Input: {'query': 'Chicago weather forecast tomorrow'}
[0m[1;3;34mObservation: [Document(id_='af768a22-bf1f-4a04-8527-e23323ceb2e3', embedding=None, metadata={'url': 'https://world-weather.info/forecast/usa/chicago/june-2024/'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Extended weather forecast in Chicago. Hourly Week 10 days 14 days 30 days Year. Detailed ⚡ Chicago Weather Forecast for June 2024 - day/night 🌡️ temperatures, precipitations - World-Weather.info.', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), Document(id_='9667b0fc-f7cd-4b85-a00f-cb7b2ecbc750', embedding=None, metadata={'url': 'https://www.weatherapi.com/'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text="{'