In [2]:
# Pull settings from a local .env file into the process environment so that
# later cells can read API keys through os.getenv / os.environ.
# The call is left as the cell's last expression, so its return value is displayed.
from dotenv import load_dotenv
load_dotenv()

True

In [6]:
import os

# Fail fast with an actionable message when the Activeloop token is missing.
# The previous form, os.environ[k] = os.getenv(k), raised an opaque
# "TypeError: str expected, not NoneType" when the variable was unset and was
# a no-op otherwise.
activeloop_token = os.getenv("ACTIVELOOP_TOKEN")
if not activeloop_token:
    raise EnvironmentError(
        "ACTIVELOOP_TOKEN is not set. Add it to your .env file or export it "
        "in your shell before running this notebook."
    )

# Re-export explicitly so the value is visible to libraries that read
# os.environ directly.
os.environ["ACTIVELOOP_TOKEN"] = activeloop_token

In [7]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DeepLake
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA

In [8]:
# Prerequisite: the Activeloop key must already be stored in the
# "ACTIVELOOP_TOKEN" environment variable before the cells below are run.

# Completion model shared by the QA chain and the agent.
llm = OpenAI(
    model="gpt-3.5-turbo-instruct",
    temperature=0,
)

# Embedding model that is handed to the Deep Lake vector store.
embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
)

In [9]:
# Splitter configured for chunks of up to 1000 characters with no overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)

# Raw facts that seed the vector store.
texts = [
    "Napoleon Bonaparte was born in 15 August 1769",
    "Louis XIV was born in 5 September 1638",
]

# Wrap the raw strings as documents via the splitter.
docs = text_splitter.create_documents(texts)

In [11]:
# Create (or open) the Deep Lake dataset that backs the vector store.
#
# The organization id is no longer something every reader has to edit in the
# code: set the ACTIVELOOP_ORG_ID environment variable (e.g. in your .env) to
# point at your own organization. By default the org id is your Activeloop
# username. When the variable is unset, the original author's id is used, so
# existing behaviour is unchanged.
my_activeloop_org_id = os.getenv("ACTIVELOOP_ORG_ID", "hugo01andres")
my_activeloop_dataset_name = "langchain_course_from_zero_to_heros"
dataset_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}"
db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings)

# Embed the documents and store them in the dataset.
# NOTE(review): adding to an already-populated dataset appends rows (see the
# 4-row summary printed after the later add), so re-running this cell
# presumably stores the same documents again; confirm before re-executing.
# Kept as the last expression so the returned ids are displayed.
db.add_documents(docs)

Your Deep Lake dataset has been successfully created!


Creating 2 embeddings in 1 batches of size 2:: 100%|██████████| 1/1 [00:07<00:00,  7.96s/it]

Dataset(path='hub://hugo01andres/langchain_course_from_zero_to_heros', tensors=['text', 'metadata', 'embedding', 'id'])

  tensor      htype      shape     dtype  compression
  -------    -------    -------   -------  ------- 
   text       text      (2, 1)      str     None   
 metadata     json      (2, 1)      str     None   
 embedding  embedding  (2, 1536)  float32   None   
    id        text      (2, 1)      str     None   





['1ff22ff8-dcd0-11ee-9b09-6045bdc9871f',
 '1ff23124-dcd0-11ee-9b09-6045bdc9871f']

In [13]:
# Retriever view over the Deep Lake store, used by the QA chain below.
retriever = db.as_retriever()

# "stuff"-type RetrievalQA chain: the LLM answers using the retrieved documents.
retrieval_qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

In [14]:
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType

# Expose the retrieval QA chain to the agent as its only tool.
retrieval_tool = Tool(
    name="Retrieval QA System",
    func=retrieval_qa.run,
    description="Useful for answering questions.",
)
tools = [retrieval_tool]

# Zero-shot ReAct agent; verbose=True prints the intermediate reasoning steps.
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

In [15]:
# Ask the agent a question whose answer lives in the stored documents,
# then print the final answer it returns.
question = "When was Napoleone born?"
response = agent.run(question)
print(response)



[1m> Entering new  chain...[0m
[32;1m[1;3m I should use the Retrieval QA System to answer this question
Action: Retrieval QA System
Action Input: When was Napoleone born?[0m
Observation: [36;1m[1;3m
Napoleon Bonaparte was born in 15 August 1769.[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: Napoleon Bonaparte was born in 15 August 1769.[0m

[1m> Finished chain.[0m
Napoleon Bonaparte was born in 15 August 1769.


In [16]:
# Re-open the dataset at the same path, again supplying the embedding function.
db = DeepLake(
    dataset_path=dataset_path,
    embedding_function=embeddings,
)

# Splitter configured for chunks of up to 1000 characters with no overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)

# Two additional facts to append to the existing dataset.
texts = [
    "Lady Gaga was born in 28 March 1986",
    "Michael Jeffrey Jordan was born in 17 February 1963",
]
docs = text_splitter.create_documents(texts)

# Embed and store the new documents; the returned ids are displayed as the
# cell's last expression.
db.add_documents(docs)

Deep Lake Dataset in hub://hugo01andres/langchain_course_from_zero_to_heros already exists, loading from the storage


Creating 2 embeddings in 1 batches of size 2:: 100%|██████████| 1/1 [00:08<00:00,  8.86s/it]

Dataset(path='hub://hugo01andres/langchain_course_from_zero_to_heros', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype      shape     dtype  compression
  -------    -------    -------   -------  ------- 
 embedding  embedding  (4, 1536)  float32   None   
    id        text      (4, 1)      str     None   
 metadata     json      (4, 1)      str     None   
   text       text      (4, 1)      str     None   





['3302061c-dcd1-11ee-9b09-6045bdc9871f',
 '33020748-dcd1-11ee-9b09-6045bdc9871f']

In [17]:
# Fresh OpenAI completion-model wrapper.
llm = OpenAI(
    model="gpt-3.5-turbo-instruct",
    temperature=0,
)

# Retrieval QA chain built over the re-opened Deep Lake store.
retrieval_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(),
)

# Single tool that routes the agent's questions to the QA chain.
retrieval_tool = Tool(
    name="Retrieval QA System",
    func=retrieval_qa.run,
    description="Useful for answering questions.",
)
tools = [retrieval_tool]

# Agent that decides when to call the tool; verbose=True prints its steps.
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

In [18]:
# Use the OpenAI callback to record token usage and cost for the calls made
# inside the `with` block; printing `cb` shows the collected totals.
from langchain.callbacks import get_openai_callback

with get_openai_callback() as cb:
    question = "When was Michael Jordan born?"
    response = agent.run(question)
    print(response)
    print(cb)



[1m> Entering new  chain...[0m
[32;1m[1;3m I should use the Retrieval QA System to find the answer.
Action: Retrieval QA System
Action Input: "When was Michael Jordan born?"[0m
Observation: [36;1m[1;3m Michael Jordan was born on 17 February 1963.[0m
Thought:[32;1m[1;3m I now know the final answer.
Final Answer: Michael Jordan was born on 17 February 1963.[0m

[1m> Finished chain.[0m
Michael Jordan was born on 17 February 1963.
Tokens Used: 494
	Prompt Tokens: 430
	Completion Tokens: 64
Successful Requests: 3
Total Cost (USD): $0.0


In [19]:
from langchain.llms import OpenAI

from langchain.agents import AgentType
from langchain.agents import load_tools
from langchain.agents import initialize_agent

from langchain.agents import Tool
from langchain.utilities import GoogleSearchAPIWrapper

In [20]:
# Completion model used by the Google-search agent below.
llm = OpenAI(
    model="gpt-3.5-turbo-instruct",
    temperature=0,
)

In [22]:
# Remember to set the environment variables "GOOGLE_API_KEY" and
# "GOOGLE_CSE_ID" so that Google Search can be used via the API.
# The wrapper's `run` method is what the search tool below calls.
search = GoogleSearchAPIWrapper()

In [23]:
# A Tool represents one capability the system can use; here, a Google search.
# It is built from three arguments:
#   name        - string that uniquely identifies the tool ("google-search").
#   func        - the function executed when the tool is called; here the
#                 `run` method of the `search` object.
#   description - short explanation of what the tool does; it states that the
#                 tool helps when Google is needed to answer questions about
#                 current events.
google_search_tool = Tool(
    name="google-search",
    func=search.run,
    description="useful for when you need to search google to answer questions about current events",
)
tools = [google_search_tool]

In [24]:
# initialize_agent() creates the agent, the component that decides which
# action to take for a user input (use a tool, answer the user, ...).
#   tools            - the list of Tool objects the agent may use.
#   agent            - ZERO_SHOT_REACT_DESCRIPTION: uses the ReAct framework to
#                      choose a tool based only on each tool's description.
#   verbose=True     - print detailed information about what the agent is
#                      doing; useful for debugging.
#   max_iterations=6 - cap on the number of iterations before the agent stops,
#                      which guards against runaway loops and their cost.
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    max_iterations=6,
)

In [25]:
# Run the search agent under the OpenAI callback so usage is recorded.
# Calling the agent directly returns a mapping; the answer is under "output".
with get_openai_callback() as cb:
    query = "What's the latest news about the Mars rover?"
    response = agent(query)
    print(response["output"])
    print(cb)



[1m> Entering new  chain...[0m
[32;1m[1;3m I should search for news articles about the Mars rover.
Action: google-search
Action Input: "Mars rover news"[0m
Observation: [36;1m[1;3mThe Latest from Mars ... Videos from the rover show its shadow moving across the Martian surface during a 12-hour sequence while Curiosity remained parked. More ... Oct 19, 2022 ... After journeying this summer through a narrow, sand-lined pass, NASA's Curiosity Mars rover recently arrived in the “sulfate-bearing unit,” ... A pair of quakes in 2021 sent seismic waves deep into the Red Planet's core, giving scientists the best data yet on its size and composition. ... Ten sample tubes ... Apr 25, 2023 ... The fully robotic Zhurong, named after a mythical Chinese god of fire, was expected to have woken up in December after entering a planned sleep ... Mission Name: Mars 2020 · Rover Name: Perseverance · Main Job: Seek signs of ancient life and collect samples of rock and regolith (broken rock and soil)

In [26]:
from langchain.llms import OpenAI
from langchain.agents import Tool
from langchain.utilities import GoogleSearchAPIWrapper
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.agents import initialize_agent, AgentType

In [27]:
# Completion model for the summarizer chain and the agent.
llm = OpenAI(
    model="gpt-3.5-turbo-instruct",
    temperature=0,
)

# Prompt with a single placeholder, {query}, holding the text to summarize.
summary_template = "Write a summary of the following text: {query}"
prompt = PromptTemplate(
    input_variables=["query"],
    template=summary_template,
)

# Chain that fills the prompt and sends it to the LLM.
summarize_chain = LLMChain(llm=llm, prompt=prompt)

In [28]:
# Remember to set the environment variables "GOOGLE_API_KEY" and
# "GOOGLE_CSE_ID" so that Google Search can be used via the API.
search = GoogleSearchAPIWrapper()

# Tool 1: Google search for recent information.
search_tool = Tool(
    name="Search",
    func=search.run,
    description="useful for finding information about recent events",
)

# Tool 2: summarization backed by the LLM chain.
summarizer_tool = Tool(
    name='Summarizer',
    func=summarize_chain.run,
    description='useful for summarizing texts',
)

tools = [search_tool, summarizer_tool]

In [29]:
# Zero-shot ReAct agent with access to both the Search and Summarizer tools;
# verbose=True prints its intermediate steps.
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

In [30]:
# Ask for a search followed by a summary while recording OpenAI usage.
# The agent call returns a mapping; the final answer is under "output".
with get_openai_callback() as cb:
    query = "What's the latest news about the Mars rover? Then please summarize the results."
    response = agent(query)
    print(response["output"])
    print(cb)



[1m> Entering new  chain...[0m
[32;1m[1;3m I should use the Search tool to find recent news about the Mars rover.
Action: Search
Action Input: "Mars rover news"[0m
Observation: [36;1m[1;3mThe Latest from Mars ... Videos from the rover show its shadow moving across the Martian surface during a 12-hour sequence while Curiosity remained parked. More ... Oct 19, 2022 ... After journeying this summer through a narrow, sand-lined pass, NASA's Curiosity Mars rover recently arrived in the “sulfate-bearing unit,” ... A pair of quakes in 2021 sent seismic waves deep into the Red Planet's core, giving scientists the best data yet on its size and composition. ... Ten sample tubes ... Apr 25, 2023 ... The fully robotic Zhurong, named after a mythical Chinese god of fire, was expected to have woken up in December after entering a planned sleep ... Mission Name: Mars 2020 · Rover Name: Perseverance · Main Job: Seek signs of ancient life and collect samples of rock and regolith (broken rock a