In [1]:
# Load variables from a .env file into the process environment before any
# client object is constructed in the cells below.
# NOTE(review): presumably the OpenAI / Activeloop credentials come from here — confirm.
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
import os

from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import DeepLake



In [6]:
# Model handles shared by the rest of the notebook:
#   embeddings - handed to the Deep Lake vector store as its embedding function
#   llm        - chat model (temperature 0) used by the QA chain and the agent
embeddings = OpenAIEmbeddings()
llm = ChatOpenAI(temperature=0)

In [7]:
# Seed facts for the knowledge base: one sentence per entry.
texts = [
    "Napoleon Bonaparte was born in 15 August 1769",
    "Louis XIV was born in 5 September 1638",
]

In [9]:
# Turn the raw strings into Document objects. Chunks are capped at 1000
# characters with no overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
docs = text_splitter.create_documents(texts)

In [10]:
# Create (or open) the Deep Lake dataset that backs the vector store.
#
# The Activeloop organization id is taken from the ACTIVELOOP_ORG_ID environment
# variable when it is set (for example through the .env file loaded in the
# first cell), so the notebook can target another account without editing this
# cell. When the variable is absent the original id is used unchanged.
active_loop_id = os.environ.get("ACTIVELOOP_ORG_ID", 'cpatrickalves')
my_activeloop_dataset_name = "langchain_course_from_zero_to_hero"
dataset_path = f"hub://{active_loop_id}/{my_activeloop_dataset_name}"
db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings)

# Embed the documents and append them to the dataset; the cell output is the
# list of ids returned for the added documents.
# NOTE: documents are appended to whatever the dataset already holds, so
# re-running this cell against an existing dataset adds them again.
db.add_documents(docs)

Your Deep Lake dataset has been successfully created!


creating embeddings: 100%|██████████| 1/1 [00:04<00:00,  4.59s/it]
100%|██████████| 2/2 [00:01<00:00,  1.44it/s]
-

Dataset(path='hub://cpatrickalves/langchain_course_from_zero_to_hero', tensors=['text', 'metadata', 'embedding', 'id'])

  tensor      htype      shape     dtype  compression
  -------    -------    -------   -------  ------- 
   text       text      (2, 1)      str     None   
 metadata     json      (2, 1)      str     None   
 embedding  embedding  (2, 1536)  float32   None   
    id        text      (2, 1)      str     None   


 

['90c115e8-5713-11ee-b52f-86a3ad70e21d',
 '90c1169c-5713-11ee-b52f-86a3ad70e21d']

In [11]:
# Question-answering chain: the retriever pulls documents from the Deep Lake
# store and the chat model answers from them using the 'stuff' chain type.
retrieval_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=db.as_retriever(),
)

In [12]:
# Creating an agent
from langchain.agents import initialize_agent, Tool, AgentType

# The retrieval QA chain is the only tool the agent can call; the description
# is the text the agent sees when deciding whether to use it.
tools = [
    Tool(
        name='Retrieval QA System',
        description='Useful for answering questions',
        func=retrieval_qa.run,
    ),
]

# Zero-shot ReAct agent; verbose=True prints the intermediate
# thought / action / observation steps when the agent runs.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

In [13]:
# Ask the agent about a fact stored in the dataset. The spelling "Napoleone"
# is kept as-is; the captured run below still returns the right date.
response = agent.run("When was Napoleone born?")
print(response)



[1m> Entering new  chain...[0m
[32;1m[1;3mI should use the Retrieval QA System to find the answer to this question.
Action: Retrieval QA System
Action Input: "When was Napoleone born?"[0m
Observation: [36;1m[1;3mNapoleon Bonaparte was born on 15 August 1769.[0m
Thought:[32;1m[1;3mI now know the final answer
Final Answer: Napoleon Bonaparte was born on 15 August 1769.[0m

[1m> Finished chain.[0m
Napoleon Bonaparte was born on 15 August 1769.


In [14]:
# Re-open the dataset created earlier, passing the same embedding function so
# that new documents are embedded consistently with the stored ones.
db = DeepLake(
    dataset_path=dataset_path,
    embedding_function=embeddings,
)

# Two additional facts to extend the knowledge base.
texts = [
    "Lady Gaga was born in 28 March 1986",
    "Michael Jeffrey Jordan was born in 17 February 1963",
]

# Same splitter settings as before: 1000-character chunks, no overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
docs = text_splitter.create_documents(texts)

# Append the new documents to the existing dataset; the cell output is the
# list of ids returned for them.
db.add_documents(docs)

Deep Lake Dataset in hub://cpatrickalves/langchain_course_from_zero_to_hero already exists, loading from the storage


creating embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.30it/s]
100%|██████████| 2/2 [00:01<00:00,  1.21it/s]
-

Dataset(path='hub://cpatrickalves/langchain_course_from_zero_to_hero', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype      shape     dtype  compression
  -------    -------    -------   -------  ------- 
 embedding  embedding  (4, 1536)  float32   None   
    id        text      (4, 1)      str     None   
 metadata     json      (4, 1)      str     None   
   text       text      (4, 1)      str     None   


 

['a3ea8c50-5716-11ee-b52f-86a3ad70e21d',
 'a3ea8fca-5716-11ee-b52f-86a3ad70e21d']

In [17]:

# Rebuild the QA chain on top of the re-opened `db` handle so retrieval goes
# through the dataset that now includes the newly added documents.
retrieval_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(),
)

# Wrap the chain as the single tool available to the agent.
tools = [
    Tool(
        name="Retrieval QA System",
        description="Useful for answering questions.",
        func=retrieval_qa.run,
    ),
]

# Zero-shot ReAct agent driving the tool; verbose=True prints its
# intermediate reasoning steps.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

In [18]:
# Query a fact that only exists in the second batch of documents.
response = agent.run('When was Michael Jordan born?')
print(response)



[1m> Entering new  chain...[0m
[32;1m[1;3mI should use the Retrieval QA System to find the answer to this question.
Action: Retrieval QA System
Action Input: "When was Michael Jordan born?"[0m
Observation: [36;1m[1;3mMichael Jordan was born on 17 February 1963.[0m
Thought:[32;1m[1;3mI now know the final answer
Final Answer: Michael Jordan was born on 17 February 1963.[0m

[1m> Finished chain.[0m
Michael Jordan was born on 17 February 1963.
