# Vector Store

Depending on the vector store you want to use, you will need to install the appropriate package and obtain an API key. Navigate to the section for the vector store you want to use and follow the instructions there.

---

## Setup

In [1]:
import openai
import os
from dotenv import load_dotenv, find_dotenv

# Load the .env file located by find_dotenv(); the result is bound to `_`
# because it is not used afterwards.
_ = load_dotenv(find_dotenv())

# Configure the openai module from environment variables. os.getenv returns
# None for any variable that is not set.
openai.api_type = os.getenv("OPENAI_API_TYPE")
openai.api_base = os.getenv("OPENAI_API_BASE")
openai.api_version = os.getenv("OPENAI_API_VERSION")
openai.api_key = os.getenv("OPENAI_API_KEY")

## LLM

In [2]:
from langchain.chat_models import AzureChatOpenAI

# Chat model shared by the chains and the agent below.
# "gpt4" is passed as the deployment name and temperature is fixed at 0.
# NOTE(review): presumably a deployment with this name must exist in your
# Azure OpenAI resource -- confirm or change the name accordingly.
llm = AzureChatOpenAI(temperature=0, deployment_name="gpt4")

## Vector Stores

### Deep Lake VectorStore
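Make sure to follow the steps below:
- Sign up for or log in to [Activeloop](https://www.activeloop.ai/).
- Create an API token from the home page.
- Copy the API token and paste it into the `.env` file like this: `ACTIVELOOP_TOKEN="<token>"`.
- Add your organization id to the `.env` file as well: `ACTIVELOOP_ORG_ID="<org id>"` (by default, the org id is your username).
- Restart the kernel and run the notebook from the first cell.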


#### Create db and add docs

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import DeepLake
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Embedding function handed to the vector store (HuggingFaceEmbeddings defaults).
embeddings = HuggingFaceEmbeddings()

# Sample facts to index.
texts = [
    "Napoleon Bonaparte was born in 15 August 1769",
    "Louis XIV was born in 5 September 1638",
]

# Turn the raw strings into documents via the text splitter.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.create_documents(texts)

# create Deep Lake dataset
# The organization id is read from the ACTIVELOOP_ORG_ID environment variable
# (by default, org id is your username).
my_activeloop_org_id = os.environ.get("ACTIVELOOP_ORG_ID")
if not my_activeloop_org_id:
    # Fail early: otherwise the f-string below would silently build the
    # path "hub://None/sample_1".
    raise ValueError(
        "ACTIVELOOP_ORG_ID is not set. Add ACTIVELOOP_ORG_ID=\"<org id>\" to your "
        ".env file, restart the kernel and run the notebook from the first cell."
    )
my_activeloop_dataset_name = "sample_1"
dataset_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}"
db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings)

# add documents to our Deep Lake dataset
# NOTE(review): re-running this cell presumably adds the same documents
# again to the dataset -- confirm before re-executing.
db.add_documents(docs)

#### Chain to query LLM with VectorStore

In [5]:
from langchain.chains import RetrievalQA

# Question-answering chain that combines the LLM with a retriever obtained
# from the Deep Lake vector store, using the "stuff" chain type.
retrieval_qa = RetrievalQA.from_chain_type(
    retriever=db.as_retriever(),
    chain_type="stuff",
    llm=llm,
)

# Ask a question answerable from the indexed documents and display the answer.
response = retrieval_qa.run("When was Napoleone born?")
response

'Napoleon Bonaparte was born on 15 August 1769.'

#### Agent to query LLM with VectorStore

In [6]:
from langchain.agents import Tool, initialize_agent, AgentType

# Expose the RetrievalQA chain to the agent as a single tool.
tools = [
    Tool(name="retrieval_qa", func=retrieval_qa.run, description="Retrieval-based QA"),
]

# The keyword for selecting the agent is `agent`. `initialize_agent` has no
# `agent_type` parameter, so passing `agent_type=...` does not select the
# agent and the library default is used instead.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,  # print the intermediate Thought/Action/Observation steps
)

agent.run("When was Napoleone born?")



[1m> Entering new  chain...[0m
[32;1m[1;3mI need to find out when Napoleon Bonaparte was born.
Action: retrieval_qa
Action Input: When was Napoleon Bonaparte born?[0m
Observation: [36;1m[1;3mNapoleon Bonaparte was born on 15 August 1769.[0m
Thought:[32;1m[1;3mI now know the final answer.
Final Answer: Napoleon Bonaparte was born on 15 August 1769.[0m

[1m> Finished chain.[0m


'Napoleon Bonaparte was born on 15 August 1769.'

#### Add more data to VectorStore

In [None]:
from langchain.vectorstores import DeepLake

# Load the existing Deep Lake dataset at dataset_path, reusing the embedding
# function defined in an earlier cell.
db = DeepLake(embedding_function=embeddings, dataset_path=dataset_path)

# Additional facts to index
texts = [
    "Lady Gaga was born in 28 March 1986",
    "Michael Jeffrey Jordan was born in 17 February 1963",
]
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
docs = text_splitter.create_documents(texts=texts)

# Add the new documents to the Deep Lake dataset
db.add_documents(docs)

In [8]:
from langchain.chains import RetrievalQA

# Rebuild the QA chain so that its retriever comes from the `db` object
# bound in the cell above.
retrieval_qa = RetrievalQA.from_chain_type(
    retriever=db.as_retriever(),
    chain_type="stuff",
    llm=llm,
)

# Query a fact taken from the newly added documents.
retrieval_qa.run("When was Lady Gaga born?")

'Lady Gaga was born on 28 March 1986.'