# Vector Database

## Demo code to store and search via a vector database
A quick demo that stores data from amazon-fine-food-reviews in a vector database and then searches it.

Please ensure you have a *.env* file in your HOME/Documents/src/openai/ folder.

In [None]:
from openai import AzureOpenAI
from dotenv import load_dotenv
import pandas as pd
import tiktoken
import os

# Locate the .env file under the user's home directory.
# expanduser("~") is used instead of os.getenv("HOME") so that an unset HOME
# (e.g. on Windows) does not fail with a TypeError from `None + str`.
env_path = os.path.join(
    os.path.expanduser("~"), "Documents", "src", "openai", ".env"
)
load_dotenv(dotenv_path=env_path, verbose=True)

# These are assigned after load_dotenv, so they take precedence over any
# values of the same name that came from the .env file.
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_VERSION"] = "2023-05-15"
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://pvg-azure-openai-uk-south.openai.azure.com"

# Build the client from the environment configured above so the endpoint and
# API version are defined in exactly one place. AZURE_OPENAI_API_KEY is not
# set in this cell; it is read from the environment (presumably supplied by
# the .env file).
client = AzureOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.environ["OPENAI_API_VERSION"],
)

In [None]:
# CSV file with the review data that gets loaded and embedded.
input_path = "data/fine_food_reviews_1k.csv"

# Folder where the FAISS index is saved to and loaded from.
db_path = "data/amazon-food-reviews-faiss"

In [None]:
from langchain.embeddings import AzureOpenAIEmbeddings
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.vectorstores import FAISS


# Load the review CSV into LangChain documents.
loader = CSVLoader(
    file_path=input_path,
    encoding="utf-8",
    csv_args=dict(delimiter=","),
)
data = loader.load()

# Embed the loaded documents with Azure OpenAI and build a FAISS store.
db = FAISS.from_documents(documents=data, embedding=AzureOpenAIEmbeddings())

In [None]:
# Persist the FAISS store under db_path so it can be reloaded later.
db.save_local(folder_path=db_path)

In [None]:
# Reload the store saved under db_path, using Azure OpenAI embeddings.
new_db = FAISS.load_local(
    folder_path=db_path,
    embeddings=AzureOpenAIEmbeddings(),
)

In [None]:
# Run a plain similarity search against the reloaded store and print each
# matching document followed by a blank line.
query = "bad packaging"
answer_list = new_db.similarity_search(query)
for hit in answer_list:
    print(hit.page_content, end="\n\n")

In [None]:
# Wrap the in-memory store in a retriever configured with a 0.75 score
# threshold, then print each retrieved document followed by a blank line.
threshold_options = {"score_threshold": 0.75}
retriever = db.as_retriever(
    search_kwargs=threshold_options,
    search_type="similarity_score_threshold",
)
docs = retriever.get_relevant_documents("bad packaging")
for match in docs:
    print(f"{match.page_content}\n")

In [None]:
from langchain.chains import RetrievalQA
from langchain.chat_models import AzureChatOpenAI

# Chat model used to answer questions over the retrieved reviews.
llm = AzureChatOpenAI(temperature=0.3, model_name="gpt-35-turbo")

# Retriever over the reloaded store, configured with a 0.75 score threshold.
threshold_retriever = new_db.as_retriever(
    search_kwargs={"score_threshold": 0.75},
    search_type="similarity_score_threshold",
)
qa_chain = RetrievalQA.from_chain_type(llm, retriever=threshold_retriever)

# Return the source documents with the answer and turn on verbose output for
# the document-combining step.
qa_chain.return_source_documents = True
qa_chain.combine_documents_chain.verbose = True

In [None]:
# Ask the QA chain about packaging complaints. The call stays the last
# expression of the cell so the notebook displays its result.
question = {"query": "bad packaging"}
qa_chain(question)