# Vectorstore retrieval

In [None]:
import os
import openai
from dotenv import load_dotenv

# Let python-dotenv populate the process environment (by default from a
# `.env` file), then hand the key to the openai module. The value is None
# when OPENAI_API_KEY is not set.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

## Similarity Search

In [None]:
from langchain.vectorstores import Chroma
from langchain_openai.embeddings import OpenAIEmbeddings

# Directory passed to Chroma as its persistence location.
persist_directory = "docs/chroma/"

In [None]:
# Embedding model used for every vector store built in this notebook.
embeddings = OpenAIEmbeddings()

# Chroma store bound to `persist_directory`, embedding queries with the model
# created above.
vectordb = Chroma(
    embedding_function=embeddings,
    persist_directory=persist_directory,
)

In [None]:
# Report how many entries the underlying Chroma collection holds.
# NOTE: `_collection` is a private attribute of the wrapper, so this access
# may break across library versions.
stored_entries = vectordb._collection.count()
print(stored_entries)

In [None]:
# Three short sample passages about the same mushroom; the first two overlap
# heavily in content, the third adds the toxicity information.
texts = [
    "The Amanita phalloides has a large and imposing epigeous (aboveground) fruiting body (basidiocarp).",
    "A mushroom with a large fruiting body is the Amanita phalloides. Some varieties are all-white.",
    "A. phalloides, a.k.a Death Cap, is one of the most poisonous of all known mushrooms.",
]

In [None]:
# Build a small Chroma store directly from the sample passages; no
# persist_directory is passed here.
smalldb = Chroma.from_texts(
    texts=texts,
    embedding=embeddings,
)

In [None]:
# Query used against the small sample store.
question = (
    "Tell me about all-white mushrooms "
    "with large fruiting bodies"
)

In [None]:
# Plain similarity search: return the two passages closest to the question.
smalldb.similarity_search(query=question, k=2)

In [None]:
# Maximal-marginal-relevance search: consider fetch_k=3 candidates and return
# k=2 of them.
smalldb.max_marginal_relevance_search(
    query=question,
    k=2,
    fetch_k=3,
)

## Addressing Diversity: Maximum marginal relevance

In [None]:
# New query, this time against the store opened from `persist_directory`;
# keep the three nearest results from a plain similarity search.
question = "what did they say about matlab?"
docs_ss = vectordb.similarity_search(query=question, k=3)

In [None]:
# Preview the first 100 characters of the top similarity-search result.
top_ss_doc = docs_ss[0]
top_ss_doc.page_content[:100]

In [None]:
# Same question, retrieved with maximal marginal relevance instead of plain
# similarity; three results are requested.
docs_mmr = vectordb.max_marginal_relevance_search(query=question, k=3)

In [None]:
# Preview the first 100 characters of the top MMR result.
top_mmr_doc = docs_mmr[0]
top_mmr_doc.page_content[:100]