# Analyze Vector DB

In [None]:
from langchain_chroma import Chroma
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

## Specify the embedding model and vector DB

In [None]:
# Embedding model handed to the vector store as its embedding function.
embedding_model = HuggingFaceEmbeddings(model_name="all-mpnet-base-v2")

# Directory passed to Chroma as its persist directory.
database_loc = "my_vector_store"

# Open the Chroma store at that location, wired to the embedding model above.
vectorstore = Chroma(
    persist_directory=database_loc,
    embedding_function=embedding_model,
)

### (Optional) Print the contents of the vector store

In [None]:
# Fetch the store's contents and keep only the 'documents' entry.
all_docs = vectorstore.get()["documents"]

# Report how many entries were returned.
print(f"docs: {len(all_docs)}")

# Uncomment to dump every stored document, separated by a ruler line:
# for idx, doc in enumerate(all_docs):
#     print(f"Document {idx + 1}:")
#     print(doc)
#     print("-" * 80)

## Run a similarity search

In [None]:
from typing import List
from langchain_core.runnables import chain
from langchain_core.documents import Document

@chain
def retriever(query: str) -> List[Document]:
    """Return up to 8 documents similar to *query*, each tagged with its score.

    Runs ``vectorstore.similarity_search_with_score`` with ``k=8`` and stores
    each hit's score under ``doc.metadata["score"]`` before returning it.

    Args:
        query: Text to search the vector store for.

    Returns:
        The matching documents, in the order the vector store returned them.
        The list is empty when the search yields no hits.
    """
    # NOTE(review): for Chroma the score is presumably a distance (lower means
    # closer) — confirm against the collection's configured metric.
    scored_hits = vectorstore.similarity_search_with_score(query, k=8)

    # Iterate the (document, score) pairs directly: unpacking ``zip(*hits)``
    # raises ValueError when there are no hits, and yields tuples rather than
    # the list promised by the return annotation.
    docs: List[Document] = []
    for doc, score in scored_hits:
        doc.metadata["score"] = score
        docs.append(doc)

    return docs

In [None]:
# Query text used for both the standalone embedding and the similarity search.
phrase = "When do my slices expire?"

# Embed the phrase with the same model instance that was given to the vector
# store, instead of constructing a second, default-configured
# HuggingFaceEmbeddings just for this call.
# `embedding` is not used further in this cell; it is kept for inspection.
embedding = embedding_model.embed_query(phrase)

# Run the retriever chain; each returned document carries its score in metadata.
results = retriever.invoke(phrase)
#print(results)

for result in results:
    print(result.metadata)