https://medium.com/@chilldenaya/vector-database-introduction-and-python-implementation-4a6ac8518c6b

In [6]:
import chromadb
from chromadb.config import Settings
from pprint import pprint

# Directory handed to Chroma as its persistence location (a relative path).
db_path = "qchromaDB"

# Settings for a persistent client that stores its data under `db_path`.
client_settings = Settings(
    persist_directory=db_path,
    is_persistent=True,
    allow_reset=True,  # lets a later cell call client.reset()
    anonymized_telemetry=False,
)

# Single client instance shared by every cell below.
client = chromadb.Client(client_settings)


In [11]:
# Reset the client before building the collection; the recorded output of this
# cell is True. The call relies on allow_reset=True in the Settings above.
# NOTE(review): per the Chroma docs, reset() wipes every collection held by
# this client's database -- confirm nothing in the persist directory needs to
# be kept before running this cell.
client.reset()

True

In [12]:
from chromadb.utils import embedding_functions

# Name of the model given to the SentenceTransformer embedding function.
EMBED_MODEL = "all-MiniLM-L6-v2"

# Embedding function that is attached to the collection created below.
embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=EMBED_MODEL)

In [13]:
# Fetch the collection if it already exists, otherwise create it.
# `create_collection` fails when the name is already taken, which made this
# cell error on a re-run unless `client.reset()` had been executed first;
# `get_or_create_collection` behaves the same on a fresh database and is safe
# to re-run.
# NOTE(review): when the collection already exists, the metadata passed here
# may not be re-applied -- reset the client if the distance space must change.
collection = client.get_or_create_collection(
    name="my-food-collection",
    metadata={"hnsw:space": "cosine"}, # l2 is the default
    embedding_function=embedding_func,
)

In [14]:
# First batch: three documents about food and animals.
# No `embeddings` are passed, so the collection's embedding function is
# expected to compute them from `documents`.
# NOTE(review): the id "docs2" breaks the "doc<N>" pattern of its siblings; it
# is kept unchanged because the recorded query outputs below reference it.
collection.add(
    ids=["doc1", "docs2", "doc3"],
    # fill own embedding here:
    #embeddings=[[...]]
    documents=[
        "Food may be made from plants or animals",
        "Some animals are meat-eaters and some are plant-eaters",
        "Cats and dogs usually eat food made from animals",
    ],
    metadatas=[
        {"topic": "food"},
        {"topic": "animal"},
        {"topic": "animal"},
    ],
)

In [15]:
# Second batch: three more documents, all tagged with the "food" topic.
# NOTE(review): "Panncakes" looks like a typo for "Pancakes" and "docs12"
# breaks the "doc<N>" id pattern; both are kept unchanged because the text is
# what gets embedded and the id appears in the recorded query output below.
collection.add(
    ids=["doc11", "docs12", "doc13"],
    # fill own embedding here:
    #embeddings=[[...]]
    documents=[
        "Vegetables are good for health",
        "Examples of vegetables are carrot, beans, and peas",
        "Panncakes are made from flour, eggs, and milk",
    ],
    metadatas=[
        {"topic": "food"},
        {"topic": "food"},
        {"topic": "food"},
    ],
)

In [16]:
# Ask for the three stored documents closest to a free-text question.
# The collection was created with "hnsw:space": "cosine", so the distances in
# the result are measured in that space.
results = collection.query(n_results=3, query_texts=["What does vegetable soup consist of?"])

In [17]:
# Pretty-print the query result; the recorded output below shows the keys
# 'ids', 'documents', 'metadatas' and 'distances', each holding one inner list
# for the single query text.
pprint(results)

{'data': None,
 'distances': [[0.44762538572303656, 0.5321055021441002, 0.585007892134938]],
 'documents': [['Examples of vegetables are carrot, beans, and peas',
                'Vegetables are good for health',
                'Food may be made from plants or animals']],
 'embeddings': None,
 'ids': [['docs12', 'doc11', 'doc1']],
 'metadatas': [[{'topic': 'food'}, {'topic': 'food'}, {'topic': 'food'}]],
 'uris': None}


In [18]:
# Second lookup with a different question; note that this rebinds `results`,
# so the previous query's output is no longer available afterwards.
results = collection.query(n_results=3, query_texts=["What does dogs eat?"])

In [19]:
# Pretty-print the result of the second query (the value currently bound to
# `results`).
pprint(results)

{'data': None,
 'distances': [[0.2570904627449724, 0.47041244055997067, 0.5191561274471861]],
 'documents': [['Cats and dogs usually eat food made from animals',
                'Food may be made from plants or animals',
                'Some animals are meat-eaters and some are plant-eaters']],
 'embeddings': None,
 'ids': [['doc3', 'doc1', 'docs2']],
 'metadatas': [[{'topic': 'animal'}, {'topic': 'food'}, {'topic': 'animal'}]],
 'uris': None}
