In [1]:
# Install if needed (%pip installs into the environment of the running kernel)
# %pip install --upgrade chromadb

In [2]:
import chromadb

In [3]:
# Create a Chroma client with default settings.
# NOTE(review): presumably an in-memory, non-persistent client -- confirm against the Chroma docs.
client = chromadb.Client()

In [4]:
# get_or_create_collection keeps this cell re-runnable: create_collection
# raises if 'my_collection' already exists on this client (e.g. when the
# cell is executed a second time in the same kernel).
coll = client.get_or_create_collection(name='my_collection')

In [5]:
# Seed the collection with three people.
# Each embedding is [age, net worth, number of children, zipcode].
# NOTE(review): the features are unscaled, so the zipcode column (~1e4) will
# likely dominate any distance computed between these vectors.
coll.add(
    ids=["11", "12", "13"],
    documents=["Alice", "Bob", "Charlie"],
    metadatas=[
        {"gender": "woman"},
        {"gender": "man"},
        {"gender": "man"},
    ],
    embeddings=[
        [20, 100, 0, 12345],
        [40, 200, 3, 23456],
        [80, 50, 2, 34567],
    ],
)

In [6]:
# Fetch records directly by ID (a lookup, not a similarity search).
results = coll.get(ids=["11", "12"])
results

{'ids': ['11', '12'],
 'embeddings': None,
 'metadatas': [{'gender': 'woman'}, {'gender': 'man'}],
 'documents': ['Alice', 'Bob'],
 'uris': None,
 'data': None}

In [7]:
# Nearest-neighbour query without any metadata filter.
# n_results is not passed, so the default applies (10, per the notice logged below).
results = coll.query(query_embeddings=[40, 100, 2, 12345])
results

Number of requested results 10 is greater than number of elements in index 3, updating n_results = 3


{'ids': [['11', '12', '13']],
 'distances': [[404.0, 123464320.0, 493821376.0]],
 'metadatas': [[{'gender': 'woman'}, {'gender': 'man'}, {'gender': 'man'}]],
 'embeddings': None,
 'documents': [['Alice', 'Bob', 'Charlie']],
 'uris': None,
 'data': None}

In [8]:
# Query point; presumably in the same feature order as the stored embeddings
# (age, net worth, number of children, zipcode).
query = [40, 100, 2, 12345]


In [9]:
# Nearest-neighbour search restricted by an exact ("definitive") metadata match.
# Uses the `query` vector defined in the previous cell instead of repeating the literal.
# Alternative filter using $in over a list of values:
#     where={"gender": {"$in": ["man", "woman"]}}
results = coll.query(
    query_embeddings=query,
    where={"gender": {"$eq": "man"}},
)
results

Number of requested results 10 is greater than number of elements in index 3, updating n_results = 3


{'ids': [['12', '13']],
 'distances': [[123464320.0, 493821376.0]],
 'metadatas': [[{'gender': 'man'}, {'gender': 'man'}]],
 'embeddings': None,
 'documents': [['Bob', 'Charlie']],
 'uris': None,
 'data': None}

In [10]:
# Restrict results to documents whose text contains a given substring.
# This is an exact text filter (definitive, not predictive); it appears to be
# case-sensitive: "Alice" has a lowercase "c" but only "Charlie" comes back.
results = coll.query(
    query_embeddings=query,  # shared query vector defined earlier in the notebook
    where_document={"$contains": "C"},
)
results

Number of requested results 10 is greater than number of elements in index 3, updating n_results = 3


{'ids': [['13']],
 'distances': [[493821376.0]],
 'metadatas': [[{'gender': 'man'}]],
 'embeddings': None,
 'documents': [['Charlie']],
 'uris': None,
 'data': None}

In [11]:
# Return only the requested fields; ids still appear in the result regardless.
results = coll.query(
    query_embeddings=query,  # shared query vector defined earlier in the notebook
    include=["documents", "distances"],
)
results

Number of requested results 10 is greater than number of elements in index 3, updating n_results = 3


{'ids': [['11', '12', '13']],
 'distances': [[404.0, 123464320.0, 493821376.0]],
 'metadatas': None,
 'embeddings': None,
 'documents': [['Alice', 'Bob', 'Charlie']],
 'uris': None,
 'data': None}

In [12]:
# Upsert a record; ID '4' is not in the collection yet, so this adds a new entry.
coll.upsert(
    ids=["4"],
    documents=["Denise"],
    metadatas=[{"gender": "woman"}],
    embeddings=[[40, 100, 0, 23456]],
)

In [13]:
# Inspect the stored records; unlike the get() output earlier, this includes the embeddings.
coll.peek()

{'ids': ['11', '12', '13', '4'],
 'embeddings': [[20.0, 100.0, 0.0, 12345.0],
  [40.0, 200.0, 3.0, 23456.0],
  [80.0, 50.0, 2.0, 34567.0],
  [40.0, 100.0, 0.0, 23456.0]],
 'metadatas': [{'gender': 'woman'},
  {'gender': 'man'},
  {'gender': 'man'},
  {'gender': 'woman'}],
 'documents': ['Alice', 'Bob', 'Charlie', 'Denise'],
 'uris': None,
 'data': None}

In [14]:
# Add another metadata element -- level.
# Only the metadata changes, so documents and embeddings are not re-sent:
# update() leaves fields that are not supplied untouched, and omitting them
# avoids keeping a second copy of the data from the add() cell in sync.
coll.update(
    ids=["11", "12", "13"],
    metadatas=[
        {"gender": "woman", "level": 3},
        {"gender": "man", "level": 2},
        {"gender": "man", "level": 1},
    ],
)

In [15]:
# Search for embeddings with level >= 2.
# Records that lack a 'level' key (ID '4') do not match the filter.
results = coll.query(
    query_embeddings=query,  # shared query vector defined earlier in the notebook
    where={"level": {"$gte": 2}},
)
results

Number of requested results 10 is greater than number of elements in index 4, updating n_results = 4


{'ids': [['11', '12']],
 'distances': [[404.0, 123464320.0]],
 'metadatas': [[{'gender': 'woman', 'level': 3},
   {'gender': 'man', 'level': 2}]],
 'embeddings': None,
 'documents': [['Alice', 'Bob']],
 'uris': None,
 'data': None}

In [16]:
# Search for embeddings with level >= 2 and gender = "man".
# $and takes a list of conditions that must all hold for a record to match.
results = coll.query(
    query_embeddings=query,  # shared query vector defined earlier in the notebook
    where={
        "$and": [
            {"level": {"$gte": 2}},
            {"gender": {"$eq": "man"}},
        ]
    },
)
results

Number of requested results 10 is greater than number of elements in index 4, updating n_results = 4


{'ids': [['12']],
 'distances': [[123464320.0]],
 'metadatas': [[{'gender': 'man', 'level': 2}]],
 'embeddings': None,
 'documents': [['Bob']],
 'uris': None,
 'data': None}

In [17]:
# Page through the collection: skip the first two records and return one.
coll.get(limit=1, offset=2)

{'ids': ['13'],
 'embeddings': None,
 'metadatas': [{'gender': 'man', 'level': 1}],
 'documents': ['Charlie'],
 'uris': None,
 'data': None}

In [18]:
# Final state of the collection: IDs 11-13 now carry 'level'; ID '4' does not.
coll.peek()

{'ids': ['11', '12', '13', '4'],
 'embeddings': [[20.0, 100.0, 0.0, 12345.0],
  [40.0, 200.0, 3.0, 23456.0],
  [80.0, 50.0, 2.0, 34567.0],
  [40.0, 100.0, 0.0, 23456.0]],
 'metadatas': [{'gender': 'woman', 'level': 3},
  {'gender': 'man', 'level': 2},
  {'gender': 'man', 'level': 1},
  {'gender': 'woman'}],
 'documents': ['Alice', 'Bob', 'Charlie', 'Denise'],
 'uris': None,
 'data': None}