# Pythonic Filters
This notebook demonstrates how to use pythonic filters to query your Chroma collection.
> Note: Pythonic filters currently apply only to `where` (metadata) filters.

In [1]:
import chromadb
client = chromadb.Client()
# Create a new chroma collection
collection_name = "filter_example_collection"
collection = client.create_collection(name=collection_name)
# Add some data to the collection
collection.add(
    embeddings=[
        [1.1, 2.3, 3.2],
        [4.5, 6.9, 4.4],
        [1.1, 2.3, 3.2],
        [4.5, 6.9, 4.4],
        [1.1, 2.3, 3.2],
        [4.5, 6.9, 4.4],
        [1.1, 2.3, 3.2],
        [4.5, 6.9, 4.4],
    ],
    metadatas=[
        {"status": "read"},
        {"status": "unread"},
        {"status": "read"},
        {"status": "unread"},
        {"status": "read"},
        {"status": "unread"},
        {"status": "read"},
        {"status": "unread"},
    ],
    documents=["A document that discusses domestic policy", "A document that discusses international affairs", "A document that discusses kittens", "A document that discusses dogs", "A document that discusses chocolate", "A document that is sixth that discusses government", "A document that discusses international affairs", "A document that discusses global affairs"],
    ids=["id1", "id2", "id3", "id4", "id5", "id6", "id7", "id8"],
)
from chromadb.utils.query_helper import Filter

# Get documents that are read and about affairs
simple_query=collection.get(
    where=Filter.where("status"== "read"), 
    where_document={"$contains": "affairs"})

quoted=collection.get(
    where=Filter.where('"status"== "read"'),
    where_document={"$contains": "affairs"})

multi_line_quoted=collection.get(
    where=Filter.where('"status"'
                       '== "read"'),
    where_document={"$contains": "affairs"})

print(simple_query)
print(quoted)
print(multi_line_quoted)

{'ids': ['id7'], 'embeddings': None, 'metadatas': [{'status': 'read'}], 'documents': ['A document that discusses international affairs']}
{'ids': ['id7'], 'embeddings': None, 'metadatas': [{'status': 'read'}], 'documents': ['A document that discusses international affairs']}
{'ids': ['id7'], 'embeddings': None, 'metadatas': [{'status': 'read'}], 'documents': ['A document that discusses international affairs']}


# Where Filtering With Logical Operators
This section demonstrates how one can use the logical operators in `where` filtering.

Chroma currently supports: `$and` and `$or`operators.

> Note: Logical operators can be nested

In [4]:
# Or Logical Operator Filtering
# import chromadb
client = chromadb.Client()
collection = client.get_or_create_collection("test-where-list")
collection.add(documents=["Article by john", "Article by Jack", "Article by Jill"],
               metadatas=[{"author": "john"}, {"author": "jack"}, {"author": "jill"}], ids=["1", "2", "3"])

q1=collection.get(where={"$or": [{"author": "john"}, {"author": "jack"}]})
q2=collection.get(where=Filter.where("author"=="john" or "author"=="jack"))
print(q1)
print(q2)

Add of existing embedding ID: 1
Add of existing embedding ID: 2
Add of existing embedding ID: 3
Add of existing embedding ID: 1
Add of existing embedding ID: 2
Add of existing embedding ID: 3
Insert of existing embedding ID: 1
Insert of existing embedding ID: 2
Insert of existing embedding ID: 3


{'ids': ['1', '2'], 'embeddings': None, 'metadatas': [{'author': 'john'}, {'author': 'jack'}], 'documents': ['Article by john', 'Article by Jack']}
{'ids': ['1', '2'], 'embeddings': None, 'metadatas': [{'author': 'john'}, {'author': 'jack'}], 'documents': ['Article by john', 'Article by Jack']}


In [5]:
# And Logical Operator Filtering
collection = client.get_or_create_collection("test-where-list")
collection.upsert(documents=["Article by john", "Article by Jack", "Article by Jill"],
               metadatas=[{"author": "john","category":"chroma"}, {"author": "jack","category":"ml"}, {"author": "jill","category":"lifestyle"}], ids=["1", "2", "3"])
q1=collection.get(where={"$and": [{"category": "chroma"}, {"author": "john"}]})
q2=collection.get(where=Filter.where("category"=="chroma" and "author"=="john"))
print(q1)
print(q2)

{'ids': ['1'], 'embeddings': None, 'metadatas': [{'author': 'john', 'category': 'chroma'}], 'documents': ['Article by john']}
{'ids': ['1'], 'embeddings': None, 'metadatas': [{'author': 'john', 'category': 'chroma'}], 'documents': ['Article by john']}


In [7]:
# And logical that doesn't match anything
q1=collection.get(where={"$and": [{"category": "chroma"}, {"author": "jill"}]})
q2=collection.get(where=Filter.where("category"=="chroma" and "author"=="jill"))
print(q1)
print(q2)

{'ids': [], 'embeddings': None, 'metadatas': [], 'documents': []}
{'ids': [], 'embeddings': None, 'metadatas': [], 'documents': []}


In [9]:
# Combined And and Or Logical Operator Filtering
q1=collection.get(where={"$and": [{"category": "chroma"}, {"$or": [{"author": "john"}, {"author": "jack"}]}]})
q2=collection.get(where=Filter.where("category"=="chroma" and ("author"=="john" or "author"=="jack")))
print(q1)
print(q2)

{'ids': ['1'], 'embeddings': None, 'metadatas': [{'author': 'john', 'category': 'chroma'}], 'documents': ['Article by john']}
{'ids': ['1'], 'embeddings': None, 'metadatas': [{'author': 'john', 'category': 'chroma'}], 'documents': ['Article by john']}
