In [7]:
import os
import pprint
import matplotlib.pyplot as pyplot
from dotenv import load_dotenv

import chromadb
from chromadb.utils.embedding_functions import OpenCLIPEmbeddingFunction
from chromadb.utils.data_loaders import ImageLoader

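New ingredients: a multimodal embedding function and an image data loader.

- [CLIP](https://openai.com/index/clip/) — OpenAI's model that embeds text and images into a shared vector space (used here through `OpenCLIPEmbeddingFunction`).
- [Chroma multimodal guide](https://docs.trychroma.com/guides/multimodal) — Chroma's documentation on multimodal collections and data loaders.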


In [None]:
# Name of the multimodal collection used by the rest of the notebook
collection_name = "m_db"

# Persistent client backed by the "./chromadb/" folder
# (the folder is created on first use and re-opened afterwards)
chroma_client = chromadb.PersistentClient(path="./chromadb/")

# Helper that loads images for records stored by URI
image_loader = ImageLoader()

# Embedding function that supports both text and images
multimodal_ef = OpenCLIPEmbeddingFunction()

# Open the collection if it already exists, otherwise create it
collection = chroma_client.get_or_create_collection(
    data_loader=image_loader,
    embedding_function=multimodal_ef,
    name=collection_name,
)

# Current contents of the collection
result = collection.get()

print(f"Collection {collection_name} created successfully -> {chroma_client.heartbeat()} nanoseconds")
pprint.pprint(result)

In [None]:
# Register two images by URI; ids and uris are matched by position
collection.add(
    ids=['0', '1'],
    uris=['../files/img/cat.png', '../files/img/dog.png'],
)

# Number of records now stored in the collection
collection.count()

In [4]:
def print_q_results(query_list: list, query_results: dict) -> None:
    """Print (and, when image data is present, plot) the results of a query.

    Args:
        query_list: The queries that were sent, in the same order as the
            per-query result lists inside ``query_results``.
        query_results: The dict returned by ``collection.query``. ``'ids'`` is
            required. ``'distances'``, ``'metadatas'``, ``'uris'`` and
            ``'data'`` are optional: a field that is missing or ``None``
            (for example because it was left out of ``include``) is shown
            as ``None``, and the image plot is skipped when there is no data.

    Returns:
        None. Output goes to stdout and to the active matplotlib backend.
    """

    def _field(name: str, query_idx: int, result_idx: int):
        # Fields that were not requested come back as None (or are absent);
        # report them as None instead of failing on the subscript.
        values = query_results.get(name)
        if values is None:
            return None
        return values[query_idx][result_idx]

    for i, query in enumerate(query_list):
        print(f'Results for query -> {query} <-\n')

        # Each query has its own result list, so its length is taken per
        # query instead of assuming every query matches the first one.
        for j, doc_id in enumerate(query_results['ids'][i]):
            distance = _field('distances', i, j)
            metadata = _field('metadatas', i, j)
            uri = _field('uris', i, j)
            image = _field('data', i, j)

            print(f'document id:    {doc_id}')
            print(f'distance:       {distance}')
            print(f'metadata:       {metadata}')

            # 'data' holds what the collection's data loader produced for the
            # record's URI (an image when ImageLoader is used); the URI itself
            # is printed as the label.
            print(f'data:           {uri}')
            if image is not None:
                pyplot.imshow(image)
                pyplot.axis("off")
                pyplot.show()
            print('---')


In [None]:
# Text query against the image collection
query = ['dog']

# Ask for up to 5 nearest records and return every available field
q_result = collection.query(
    include=['documents', 'distances', 'embeddings', 'metadatas', 'data', 'uris'],
    n_results=5,
    query_texts=query,
)

print_q_results(query_list=query, query_results=q_result)

In [None]:
# Add five soup images together with descriptive metadata;
# ids, uris and metadatas are matched by position.
collection.add(
    ids=['soup_0', 'soup_1', 'soup_2', 'soup_3', 'soup_4'],
    uris=[
        '../files/img/0_aisoup.png',
        '../files/img/1_borsch.png',
        '../files/img/2_cullen_skink.png',
        '../files/img/3_ramen.png',
        '../files/img/4_tom_yum.png',
    ],
    metadatas=[
        {
            'img_id': 'soup_0',
            'category': 'course',
            'ingredients': 'anthropic, python, evidently, chromadb, sqlite',
        },
        {
            'img_id': 'soup_1',
            'category': 'soup',
            'ingredients': 'beets, cabbage, potatoes, carrots, onions, garlic, beef, tomato paste, vinegar, sugar, bay leaves, sour cream, dill',
        },
        {
            'img_id': 'soup_2',
            'category': 'soup',
            'ingredients': 'smoked haddock, potatoes, onions, milk or cream, butter, parsley',
        },
        {
            'img_id': 'soup_3',
            'category': 'soup',
            'ingredients': 'ramen noodles, chicken broth, boiled eggs, green onions, nori (seaweed), bamboo shoots',
        },
        {
            'img_id': 'soup_4',
            'category': 'soup',
            'ingredients': 'shrimp seafood, lemongrass, kaffir lime leaves, galangal, Thai chilies, fish sauce, lime juice, mushrooms, tomatoes, cilantro',
        },
    ],
)

# Number of records now stored in the collection
collection.count()

In [None]:
# Text query against the image collection
query = ['beef']

q_result = collection.query(
    include=['documents', 'distances', 'embeddings', 'metadatas', 'data', 'uris'],
    n_results=5,
    # Optional metadata filter, disabled by default; uncomment to try it:
    # where={'category': {'$eq': 'soup'}},
    query_texts=query,
)

print_q_results(query_list=query, query_results=q_result)

In [None]:
# Clean up: remove the demo collection from the persistent store
chroma_client.delete_collection(name=collection_name)

# Show which collections remain
list_collections = chroma_client.list_collections()
print(list_collections)