# Local Persistence Demo
This notebook demonstrates how to persist the in-memory version of Chroma to disk, then load it back in. 

In [1]:
import shutil

import chromadb
from chromadb.config import Settings

In [3]:
# Directory that is handed to Chroma as its persistence location.
persist_directory = "db"
# Name under which the demo collection is created (and later looked up).
collection_name = "peristed_collection"

# Build a Chroma client with persistence enabled.
client = chromadb.Client(
    Settings(chroma_db_impl="duckdb+parquet", persist_directory=persist_directory)
)

# Start from scratch
client.reset()

# Create a fresh collection to hold the demo data.
collection = client.create_collection(name=collection_name)

Running Chroma using direct local API.
No existing DB found in db, skipping load
No existing DB found in db, skipping load


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Add some data to the collection.
# Each row describes one item as (id, document, embedding, metadata); the
# columns are split back out below because `collection.add` takes one
# list per field.
records = [
    ("id1", "doc1", [1.1, 2.3, 3.2], {"uri": "img1.png", "style": "style1"}),
    ("id2", "doc2", [4.5, 6.9, 4.4], {"uri": "img2.png", "style": "style2"}),
    ("id3", "doc3", [1.1, 2.3, 3.2], {"uri": "img3.png", "style": "style1"}),
    ("id4", "doc4", [4.5, 6.9, 4.4], {"uri": "img4.png", "style": "style1"}),
    ("id5", "doc5", [1.1, 2.3, 3.2], {"uri": "img5.png", "style": "style1"}),
    ("id6", "doc6", [4.5, 6.9, 4.4], {"uri": "img6.png", "style": "style1"}),
    ("id7", "doc7", [1.1, 2.3, 3.2], {"uri": "img7.png", "style": "style1"}),
    ("id8", "doc8", [4.5, 6.9, 4.4], {"uri": "img8.png", "style": "style1"}),
]

collection.add(
    ids=[item_id for item_id, _, _, _ in records],
    documents=[document for _, document, _, _ in records],
    embeddings=[embedding for _, _, embedding, _ in records],
    metadatas=[metadata for _, _, _, metadata in records],
)

In [5]:
# Persist the DB, i.e. write the client's current state out to disk.
# This also happens automatically when the client is garbage collected, but
# garbage collection timing is not something to rely on in a notebook, so
# prefer to call persist explicitly.
client.persist()


Persisting DB to disk, putting it in the save folder db


True

In [6]:
# Create a second client, configured exactly like the first one
# (same persist directory, same DB implementation).
client = chromadb.Client(
    Settings(chroma_db_impl="duckdb+parquet", persist_directory=persist_directory)
)

# Look the collection up again by the name it was created with.
collection = client.get_collection(collection_name)

Running Chroma using direct local API.
loaded in 8 embeddings
loaded in 1 collections


In [7]:
# Query the reloaded collection for the single closest match to a vector
# that was also used as an embedding when the data was added.
results = collection.query(n_results=1, query_embeddings=[[1.1, 2.3, 3.2]])

print(results)

{'embeddings': [[[1.1, 2.3, 3.2]]], 'documents': [['doc5']], 'ids': [['id5']], 'metadatas': [[{'uri': 'img5.png', 'style': 'style1'}]], 'distances': [[0.0]]}


In [10]:
# Clean up: reset the client, then persist so the reset state is what
# gets written to disk.
client.reset()
client.persist()

# You can also just delete the persist directory.
# shutil.rmtree is used instead of a shell `rm -rf` so that the cell works
# without a POSIX shell and always targets `persist_directory` rather than a
# hard-coded path; ignore_errors=True keeps it quiet if the directory is
# already gone, as `rm -rf` would be.
shutil.rmtree(persist_directory, ignore_errors=True)

Persisting DB to disk, putting it in the save folder db
