# Chroma DB Demo

Chroma is an AI-native, open-source vector database focused on developer productivity and happiness. Chroma is licensed under Apache 2.0.

https://docs.trychroma.com/guides

Differences: ChromaDB vs. Pinecone

# Load dependencies

In [None]:
pip install langchain langchain_community langchain_chroma sentence_transformers  langchain-openai

In [None]:
import langchain
import os
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_text_splitters import CharacterTextSplitter
import logging

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

# Choose where the log file goes. The original location (/content) is kept when
# that directory exists; otherwise fall back to the current working directory so
# the FileHandler does not fail on machines without a /content directory.
LOG_DIR = "/content" if os.path.isdir("/content") else os.getcwd()
LOG_PATH = os.path.join(LOG_DIR, "langchaindemo.log")

# Send DEBUG-and-above records both to the log file (truncated on every run via
# mode='w') and to the notebook output. force=True replaces any handlers that
# were already attached to the root logger, so re-running this cell does not
# duplicate output.
logging.basicConfig(
    level=logging.DEBUG,
    format='[%(levelname)s] - %(message)s ',
    handlers=[
        logging.FileHandler(LOG_PATH, mode='w'),
        logging.StreamHandler(),
    ],
    force=True,
)
logger = logging.getLogger(__name__)
logger.info("Langchain Demo Initialized")

[INFO] - Langchain Demo Initialized 


# Create Documents from scratch

In [None]:
from langchain_core.documents import Document

# Sample corpus: one sentence per pet, paired with the `source` tag that is
# stored in each document's metadata.
pet_facts = [
    ("Dogs are great companions, known for their loyalty and friendliness.", "mammal-pets-doc"),
    ("Cats are independent pets that often enjoy their own space.", "mammal-pets-doc"),
    ("Goldfish are popular pets for beginners, requiring relatively simple care.", "fish-pets-doc"),
    ("Parrots are intelligent birds capable of mimicking human speech.", "bird-pets-doc"),
    ("Rabbits are social animals that need plenty of space to hop around.", "mammal-pets-doc"),
]

# Wrap every (text, source) pair in a Document, keeping the order above.
docs = [
    Document(page_content=text, metadata={"source": source})
    for text, source in pet_facts
]

In [None]:
# Split docs into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
chunks = text_splitter.split_documents(docs)

# Init embedding function
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Load the chunks into Chroma. The splitter output (`chunks`) is what gets
# indexed; previously the unsplit `docs` were passed and `chunks` was unused.
# No persist_directory is passed here; persistence is shown in a later cell.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_function,
)


In [None]:
# Query the vector store.
# NOTE: `docs` is rebound here from the source documents to the search hits;
# the following cell displays these hits.
query = "i want to eat cheese"
docs = vectorstore.similarity_search(query=query)

# Show the text of the best-ranked hit.
top_hit = docs[0]
print(top_hit.page_content)

Goldfish are popular pets for beginners, requiring relatively simple care.


In [None]:
# Display every hit returned by the similarity search in the previous cell.
docs

[Document(metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.')]

# Persist to disk


In [None]:
# Save to disk: build a second store that persists under ./chroma_db.
# Index `chunks` (the split source documents) rather than `docs`: by this point
# `docs` has been rebound to the hits of the earlier similarity search, so
# persisting it would store only those hits instead of the whole corpus.
vectorstore_disk = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_function,
    persist_directory="./chroma_db",
)

# Run the same query against the persisted store and display the hits.
docs = vectorstore_disk.similarity_search(query)
docs

[INFO] - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information. 
[DEBUG] - Starting component System 
[DEBUG] - Starting component Posthog 
[DEBUG] - Starting component OpenTelemetryClient 
[DEBUG] - Starting component SqliteDB 
[DEBUG] - Starting component QuotaEnforcer 
[DEBUG] - Starting component LocalSegmentManager 
[DEBUG] - Starting component SegmentAPI 
[DEBUG] - Resetting dropped connection: us-api.i.posthog.com 
[DEBUG] - Starting component PersistentLocalHnswSegment 


[Document(metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.')]

[DEBUG] - https://us-api.i.posthog.com:443 "POST /batch/ HTTP/1.1" 200 15 


In [None]:
# Load from disk: reopen the collection persisted under ./chroma_db.
vectorstore_disk_loaded = Chroma(persist_directory="./chroma_db", embedding_function=embedding_function)

# Query the reloaded store (not the `vectorstore_disk` object that wrote it),
# so this cell actually checks that the data can be read back from disk.
docs = vectorstore_disk_loaded.similarity_search(query)
print(docs[0].page_content)

[DEBUG] - Collection langchain already exists, returning existing collection. 


Goldfish are popular pets for beginners, requiring relatively simple care.
