In [2]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import Chroma

In [5]:
from langchain.schema import Document

# Build three LangChain documents; each pairs a sentence with a `team` tag
# stored in its metadata.
doc1, doc2, doc3 = (
    Document(page_content=text, metadata={"team": team})
    for text, team in (
        ("Virat Kohi is the king of IPL", "RCB"),
        ("Rohit Sharma is good player", "MI"),
        ("MSD is captain cool", "CSK"),
    )
)

In [6]:

# Gather the documents into one list so they can be passed to the vector
# store in a single call.
docs = [
    doc1,
    doc2,
    doc3,
]

In [7]:
# Chroma vector store for the "sample" collection. Texts are embedded with the
# Google "gemini-embedding-001" model, and persist_directory points the store
# at the relative "chroma_db" folder.
# NOTE(review): the saved output of this cell shows the tail of a warning
# pointing at this line — possibly a deprecation notice for this Chroma
# import; confirm and migrate if so.
vector_store = Chroma(
    collection_name="sample",
    persist_directory="chroma_db",
    embedding_function=GoogleGenerativeAIEmbeddings(model="gemini-embedding-001"),
)

  vector_store = Chroma(


In [8]:
# Add the documents to the store; the call returns the list of ids assigned
# to them (one per document, in order).
vector_store.add_documents(documents=docs)

['5ef7c16b-c961-4a1e-8626-599343d1ecec',
 '741dabe2-43f9-4398-b312-1d0b243ab2b9',
 '9e68706c-1578-4e12-b837-28df240a868a']

In [10]:
# Inspect what is currently stored: the ids plus the requested embeddings,
# document texts and metadata.
vector_store.get(
    include=["embeddings", "documents", "metadatas"],
)

{'ids': ['5ef7c16b-c961-4a1e-8626-599343d1ecec',
  '741dabe2-43f9-4398-b312-1d0b243ab2b9',
  '9e68706c-1578-4e12-b837-28df240a868a'],
 'embeddings': array([[-0.0211794 ,  0.00827243,  0.02611976, ...,  0.01753195,
         -0.0215484 , -0.01005369],
        [-0.02684962,  0.01051292,  0.02093918, ...,  0.00714837,
         -0.01413428, -0.00337904],
        [-0.01028546, -0.0128946 ,  0.01876419, ...,  0.01642916,
         -0.01794407, -0.00932398]], shape=(3, 3072)),
 'documents': ['Virat Kohi is the king of IPL',
  'Rohit Sharma is good player',
  'MSD is captain cool'],
 'uris': None,
 'included': ['embeddings', 'documents', 'metadatas'],
 'data': None,
 'metadatas': [{'team': 'RCB'}, {'team': 'MI'}, {'team': 'CSK'}]}

In [13]:
# Similarity search: return only the single best match (k=1) for the query.
vector_store.similarity_search(query="Who among these is king", k=1)

[Document(metadata={'team': 'RCB'}, page_content='Virat Kohi is the king of IPL')]

In [14]:
# Similarity search with score: same lookup, but each hit comes back as a
# (Document, score) pair.
# NOTE(review): for Chroma the score is presumably a distance (lower means
# closer) rather than a similarity — confirm before interpreting it.
vector_store.similarity_search_with_score(query="who is the king", k=1)

[(Document(metadata={'team': 'RCB'}, page_content='Virat Kohi is the king of IPL'),
  0.43813788890838623)]

In [18]:
# Metadata filtering: restrict the scored search to documents whose `team`
# metadata equals "CSK".
# NOTE(review): the query is a single space, so the returned score is
# presumably not meaningful here — confirm that only the filter matters.
vector_store.similarity_search_with_score(query=" ", filter={"team": "CSK"})

[(Document(metadata={'team': 'CSK'}, page_content='MSD is captain cool'),
  0.4628012180328369)]

In [19]:
# Replacement content for the RCB document; the metadata is kept the same.
updated_doc1 = Document(
    page_content="Virat kohli is kinda agressive player",
    metadata={"team": "RCB"}
)

# Look the target id up by metadata instead of pasting a UUID copied from an
# earlier run's output. The documents were added without explicit ids, so the
# ids are whatever the store assigned at insert time, and a hard-coded value
# will not exist on a fresh store (e.g. after Restart & Run All on a new clone).
rcb_ids = vector_store.get(where={"team": "RCB"})["ids"]
if not rcb_ids:
    raise LookupError("no document with team 'RCB' found in the vector store")

# Replace the stored document under that id with the updated one (the saved
# output of the following cell shows new text and a new embedding for it).
vector_store.update_document(document_id=rcb_ids[0], document=updated_doc1)

In [20]:
# Dump the stored ids, embeddings, document texts and metadata again to check
# the effect of the update.
vector_store.get(
    include=["embeddings", "documents", "metadatas"],
)

{'ids': ['5ef7c16b-c961-4a1e-8626-599343d1ecec',
  '741dabe2-43f9-4398-b312-1d0b243ab2b9',
  '9e68706c-1578-4e12-b837-28df240a868a'],
 'embeddings': array([[ 0.00318774,  0.002113  ,  0.02615281, ...,  0.01010655,
         -0.01207263, -0.00871894],
        [-0.02684962,  0.01051292,  0.02093918, ...,  0.00714837,
         -0.01413428, -0.00337904],
        [-0.01028546, -0.0128946 ,  0.01876419, ...,  0.01642916,
         -0.01794407, -0.00932398]], shape=(3, 3072)),
 'documents': ['Virat kohli is kinda agressive player',
  'Rohit Sharma is good player',
  'MSD is captain cool'],
 'uris': None,
 'included': ['embeddings', 'documents', 'metadatas'],
 'data': None,
 'metadatas': [{'team': 'RCB'}, {'team': 'MI'}, {'team': 'CSK'}]}