In [1]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

In [2]:
from langchain.schema import Document

doc1 = Document(
    page_content="Virat Kohli is one of the most prolific batsmen in the world and a former captain of the Indian national team. He has been a key player for Royal Challengers Bangalore (RCB) since the inception of the IPL in 2008. Known for his aggressive batting style and unmatched consistency, Kohli holds the record for the most runs in IPL history.",
    metadata={"team": "Royal Challengers Bangalore"}
)

doc2 = Document(
    page_content="Rohit Sharma is the captain of Mumbai Indians and one of the most successful leaders in IPL history. Under his leadership, MI has won five IPL titles. Sharma is known for his elegant stroke play and ability to play big innings, making him one of the most dangerous openers in T20 cricket.",
    metadata={"team": "Mumbai Indians"}
)

doc3 = Document(
    page_content="MS Dhoni, also known as 'Captain Cool', is the iconic leader of Chennai Super Kings. Renowned for his calm demeanor, sharp cricketing brain, and finishing abilities, Dhoni has led CSK to five IPL titles. He is one of the most respected figures in Indian and world cricket.",
    metadata={"team": "Chennai Super Kings"}
)

doc4 = Document(
    page_content="Andre Russell is a powerful all-rounder from the West Indies who plays for Kolkata Knight Riders in the IPL. Known for his explosive batting and ability to change the course of a match within a few overs, Russell is also a handy bowler in the death overs.",
    metadata={"team": "Kolkata Knight Riders"}
)

doc5 = Document(
    page_content="Sanju Samson is a stylish wicketkeeper-batsman and the current captain of Rajasthan Royals. Known for his graceful strokeplay and calm presence on the field, Sanju has become a key player for RR and has shown potential to be a future star of Indian cricket.",
    metadata={"team": "Rajasthan Royals"}
)


In [3]:
docs = [doc1, doc2, doc3, doc4, doc5]

In [4]:
vector_store = Chroma(
    embedding_function = HuggingFaceEmbeddings(),
    persist_directory = "chroma_db",
    collection_name = "sample"
)

  from .autonotebook import tqdm as notebook_tqdm
  vector_store = Chroma(


In [5]:
# add documents

vector_store.add_documents(docs)

['e29446f3-e292-444e-9375-81d13b239357',
 'b3cc928b-9f1a-4a53-b19a-4c41d2ef7e56',
 '06551e62-28df-4adb-adfe-07303703a113',
 'be1811e4-e21f-4ce4-bbc4-426a044765c8',
 '155f4bc3-dd99-4c11-9ffb-4c8531e327df']

In [6]:
# view documents
vector_store.get(include=["embeddings", "documents", "metadatas"])

{'ids': ['e29446f3-e292-444e-9375-81d13b239357',
  'b3cc928b-9f1a-4a53-b19a-4c41d2ef7e56',
  '06551e62-28df-4adb-adfe-07303703a113',
  'be1811e4-e21f-4ce4-bbc4-426a044765c8',
  '155f4bc3-dd99-4c11-9ffb-4c8531e327df'],
 'embeddings': array([[-0.03445228, -0.01802882, -0.02549713, ...,  0.02815777,
         -0.03473779, -0.01223361],
        [-0.02663044, -0.01677458, -0.00829548, ...,  0.0181861 ,
         -0.0155395 , -0.01618778],
        [-0.02728821, -0.01352265, -0.01073592, ...,  0.04486597,
         -0.03755564,  0.00745331],
        [-0.03269751, -0.01256396, -0.00167778, ...,  0.00346175,
         -0.00644564,  0.01789847],
        [ 0.0008253 ,  0.01221396, -0.00996356, ...,  0.03173205,
         -0.01750319, -0.02061303]], shape=(5, 768)),
 'documents': ['Virat Kohli is one of the most prolific batsmen in the world and a former captain of the Indian national team. He has been a key player for Royal Challengers Bangalore (RCB) since the inception of the IPL in 2008. Known for 

In [7]:
# search documents
vector_store.similarity_search(
    query="who among these is a batsman?",
    k=2
)

[Document(metadata={'team': 'Royal Challengers Bangalore'}, page_content='Virat Kohli is one of the most prolific batsmen in the world and a former captain of the Indian national team. He has been a key player for Royal Challengers Bangalore (RCB) since the inception of the IPL in 2008. Known for his aggressive batting style and unmatched consistency, Kohli holds the record for the most runs in IPL history.'),
 Document(metadata={'team': 'Chennai Super Kings'}, page_content="MS Dhoni, also known as 'Captain Cool', is the iconic leader of Chennai Super Kings. Renowned for his calm demeanor, sharp cricketing brain, and finishing abilities, Dhoni has led CSK to five IPL titles. He is one of the most respected figures in Indian and world cricket.")]

In [8]:
# search with similarity score
vector_store.similarity_search_with_score(
    query = "who among these is a all rounder?",
    k=2
)

[(Document(metadata={'team': 'Kolkata Knight Riders'}, page_content='Andre Russell is a powerful all-rounder from the West Indies who plays for Kolkata Knight Riders in the IPL. Known for his explosive batting and ability to change the course of a match within a few overs, Russell is also a handy bowler in the death overs.'),
  1.1004161834716797),
 (Document(metadata={'team': 'Mumbai Indians'}, page_content='Rohit Sharma is the captain of Mumbai Indians and one of the most successful leaders in IPL history. Under his leadership, MI has won five IPL titles. Sharma is known for his elegant stroke play and ability to play big innings, making him one of the most dangerous openers in T20 cricket.'),
  1.1188393831253052)]

In [9]:
# meta data filtering
vector_store.similarity_search_with_score(
    query = "",
    filter = {'team': 'Chennai Super Kings'}
)

[(Document(metadata={'team': 'Chennai Super Kings'}, page_content="MS Dhoni, also known as 'Captain Cool', is the iconic leader of Chennai Super Kings. Renowned for his calm demeanor, sharp cricketing brain, and finishing abilities, Dhoni has led CSK to five IPL titles. He is one of the most respected figures in Indian and world cricket."),
  1.8490792512893677)]

In [10]:
# update documents
updated_doc1 = Document(
    page_content = "Royal Challengers Bangalore is a popular IPL team with star players like Virat Kohli. Though they haven't won the title, they remain a fan favorite.",
    metadata = {"team": "Royal Challengers Bangalore"}
)

vector_store.update_document(document_id='b8dcb314-b103-43be-964c-ef1eebafd491', document=updated_doc1)


In [11]:
# view documents
vector_store.get(include=["embeddings", "documents", "metadatas"])

{'ids': ['e29446f3-e292-444e-9375-81d13b239357',
  'b3cc928b-9f1a-4a53-b19a-4c41d2ef7e56',
  '06551e62-28df-4adb-adfe-07303703a113',
  'be1811e4-e21f-4ce4-bbc4-426a044765c8',
  '155f4bc3-dd99-4c11-9ffb-4c8531e327df'],
 'embeddings': array([[-0.03445228, -0.01802882, -0.02549713, ...,  0.02815777,
         -0.03473779, -0.01223361],
        [-0.02663044, -0.01677458, -0.00829548, ...,  0.0181861 ,
         -0.0155395 , -0.01618778],
        [-0.02728821, -0.01352265, -0.01073592, ...,  0.04486597,
         -0.03755564,  0.00745331],
        [-0.03269751, -0.01256396, -0.00167778, ...,  0.00346175,
         -0.00644564,  0.01789847],
        [ 0.0008253 ,  0.01221396, -0.00996356, ...,  0.03173205,
         -0.01750319, -0.02061303]], shape=(5, 768)),
 'documents': ['Virat Kohli is one of the most prolific batsmen in the world and a former captain of the Indian national team. He has been a key player for Royal Challengers Bangalore (RCB) since the inception of the IPL in 2008. Known for 

In [12]:
# delete document
vector_store.delete(ids=["b8dcb314-b103-43be-964c-ef1eebafd491"])

In [13]:
# view documents
vector_store.get(include=["embeddings", "documents", "metadatas"])

{'ids': ['e29446f3-e292-444e-9375-81d13b239357',
  'b3cc928b-9f1a-4a53-b19a-4c41d2ef7e56',
  '06551e62-28df-4adb-adfe-07303703a113',
  'be1811e4-e21f-4ce4-bbc4-426a044765c8',
  '155f4bc3-dd99-4c11-9ffb-4c8531e327df'],
 'embeddings': array([[-0.03445228, -0.01802882, -0.02549713, ...,  0.02815777,
         -0.03473779, -0.01223361],
        [-0.02663044, -0.01677458, -0.00829548, ...,  0.0181861 ,
         -0.0155395 , -0.01618778],
        [-0.02728821, -0.01352265, -0.01073592, ...,  0.04486597,
         -0.03755564,  0.00745331],
        [-0.03269751, -0.01256396, -0.00167778, ...,  0.00346175,
         -0.00644564,  0.01789847],
        [ 0.0008253 ,  0.01221396, -0.00996356, ...,  0.03173205,
         -0.01750319, -0.02061303]], shape=(5, 768)),
 'documents': ['Virat Kohli is one of the most prolific batsmen in the world and a former captain of the Indian national team. He has been a key player for Royal Challengers Bangalore (RCB) since the inception of the IPL in 2008. Known for 