In [2]:
!pip install chromadb  google-genai tiktoken pypdf langchain-google-genai langchain-community

Defaulting to user installation because normal site-packages is not writeable
Collecting chromadb
  Using cached chromadb-1.0.15-cp39-abi3-win_amd64.whl.metadata (7.1 kB)
Collecting google-genai
  Downloading google_genai-1.24.0-py3-none-any.whl.metadata (40 kB)
Collecting tiktoken
  Using cached tiktoken-0.9.0-cp313-cp313-win_amd64.whl.metadata (6.8 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.1-cp313-cp313-win_amd64.whl.metadata (8.7 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.35.0-py3-none-any.whl.metadata (6.5 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.22.0-cp313-cp313-win_amd64.whl.metadata (5.0 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb


[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import Chroma
import os
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
from langchain.schema import Document

doc1 = Document(
    page_content = "Virat kohli is one of the most successful and consistent batsmen in IPL history, known for his aggressive batting style",
    metadata = {"team":"Royal Challengers Bangalore"}
)
doc2 = Document(
    page_content = "Rohit Sharma is one of the most successful captain in IPL history, known for his aggressive batting style, leading Mumbai Indians to 5 titles. He is known for his calibre to hit big sixes effortlessly.",
    metadata = {"team":"Mumbai Indians"}
)
doc3 = Document(
    page_content = "MS Dhoni, famously knows as Captain Cool, has led CSK to multiple IPL titles. His finishing skills, wicket keeping skills makes him one of the most prolific cricketers of his time.",
    metadata = {"team":"Chennai Super Kings"}
)
doc4 = Document(
    page_content = "Jasprit Bumrah is considered as one of the best fast bowlers in T20 cricket. Playing for Mumbai Indians, he is known for his accuracy and pace, especially with the yorker bowls.",
    metadata = {"team":"Mumbai Indians"}
)
doc5 = Document(
    page_content = "Ravindra Jadeja is dynamic allrounder who contributes with both bat and ball. Representing CSK in IPL, his quality of fielding is among the best of the fielders and due to his allround cricketing abilities in the field, he is given the tag of Sir Jadeja",
    metadata = {"team":"Chennai Super Kings"}
)
    

In [3]:
docs = [doc1,doc2, doc3, doc4, doc5]

In [23]:
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",  
    google_api_key = os.getenv("GOOGLE_API_KEY")
)
vector_store = Chroma(
    embedding_function = embeddings,
    persist_directory = 'chroma_db',
    collection_name = "sample"
)

In [24]:
vector_store.add_documents(docs)

['625f255a-c523-41a5-86c9-a7b4e676a2e6',
 'ac84a43b-a04b-402a-b743-a969942e96ab',
 '66decdf2-9a77-41df-9852-dac7673698bb',
 '425f6f44-6b2a-45e1-ac67-03daa6ffa5bd',
 '0c0af159-fe8b-489f-bba5-b4ca5073ee67']

In [25]:
#view docs
vector_store.get(include=['embeddings','documents','metadatas'])

{'ids': ['625f255a-c523-41a5-86c9-a7b4e676a2e6',
  'ac84a43b-a04b-402a-b743-a969942e96ab',
  '66decdf2-9a77-41df-9852-dac7673698bb',
  '425f6f44-6b2a-45e1-ac67-03daa6ffa5bd',
  '0c0af159-fe8b-489f-bba5-b4ca5073ee67'],
 'embeddings': array([[ 0.02612068, -0.01880962, -0.02984636, ...,  0.0294191 ,
          0.00073082, -0.00577578],
        [ 0.04396841, -0.04151214, -0.03090903, ...,  0.01698163,
          0.00581595, -0.01243031],
        [ 0.01817325, -0.01231558, -0.03203491, ..., -0.00527927,
         -0.06021297, -0.0161485 ],
        [ 0.01013633, -0.0644241 , -0.02855688, ...,  0.03769464,
         -0.01323913,  0.01982199],
        [ 0.01403887, -0.01745911, -0.00805529, ...,  0.01277197,
         -0.02314931,  0.00849224]]),
 'documents': ['Virat kohli is one of the most successful and consistent batsmen in IPL history, known for his aggressive batting style',
  'Rohit Sharma is one of the most successful captain in IPL history, known for his aggressive batting style, leading 

In [28]:
#search docs
vector_store.similarity_search(
    query = "Who among these is a allrounder?",
    k = 2
)

[Document(metadata={'team': 'Royal Challengers Bangalore'}, page_content='Virat kohli is one of the most successful and consistent batsmen in IPL history, known for his aggressive batting style'),
 Document(metadata={'team': 'Mumbai Indians'}, page_content='Jasprit Bumrah is considered as one of the best fast bowlers in T20 cricket. Playing for Mumbai Indians, he is known for his accuracy and pace, especially with the yorker bowls.')]

In [29]:
vector_store.similarity_search_with_score(
    query = "who among these are a bowler?",
    k = 2
)

[(Document(metadata={'team': 'Royal Challengers Bangalore'}, page_content='Virat kohli is one of the most successful and consistent batsmen in IPL history, known for his aggressive batting style'),
  0.4560298025608063),
 (Document(metadata={'team': 'Mumbai Indians'}, page_content='Jasprit Bumrah is considered as one of the best fast bowlers in T20 cricket. Playing for Mumbai Indians, he is known for his accuracy and pace, especially with the yorker bowls.'),
  0.4818243384361267)]

In [30]:
vector_store.similarity_search_with_score(
    query = "",
   filter = {"team":"Chennai Super Kings"}
)

[(Document(metadata={'team': 'Chennai Super Kings'}, page_content='MS Dhoni, famously knows as Captain Cool, has led CSK to multiple IPL titles. His finishing skills, wicket keeping skills makes him one of the most prolific cricketers of his time.'),
  0.5830461978912354),
 (Document(metadata={'team': 'Chennai Super Kings'}, page_content='Ravindra Jadeja is dynamic allrounder who contributes with both bat and ball. Representing CSK in IPL, his quality of fielding is among the best of the fielders and due to his allround cricketing abilities in the field, he is given the tag of Sir Jadeja'),
  0.6985814571380615)]

In [33]:
updated_doc1 = Document(
    page_content = "Virat Kohli, the former captain of Royal Challengers Bangalore (RCB), is renowned for his aggressive leadership and consistency in scoring runs.",
    metadata = {"team":"Royal Challengers Bangalore"}
)
vector_store.update_document(document_id = '625f255a-c523-41a5-86c9-a7b4e676a2e6',document = updated_doc1)

In [35]:
vector_store.get(include=['embeddings','documents','metadatas'])

{'ids': ['625f255a-c523-41a5-86c9-a7b4e676a2e6',
  'ac84a43b-a04b-402a-b743-a969942e96ab',
  '66decdf2-9a77-41df-9852-dac7673698bb',
  '425f6f44-6b2a-45e1-ac67-03daa6ffa5bd',
  '0c0af159-fe8b-489f-bba5-b4ca5073ee67'],
 'embeddings': array([[ 0.02183718, -0.02182764, -0.03336646, ...,  0.01467335,
         -0.02208548,  0.01869799],
        [ 0.04396841, -0.04151214, -0.03090903, ...,  0.01698163,
          0.00581595, -0.01243031],
        [ 0.01817325, -0.01231558, -0.03203491, ..., -0.00527927,
         -0.06021297, -0.0161485 ],
        [ 0.01013633, -0.0644241 , -0.02855688, ...,  0.03769464,
         -0.01323913,  0.01982199],
        [ 0.01403887, -0.01745911, -0.00805529, ...,  0.01277197,
         -0.02314931,  0.00849224]]),
 'documents': ['Virat Kohli, the former captain of Royal Challengers Bangalore (RCB), is renowned for his aggressive leadership and consistency in scoring runs.',
  'Rohit Sharma is one of the most successful captain in IPL history, known for his aggressiv

In [36]:
vector_store.delete(ids = ['625f255a-c523-41a5-86c9-a7b4e676a2e6'])

In [37]:
vector_store.get(include = ['embeddings','documents','metadatas'])

{'ids': ['ac84a43b-a04b-402a-b743-a969942e96ab',
  '66decdf2-9a77-41df-9852-dac7673698bb',
  '425f6f44-6b2a-45e1-ac67-03daa6ffa5bd',
  '0c0af159-fe8b-489f-bba5-b4ca5073ee67'],
 'embeddings': array([[ 0.04396841, -0.04151214, -0.03090903, ...,  0.01698163,
          0.00581595, -0.01243031],
        [ 0.01817325, -0.01231558, -0.03203491, ..., -0.00527927,
         -0.06021297, -0.0161485 ],
        [ 0.01013633, -0.0644241 , -0.02855688, ...,  0.03769464,
         -0.01323913,  0.01982199],
        [ 0.01403887, -0.01745911, -0.00805529, ...,  0.01277197,
         -0.02314931,  0.00849224]]),
 'documents': ['Rohit Sharma is one of the most successful captain in IPL history, known for his aggressive batting style, leading Mumbai Indians to 5 titles. He is known for his calibre to hit big sixes effortlessly.',
  'MS Dhoni, famously knows as Captain Cool, has led CSK to multiple IPL titles. His finishing skills, wicket keeping skills makes him one of the most prolific cricketers of his ti