<a href="https://colab.research.google.com/github/amanmaurya7/langchain_collab/blob/main/vector_store.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
pip install -U langchain langchain-community chromadb sentence-transformers



In [30]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document

In [31]:
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


In [32]:
#create langchain documents for IPL Players

documents = [
    Document(
        page_content=(
            "Virat Kohli is a right-handed batsman who plays for Royal Challengers Bangalore "
            "in the Indian Premier League. He is known for his consistency, aggressive batting "
            "style, and leadership qualities."
        ),
        metadata={
            "player_name": "Virat Kohli",
            "team": "Royal Challengers Bangalore",
            "role": "Batsman",
            "country": "India"
        }
    ),

    Document(
        page_content=(
            "MS Dhoni is a legendary wicket-keeper batsman and former captain of Chennai Super Kings. "
            "He is famous for his calm demeanor, finishing abilities, and tactical brilliance."
        ),
        metadata={
            "player_name": "MS Dhoni",
            "team": "Chennai Super Kings",
            "role": "Wicket-Keeper Batsman",
            "country": "India"
        }
    ),

    Document(
        page_content=(
            "Rohit Sharma is an opening batsman representing Mumbai Indians in the IPL. "
            "He is known for his elegant stroke play and record number of IPL titles as a captain."
        ),
        metadata={
            "player_name": "Rohit Sharma",
            "team": "Mumbai Indians",
            "role": "Batsman",
            "country": "India"
        }
    ),

    Document(
        page_content=(
            "Jasprit Bumrah is a fast bowler for Mumbai Indians. "
            "He is renowned for his unorthodox bowling action, deadly yorkers, "
            "and effectiveness in death overs."
        ),
        metadata={
            "player_name": "Jasprit Bumrah",
            "team": "Mumbai Indians",
            "role": "Bowler",
            "country": "India"
        }
    )
]


In [33]:
vector_store = Chroma(
    embedding_function = embeddings,
    persist_directory = 'chroma-db',
    collection_name = 'sample'
)

In [None]:
vector_store.add_documents(documents) # to add documents in the vector store

['012adede-bc2d-441d-96cb-380fce83e866',
 '679fadf5-f6d4-4241-8cef-002d524b2871',
 '8a2cd69a-c18b-405e-a520-af346e1cad51',
 '6f6296e9-1fa3-4ad4-835d-ef72bf2f0f8c']

In [34]:
vector_store.get(include=['embeddings', 'documents', 'metadatas']) #view documents

{'ids': ['679fadf5-f6d4-4241-8cef-002d524b2871',
  '8a2cd69a-c18b-405e-a520-af346e1cad51',
  '6f6296e9-1fa3-4ad4-835d-ef72bf2f0f8c'],
 'embeddings': array([[-0.04721664,  0.05314635,  0.01306524, ...,  0.00974675,
         -0.00468981, -0.04279274],
        [-0.02794789, -0.00139109, -0.02311316, ..., -0.00083611,
         -0.01834186, -0.00358348],
        [ 0.01430565, -0.0068773 , -0.05585813, ..., -0.09869233,
          0.00702672,  0.09551202]]),
 'documents': ['MS Dhoni is a legendary wicket-keeper batsman and former captain of Chennai Super Kings. He is famous for his calm demeanor, finishing abilities, and tactical brilliance.',
  'Rohit Sharma is an opening batsman representing Mumbai Indians in the IPL. He is known for his elegant stroke play and record number of IPL titles as a captain.',
  'Jasprit Bumrah is a fast bowler for Mumbai Indians. He is renowned for his unorthodox bowling action, deadly yorkers, and effectiveness in death overs.'],
 'uris': None,
 'included': ['e

In [None]:
# search doocuments
vector_store.similarity_search(
    query='who among there are a bowler',
    k=2
)

[Document(metadata={'team': 'Mumbai Indians', 'country': 'India', 'player_name': 'Jasprit Bumrah', 'role': 'Bowler'}, page_content='Jasprit Bumrah is a fast bowler for Mumbai Indians. He is renowned for his unorthodox bowling action, deadly yorkers, and effectiveness in death overs.'),
 Document(metadata={'country': 'India', 'team': 'Mumbai Indians', 'player_name': 'Rohit Sharma', 'role': 'Batsman'}, page_content='Rohit Sharma is an opening batsman representing Mumbai Indians in the IPL. He is known for his elegant stroke play and record number of IPL titles as a captain.')]

In [None]:
#search with similarity score
vector_store.similarity_search_with_score(
    query='who among there are a bowler',
    k=2
)

[(Document(metadata={'role': 'Bowler', 'country': 'India', 'team': 'Mumbai Indians', 'player_name': 'Jasprit Bumrah'}, page_content='Jasprit Bumrah is a fast bowler for Mumbai Indians. He is renowned for his unorthodox bowling action, deadly yorkers, and effectiveness in death overs.'),
  1.0588903427124023),
 (Document(metadata={'player_name': 'Rohit Sharma', 'role': 'Batsman', 'country': 'India', 'team': 'Mumbai Indians'}, page_content='Rohit Sharma is an opening batsman representing Mumbai Indians in the IPL. He is known for his elegant stroke play and record number of IPL titles as a captain.'),
  1.135572910308838)]

In [23]:
# meta-data filtering
vector_store.similarity_search_with_score(
    query="",
    filter={'team':'Mumbai Indians'}
)

[(Document(metadata={'role': 'Batsman', 'team': 'Mumbai Indians', 'player_name': 'Rohit Sharma', 'country': 'India'}, page_content='Rohit Sharma is an opening batsman representing Mumbai Indians in the IPL. He is known for his elegant stroke play and record number of IPL titles as a captain.'),
  1.8592255115509033),
 (Document(metadata={'team': 'Mumbai Indians', 'player_name': 'Jasprit Bumrah', 'role': 'Bowler', 'country': 'India'}, page_content='Jasprit Bumrah is a fast bowler for Mumbai Indians. He is renowned for his unorthodox bowling action, deadly yorkers, and effectiveness in death overs.'),
  1.8985458612442017)]

In [24]:
updated_doc1 =  Document(
    page_content = 'Virat Kholi, is the former captain of Royal Challengers Banglore (RCB), is renowned for his aggresive captainship and Batmanship',
    metadata={
            "player_name": "Virat Kohli",
            "team": "Royal Challengers Bangalore",
            "role": "Batsman",
            "country": "India"
        }
)

In [25]:
# update documents

vector_store.update_document(document_id='012adede-bc2d-441d-96cb-380fce83e866', document=updated_doc1)

In [26]:
vector_store.get(include=['embeddings', 'documents', 'metadatas']) #view documents

{'ids': ['012adede-bc2d-441d-96cb-380fce83e866',
  '679fadf5-f6d4-4241-8cef-002d524b2871',
  '8a2cd69a-c18b-405e-a520-af346e1cad51',
  '6f6296e9-1fa3-4ad4-835d-ef72bf2f0f8c'],
 'embeddings': array([[-0.02625495,  0.06150563, -0.02550211, ..., -0.071864  ,
          0.00226326, -0.01443336],
        [-0.04721664,  0.05314635,  0.01306524, ...,  0.00974675,
         -0.00468981, -0.04279274],
        [-0.02794789, -0.00139109, -0.02311316, ..., -0.00083611,
         -0.01834186, -0.00358348],
        [ 0.01430565, -0.0068773 , -0.05585813, ..., -0.09869233,
          0.00702672,  0.09551202]]),
 'documents': ['Virat Kholi, is the former captain of Royal Challengers Banglore (RCB), is renowned for his aggresive captainship and Batmanship',
  'MS Dhoni is a legendary wicket-keeper batsman and former captain of Chennai Super Kings. He is famous for his calm demeanor, finishing abilities, and tactical brilliance.',
  'Rohit Sharma is an opening batsman representing Mumbai Indians in the IPL.

In [27]:
#delete documents
vector_store.delete(ids=['012adede-bc2d-441d-96cb-380fce83e866'])

In [28]:
vector_store.get(include=['embeddings', 'documents', 'metadatas']) #view documents

{'ids': ['679fadf5-f6d4-4241-8cef-002d524b2871',
  '8a2cd69a-c18b-405e-a520-af346e1cad51',
  '6f6296e9-1fa3-4ad4-835d-ef72bf2f0f8c'],
 'embeddings': array([[-0.04721664,  0.05314635,  0.01306524, ...,  0.00974675,
         -0.00468981, -0.04279274],
        [-0.02794789, -0.00139109, -0.02311316, ..., -0.00083611,
         -0.01834186, -0.00358348],
        [ 0.01430565, -0.0068773 , -0.05585813, ..., -0.09869233,
          0.00702672,  0.09551202]]),
 'documents': ['MS Dhoni is a legendary wicket-keeper batsman and former captain of Chennai Super Kings. He is famous for his calm demeanor, finishing abilities, and tactical brilliance.',
  'Rohit Sharma is an opening batsman representing Mumbai Indians in the IPL. He is known for his elegant stroke play and record number of IPL titles as a captain.',
  'Jasprit Bumrah is a fast bowler for Mumbai Indians. He is renowned for his unorthodox bowling action, deadly yorkers, and effectiveness in death overs.'],
 'uris': None,
 'included': ['e