In [1]:
from langchain_huggingface import HuggingFaceEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
from langchain_chroma import Chroma
from langchain_core.documents import Document

In [2]:
# Embedding model that the Chroma store receives as its embedding_function.
embedding = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
)

In [5]:
# Four sample documents; each one carries a single 'name' metadata field.
doc1 = Document(
    page_content='I am going to complete langchain playlist in one or max two days',
    metadata={'name': 'Akshat'},
)

doc2 = Document(
    page_content='I am going to give JEE examination and hopefully secure a rank',
    metadata={'name': 'Suryansh'},
)

doc3 = Document(
    page_content='I am going to deliver a speech',
    metadata={'name': 'Rinkesh'},
)

doc4 = Document(
    page_content='I am going to buy a new car and house as soon as I retire',
    metadata={'name': 'Rakesh'},
)

In [6]:
# Chroma collection named 'home', configured with persist_directory
# 'my_chroma_db' and the embedding model created earlier.
vector_store = Chroma(
    collection_name='home',
    persist_directory='my_chroma_db',
    embedding_function=embedding,
)

# The sample documents, gathered in insertion order.
docs = [doc1, doc2, doc3, doc4]

In [7]:
# Insert the sample documents; the call returns the list of ids given to them.
vector_store.add_documents(documents=docs)

['7b7a844b-2eb3-4742-bf46-4a542d8ccdf7',
 'ba21a0d7-8fde-4a67-950a-d1f638a97a47',
 'ce207dca-a43f-4d42-bed4-e0d98e89bd3a',
 '6722a0fc-a300-497d-b3e8-6ba04d5290fb']

In [9]:
# Inspect everything held in the collection: vectors, raw text and metadata.
vector_store.get(
    include=['embeddings', 'documents', 'metadatas'],
)

{'ids': ['7b7a844b-2eb3-4742-bf46-4a542d8ccdf7',
  'ba21a0d7-8fde-4a67-950a-d1f638a97a47',
  'ce207dca-a43f-4d42-bed4-e0d98e89bd3a',
  '6722a0fc-a300-497d-b3e8-6ba04d5290fb'],
 'embeddings': array([[-0.01901352, -0.05546275,  0.04838766, ..., -0.03034673,
         -0.04740161,  0.00934086],
        [-0.07899651, -0.00689369,  0.04580976, ..., -0.07444284,
         -0.05586007, -0.02447816],
        [-0.03050794,  0.01791835,  0.06147718, ...,  0.00535265,
         -0.03546589, -0.11300997],
        [ 0.03372908,  0.00023485,  0.0092878 , ..., -0.07124814,
         -0.02551288,  0.0129735 ]], shape=(4, 384)),
 'documents': ['I am going to complete langchain playlist in one or max two days',
  'I am going to give JEE examination and hopefully secure a rank',
  'I am going to deliver a speech',
  'I am going to buy a new car and house as soon as I retire'],
 'uris': None,
 'included': ['embeddings', 'documents', 'metadatas'],
 'data': None,
 'metadatas': [{'name': 'Akshat'},
  {'name': 'S

In [13]:
# Ask for only the single closest document (k=1) to the question.
vector_store.similarity_search('who is going to give JEE exam', k=1)

[Document(id='ba21a0d7-8fde-4a67-950a-d1f638a97a47', metadata={'name': 'Suryansh'}, page_content='I am going to give JEE examination and hopefully secure a rank')]

In [None]:
# Search and return (document, score) pairs for the two closest matches.
# The score is a distance: the lower the score, the more similar the document.
# NOTE(review): the store is created without an explicit distance setting
# (no `hnsw:space`), so this is presumably Chroma's default L2 distance rather
# than cosine — confirm against the Chroma configuration docs.
vector_store.similarity_search_with_score(
    query='who is watching youtube playlist',
    k=2
)

[(Document(id='7b7a844b-2eb3-4742-bf46-4a542d8ccdf7', metadata={'name': 'Akshat'}, page_content='I am going to complete langchain playlist in one or max two days'),
  1.296355962753296),
 (Document(id='ba21a0d7-8fde-4a67-950a-d1f638a97a47', metadata={'name': 'Suryansh'}, page_content='I am going to give JEE examination and hopefully secure a rank'),
  1.654639482498169)]

In [18]:
# Empty query combined with a metadata filter: only documents whose metadata
# matches {'name': 'Rinkesh'} are returned (with their scores).
vector_store.similarity_search_with_score('', filter={'name': 'Rinkesh'})

[(Document(id='ce207dca-a43f-4d42-bed4-e0d98e89bd3a', metadata={'name': 'Rinkesh'}, page_content='I am going to deliver a speech'),
  1.5829249620437622)]

In [19]:
# Replacement document: new text, same 'Akshat' name in the metadata.
updated_doc1 = Document(
    metadata={'name': 'Akshat'},
    page_content='Long term goal is to achieve financial independence',
)

In [23]:
# Resolve the target id from its metadata instead of pasting a UUID copied
# from an earlier run's output: the ids returned by add_documents are not
# fixed in this notebook, so a hard-coded id does not survive
# Restart & Run All.
akshat_ids = vector_store.get(where={'name': 'Akshat'})['ids']
assert akshat_ids, "no document with metadata name='Akshat' in the collection"

# Replace the first matching document with the updated version.
vector_store.update_document(document_id=akshat_ids[0], document=updated_doc1)

In [24]:
# Re-inspect the collection (vectors and text only) after the update.
vector_store.get(
    include=['embeddings', 'documents'],
)

{'ids': ['7b7a844b-2eb3-4742-bf46-4a542d8ccdf7',
  'ba21a0d7-8fde-4a67-950a-d1f638a97a47',
  'ce207dca-a43f-4d42-bed4-e0d98e89bd3a',
  '6722a0fc-a300-497d-b3e8-6ba04d5290fb'],
 'embeddings': array([[ 0.016865  ,  0.02408506, -0.05250642, ..., -0.10575747,
          0.01545053, -0.03365779],
        [-0.07899651, -0.00689369,  0.04580976, ..., -0.07444284,
         -0.05586007, -0.02447816],
        [-0.03050794,  0.01791835,  0.06147718, ...,  0.00535265,
         -0.03546589, -0.11300997],
        [ 0.03372908,  0.00023485,  0.0092878 , ..., -0.07124814,
         -0.02551288,  0.0129735 ]], shape=(4, 384)),
 'documents': ['Long term goal is to achieve financial independence',
  'I am going to give JEE examination and hopefully secure a rank',
  'I am going to deliver a speech',
  'I am going to buy a new car and house as soon as I retire'],
 'uris': None,
 'included': ['embeddings', 'documents'],
 'data': None,
 'metadatas': None}

In [25]:
# Resolve the id to delete from its metadata instead of a UUID copied from an
# earlier run's output, so the cell still targets the right document after
# Restart & Run All.
akshat_ids = vector_store.get(where={'name': 'Akshat'})['ids']
assert akshat_ids, "no document with metadata name='Akshat' in the collection"

# Remove a single document, as the original cell did.
vector_store.delete(ids=akshat_ids[:1])

In [26]:
# Re-inspect the collection (vectors and text only) after the deletion.
vector_store.get(
    include=['embeddings', 'documents'],
)

{'ids': ['ba21a0d7-8fde-4a67-950a-d1f638a97a47',
  'ce207dca-a43f-4d42-bed4-e0d98e89bd3a',
  '6722a0fc-a300-497d-b3e8-6ba04d5290fb'],
 'embeddings': array([[-0.07899651, -0.00689369,  0.04580976, ..., -0.07444284,
         -0.05586007, -0.02447816],
        [-0.03050794,  0.01791835,  0.06147718, ...,  0.00535265,
         -0.03546589, -0.11300997],
        [ 0.03372908,  0.00023485,  0.0092878 , ..., -0.07124814,
         -0.02551288,  0.0129735 ]], shape=(3, 384)),
 'documents': ['I am going to give JEE examination and hopefully secure a rank',
  'I am going to deliver a speech',
  'I am going to buy a new car and house as soon as I retire'],
 'uris': None,
 'included': ['embeddings', 'documents'],
 'data': None,
 'metadatas': None}