In [1]:
!pip install langchain chromadb huggingface tiktoken pypdf langchain_huggingface langchain_community



In [2]:
from langchain_huggingface import HuggingFaceEmbeddings, ChatHuggingFace, HuggingFacePipeline
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

In [3]:
!pip install transformers



In [4]:
# Hugging Face Hub identifier of the checkpoint loaded by the tokenizer and model cells.
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"


In [5]:
# Load the tokenizer published under the MODEL_ID checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [6]:
# Load the causal language model for MODEL_ID; "auto" asks the library to pick
# the device placement and the weight dtype.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto", dtype="auto")

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [7]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,  # upper bound on the number of tokens generated per call
    do_sample=True,      # temperature only influences output when sampling is enabled
    temperature=0.5,
)

Device set to use cuda:0


In [None]:
# Five short verses used as sample documents; each carries a "team" code in
# its metadata so that searches can be filtered on it.
doc1 = Document(
    page_content="A master of chase with nerves of steel, His bat writes records that the world can feel.",
    metadata=dict(team="RCB"),
)

doc2 = Document(
    page_content="The Hitman rises with elegance and might, Turning every loose ball into a dazzling sight.",
    metadata=dict(team="MI"),
)

doc3 = Document(
    page_content="With fire in his run-up and magic in his seam, He breaks stumps and hopes with precision supreme.",
    metadata=dict(team="MI"),
)

doc4 = Document(
    page_content="A warrior who spins, fields, and swings with flair,He lifts his team with brilliance everywhere.",
    metadata=dict(team="CSK"),
)

doc5 = Document(
    page_content="Calm as the moon in the fiercest fight, He finishes games with thunder and quiet might.",
    metadata=dict(team="CSK"),
)


In [11]:
# Collect the sample documents, in definition order, for bulk insertion.
docs = [
    doc1,
    doc2,
    doc3,
    doc4,
    doc5,
]

In [12]:
# Open (or create) the "sample" collection stored on disk under my_chroma_db.
# The embedding model is named explicitly: the vectors are persisted, so the
# same model must be used on every run regardless of the library's default.
vector_store = Chroma(
    collection_name="sample",
    persist_directory="my_chroma_db",
    embedding_function=HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2"
    ),
)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  vector_store = Chroma(


In [13]:
# Insert the sample documents under fixed IDs (the values shown in this cell's
# recorded output) rather than freshly generated random ones. The collection
# is persisted on disk, so stable IDs keep a re-run from adding duplicate
# copies and keep ID-based update/delete calls valid across runs.
vector_store.add_documents(
    docs,
    ids=[
        "d74cb28c-8003-42e7-bf95-82fdf8d817d2",
        "8bada683-a303-48c9-9bb4-818235a1135a",
        "f61b6979-8a74-461f-8267-72ddb4ede3d1",
        "5d5f8f1e-1be8-48ba-a20e-4c43b5338e46",
        "94133149-79cf-4c28-b88e-8486154aa7f4",
    ],
)

['d74cb28c-8003-42e7-bf95-82fdf8d817d2',
 '8bada683-a303-48c9-9bb4-818235a1135a',
 'f61b6979-8a74-461f-8267-72ddb4ede3d1',
 '5d5f8f1e-1be8-48ba-a20e-4c43b5338e46',
 '94133149-79cf-4c28-b88e-8486154aa7f4']

In [16]:
# Inspect the collection: fetch IDs together with vectors, texts and metadata.
vector_store.get(
    include=["embeddings", "documents", "metadatas"],
)

{'ids': ['d74cb28c-8003-42e7-bf95-82fdf8d817d2',
  '8bada683-a303-48c9-9bb4-818235a1135a',
  'f61b6979-8a74-461f-8267-72ddb4ede3d1',
  '5d5f8f1e-1be8-48ba-a20e-4c43b5338e46',
  '94133149-79cf-4c28-b88e-8486154aa7f4'],
 'embeddings': array([[-0.02506439,  0.00946242,  0.01786248, ...,  0.04337338,
         -0.00282027, -0.01686628],
        [-0.03618991, -0.03677361,  0.01884305, ...,  0.00919379,
          0.00496385, -0.00470378],
        [-0.05579563,  0.00279139, -0.00910353, ...,  0.01178465,
         -0.02491318, -0.01121984],
        [-0.05836144, -0.02452241,  0.02474626, ...,  0.03349748,
          0.02543841, -0.02602577],
        [ 0.03578585, -0.01704367, -0.00321666, ...,  0.01150414,
         -0.00818546, -0.03077701]]),
 'documents': ['A master of chase with nerves of steel, His bat writes records that the world can feel.',
  'The Hitman rises with elegance and might, Turning every loose ball into a dazzling sight.',
  'With fire in his run-up and magic in his seam, He br

In [17]:
# Retrieve the two stored documents closest to a natural-language question.
vector_store.similarity_search(
    query="Who among these are bowlers?",
    k=2,
)

[Document(metadata={'team': 'MI'}, page_content='With fire in his run-up and magic in his seam, He breaks stumps and hopes with precision supreme.'),
 Document(metadata={'team': 'CSK'}, page_content='A warrior who spins, fields, and swings with flair,He lifts his team with brilliance everywhere.')]

In [18]:
# Same query as the plain search, but each hit is returned as a (Document, score) pair.
# NOTE(review): the score looks like a distance (lower = closer) — confirm
# against the Chroma wrapper documentation before interpreting it.
vector_store.similarity_search_with_score(
    query="Who among these are bowlers?",
    k=2,
)

[(Document(metadata={'team': 'MI'}, page_content='With fire in his run-up and magic in his seam, He breaks stumps and hopes with precision supreme.'),
  1.333247423171997),
 (Document(metadata={'team': 'CSK'}, page_content='A warrior who spins, fields, and swings with flair,He lifts his team with brilliance everywhere.'),
  1.4341808557510376)]

In [19]:
# Restrict the search to documents whose "team" metadata equals "CSK".
# The query text is empty, so the returned scores are relative to an empty
# string and only the filter is meaningful here.
vector_store.similarity_search_with_score(
    query="",
    filter={"team": "CSK"},
)

[(Document(metadata={'team': 'CSK'}, page_content='A warrior who spins, fields, and swings with flair,He lifts his team with brilliance everywhere.'),
  1.768603801727295),
 (Document(metadata={'team': 'CSK'}, page_content='Calm as the moon in the fiercest fight, He finishes games with thunder and quiet might.'),
  1.8484768867492676)]

In [21]:

# Replacement for the "Calm as the moon..." document: same text, team changed to LSG.
updated_doc5 = Document(
    page_content="Calm as the moon in the fiercest fight, He finishes games with thunder and quiet might.",
    metadata={"team": "LSG"},
)

# Resolve the stored ID(s) from the document text instead of pasting a UUID
# copied from one particular run, so the cell works on a fresh collection.
matching_ids = vector_store.get(
    where_document={"$contains": updated_doc5.page_content}
)["ids"]
assert matching_ids, "document to update was not found in the collection"

for document_id in matching_ids:
    vector_store.update_document(document_id=document_id, document=updated_doc5)

In [22]:
# Re-read the whole collection to check the result of the update.
vector_store.get(
    include=["embeddings", "documents", "metadatas"],
)

{'ids': ['d74cb28c-8003-42e7-bf95-82fdf8d817d2',
  '8bada683-a303-48c9-9bb4-818235a1135a',
  'f61b6979-8a74-461f-8267-72ddb4ede3d1',
  '5d5f8f1e-1be8-48ba-a20e-4c43b5338e46',
  '94133149-79cf-4c28-b88e-8486154aa7f4'],
 'embeddings': array([[-0.02506439,  0.00946242,  0.01786248, ...,  0.04337338,
         -0.00282027, -0.01686628],
        [-0.03618991, -0.03677361,  0.01884305, ...,  0.00919379,
          0.00496385, -0.00470378],
        [-0.05579563,  0.00279139, -0.00910353, ...,  0.01178465,
         -0.02491318, -0.01121984],
        [-0.05836144, -0.02452241,  0.02474626, ...,  0.03349748,
          0.02543841, -0.02602577],
        [ 0.03578587, -0.01704366, -0.00321667, ...,  0.01150412,
         -0.00818547, -0.03077701]]),
 'documents': ['A master of chase with nerves of steel, His bat writes records that the world can feel.',
  'The Hitman rises with elegance and might, Turning every loose ball into a dazzling sight.',
  'With fire in his run-up and magic in his seam, He br

In [23]:
# Delete Documents
# Look the target up by its text rather than by a UUID copied from an earlier
# run, so the cell also works when the collection was rebuilt with new IDs.
ids_to_delete = vector_store.get(
    where_document={
        "$contains": "Calm as the moon in the fiercest fight, He finishes games with thunder and quiet might."
    }
)["ids"]

# Skip the call when nothing matched (e.g. the document was already deleted).
if ids_to_delete:
    vector_store.delete(ids=ids_to_delete)

In [24]:
# Re-read the whole collection to confirm the document is gone.
vector_store.get(
    include=["embeddings", "documents", "metadatas"],
)

{'ids': ['d74cb28c-8003-42e7-bf95-82fdf8d817d2',
  '8bada683-a303-48c9-9bb4-818235a1135a',
  'f61b6979-8a74-461f-8267-72ddb4ede3d1',
  '5d5f8f1e-1be8-48ba-a20e-4c43b5338e46'],
 'embeddings': array([[-0.02506439,  0.00946242,  0.01786248, ...,  0.04337338,
         -0.00282027, -0.01686628],
        [-0.03618991, -0.03677361,  0.01884305, ...,  0.00919379,
          0.00496385, -0.00470378],
        [-0.05579563,  0.00279139, -0.00910353, ...,  0.01178465,
         -0.02491318, -0.01121984],
        [-0.05836144, -0.02452241,  0.02474626, ...,  0.03349748,
          0.02543841, -0.02602577]]),
 'documents': ['A master of chase with nerves of steel, His bat writes records that the world can feel.',
  'The Hitman rises with elegance and might, Turning every loose ball into a dazzling sight.',
  'With fire in his run-up and magic in his seam, He breaks stumps and hopes with precision supreme.',
  'A warrior who spins, fields, and swings with flair,He lifts his team with brilliance everywhe