In [10]:
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="llama3")

In [11]:
embeddings.embed_query("Hello world")

[0.0009224242,
 -0.0047421954,
 0.012595663,
 -0.0106747495,
 0.015676681,
 0.001093811,
 0.016541665,
 0.008784665,
 -0.0005965259,
 0.0038157336,
 -0.010216084,
 0.0008299676,
 -0.00436616,
 0.019747095,
 -0.002259929,
 0.019692542,
 -0.0010452465,
 0.024230352,
 -0.00510795,
 0.014655236,
 -0.009424318,
 0.010444821,
 -0.0012137329,
 0.0055209156,
 -0.027045477,
 0.005641472,
 -0.008167045,
 0.012848489,
 0.00981208,
 0.002885494,
 -0.00076894084,
 0.035502944,
 -0.01375015,
 0.012590604,
 -0.010273985,
 0.011977496,
 -0.02389046,
 0.005937509,
 0.0016761476,
 -0.005192593,
 -0.010762704,
 -0.0006185321,
 0.0077448837,
 -0.010997557,
 0.029480392,
 0.0040683313,
 -0.0030803557,
 -0.020551834,
 0.0055688513,
 -0.03292393,
 0.017099164,
 -0.0065411204,
 0.003030976,
 8.782276e-05,
 -0.0068812687,
 -0.0052065603,
 -0.008849249,
 -0.027702844,
 -0.020384762,
 0.008906981,
 -0.00762194,
 -0.005969954,
 0.01736642,
 -0.01841934,
 0.01575862,
 0.00567771,
 0.020313064,
 0.0032231791,
 0.00

In [12]:
import getpass
import os

if not os.environ.get("GROQ_API_KEY"):
  os.environ["GROQ_API_KEY"] = getpass.getpass("Enter API key for GROQ: ")

from langchain_groq import ChatGroq


llm = ChatGroq(model="llama-3.1-8b-instant")

In [13]:
from langchain_chroma import Chroma

vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",  # Where to save data locally, remove if not necessary
)

In [14]:
from uuid import uuid4

from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
    id=1,
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
    id=2,
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
    id=3,
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
    id=4,
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
    id=5,
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
    id=6,
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
    id=7,
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
    id=8,
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
    id=9,
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
    id=10,
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]
uuids = [str(uuid4()) for _ in range(len(documents))]

vector_store.add_documents(documents=documents, ids=uuids)

['5f9f90cf-ae7b-4919-80ea-65e0110c6a31',
 '858cb5bd-b83d-4db3-a0c5-8760cc724b9f',
 '06a45d0a-c784-4111-b563-218d71d6e5aa',
 '6b4e4d3a-1971-4fb7-bb4d-b5bb770272ac',
 '7420ddc8-931f-4810-9409-632c95c89959',
 'd95c4e89-7083-44e7-9bf3-2433713f1e00',
 '1347efc4-b15c-431b-99e4-9e7c892a1271',
 '8163202c-bc5a-4689-b73a-f4b8b1954b6c',
 'dc98f59a-f266-4889-ae0e-1f378376c962',
 'c54e5c9e-8a30-47ad-ba71-1fd1298f1e77']

In [15]:
results = vector_store.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=2,
    filter={"source": "tweet"},
)
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]
* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]
