In [1]:
import os
import pandas as pd
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict

In [2]:
class Settings(BaseSettings):
    model_config = SettingsConfigDict(
        env_file="../.env", env_file_encoding="utf-8", extra="ignore"
    )
    embedding_base_url: str
    embedding_api_key: str
    embedding_model: str

settings = Settings()
print(settings.embedding_model)

baai/bge-m3


In [4]:
class DBSettings(BaseSettings):
    model_config = SettingsConfigDict(
        env_file="database/milvus_langchain/.env", env_file_encoding="utf-8", extra="ignore"
    )
    milvus_port: str
    milvus_webui_port: str

db_settings = DBSettings()
print(db_settings.milvus_port)

19530


# Prepare Embedder

In [5]:
import os
from langchain_openai import OpenAIEmbeddings

os.environ["OPENAI_API_BASE"] = "{}/v1/".format(settings.embedding_base_url)
embeddings = OpenAIEmbeddings(
    model=settings.embedding_model,
    api_key=settings.embedding_api_key
)
vectors = embeddings.embed_documents(["hello", "goodbye"])
len(vectors[0])

1024

# Prepare DB

In [6]:
from langchain_milvus import Milvus

In [8]:
uri = f"http://localhost:{db_settings.milvus_port}"
vector_store = Milvus(
    embedding_function=embeddings,
    connection_args={"uri": uri},
    collection_name="test1"
)

In [9]:
vector_store

<langchain_milvus.vectorstores.milvus.Milvus at 0x12862f9a0>

# Test Docs

In [10]:
from uuid import uuid4
from langchain_core.documents import Document

In [11]:
document_1 = Document(
    page_content="I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)
documents = [
    document_1,
    document_2
]
uuids = [str(uuid4()) for _ in range(len(documents))]

In [12]:
vector_store.add_documents(documents=documents, ids=uuids)

['4bd43dd4-3340-46f4-a733-f90acf50ad34',
 'eb1efad9-6d19-4217-9f1f-7ceff65133c2']

In [13]:
results = vector_store.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy", k=2
)
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

* I had chocalate chip pancakes and scrambled eggs for breakfast this morning. [{'source': 'tweet', 'pk': '4bd43dd4-3340-46f4-a733-f90acf50ad34'}]
* The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news', 'pk': 'eb1efad9-6d19-4217-9f1f-7ceff65133c2'}]


In [14]:
print("FILTERING WITH TWEET")
results = vector_store.similarity_search(
    "kitty", k=10, filter={"source": {"$in": ["tweet"]}}
)
for doc in results:
    print(f"* {doc.page_content} [{doc.metadata}]")
    
print("FILTERING WITH NEWS")
results = vector_store.similarity_search(
    "kitty", k=10, filter={"source": {"$in": ["news"]}}
)
for doc in results:
    print(f"* {doc.page_content} [{doc.metadata}]")

FILTERING WITH TWEET
* I had chocalate chip pancakes and scrambled eggs for breakfast this morning. [{'source': 'tweet', 'pk': '4bd43dd4-3340-46f4-a733-f90acf50ad34'}]
* The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news', 'pk': 'eb1efad9-6d19-4217-9f1f-7ceff65133c2'}]
FILTERING WITH NEWS
* I had chocalate chip pancakes and scrambled eggs for breakfast this morning. [{'source': 'tweet', 'pk': '4bd43dd4-3340-46f4-a733-f90acf50ad34'}]
* The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news', 'pk': 'eb1efad9-6d19-4217-9f1f-7ceff65133c2'}]
