In [1]:
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
import getpass
import os

if not os.environ.get("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [3]:
def drop_db_with_collections(db_name):
    try:
        client.use_database(db_name)
        for collection in client.list_collections():
            client.drop_collection(collection)
    
        client.drop_database(db_name)
    
        return True

    except Exception as e:
        print(e.message)
        return False

In [14]:
from pymilvus import MilvusClient

base_url = "host.docker.internal"
URI = f"http://{base_url}:19530"
client = MilvusClient(
    uri=URI,
)

db_name="milvus_example"

# drop_db_with_collections(db_name)

try:
    client.create_database(db_name=db_name)
except Exception as e:
    print(e.message)
    
client.list_databases()

2025-05-25 10:23:48,267 [ERROR][handler]: RPC error: [create_database], <MilvusException: (code=65535, message=database already exist: milvus_example)>, <Time:{'RPC start': '2025-05-25 10:23:48.265455', 'RPC error': '2025-05-25 10:23:48.267879'}> (decorators.py:140)


database already exist: milvus_example


['milvus_quick_start',
 'langchain_example',
 'milvus_demo',
 'milvus_example',
 'default']

In [15]:
from langchain_milvus import Milvus

vector_store = Milvus(
    embedding_function=embeddings,
    connection_args={"uri": f"http://{base_url}:19530", "db_name": db_name},
    index_params={"index_type": "FLAT", "metric_type": "L2"},
    # auto_id=True,
)

In [16]:
from uuid import uuid4

from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]
uuids = [str(uuid4()) for _ in range(len(documents))]

In [17]:
ids = vector_store.add_documents(documents=documents, ids=uuids)
ids

['a22c84ae-8308-41ea-9022-d91ab40c8abe',
 '9fa7bbab-a027-431a-9749-19bf2b7e9db8',
 '81d0ac68-bdbc-4c24-bb46-f4c4151cf2c1',
 '15189f45-234d-4d49-9a70-4f488e5eb965',
 'b48abbaa-089f-459e-9459-15d67f3e1f58',
 'f0961880-3cd8-45e1-8eb7-235aa321606e',
 '92cd8245-b457-4595-8b8d-3cbf73683a3c',
 '7916739d-91bd-44ac-9801-3bf23cbb52b9',
 '7c1a816f-72b9-4080-8f4e-539166750413',
 'a05a4389-ad6b-42a8-9c4b-029da42a3406']

In [18]:
from langchain_milvus import BM25BuiltInFunction, Milvus
from langchain_openai import OpenAIEmbeddings

vectorstore = Milvus.from_documents(
    documents=documents,
    embedding=OpenAIEmbeddings(),
    builtin_function=BM25BuiltInFunction(),
    # `dense` is for OpenAI embeddings, `sparse` is the output field of BM25 function
    vector_field=["dense", "sparse"],
    connection_args={
        "uri": URI,
    },
    consistency_level="Strong",
    drop_old=True,
)

In [24]:
vectorstore.similarity_search_with_score("chocolate")

[(Document(metadata={'source': 'tweet', 'pk': 458266833003756618}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.'),
  1.4779618978500366),
 (Document(metadata={'source': 'news', 'pk': 458266833003756621}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
  0.6912291646003723),
 (Document(metadata={'source': 'tweet', 'pk': 458266833003756622}, page_content="Wow! That was an amazing movie. I can't wait to see it again."),
  0.6895994544029236),
 (Document(metadata={'source': 'website', 'pk': 458266833003756624}, page_content='The top 10 soccer players in the world right now.'),
  0.6816434264183044)]

In [25]:
vectorstore.fields

['text', 'pk', 'dense', 'sparse', 'source']