# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

This section shows you how to upload vectors into a new Milvus collection and run simple search queries using the official Milvus client library. In this example, you use a dataset from a CSV file that contains a list of books in different genres. Milvus will serve as a search engine.

Install **kubectl** and the **Google Cloud SDK** with the necessary authentication plugin for Google Kubernetes Engine (GKE).

In [None]:
%%bash

# Install kubectl: download the latest stable release binary and put it on the PATH.
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl

# Register the Google Cloud apt repository and its signing key.
# -y keeps apt-get from stopping at a confirmation prompt, which cannot be answered from a notebook cell.
sudo apt-get update && sudo apt-get install -y apt-transport-https ca-certificates gnupg
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg
echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | sudo tee -a /etc/apt/sources.list.d/google-cloud-sdk.list

# Install the gcloud authentication plugin for GKE from the repository added above.
sudo apt-get update && sudo apt-get install -y google-cloud-cli-gke-gcloud-auth-plugin

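**Replace** the value of `KUBERNETES_CLUSTER_NAME` with your cluster name (for example, `milvus-cluster`), and make sure the `GOOGLE_CLOUD_REGION` environment variable is set to your cluster's region. Then retrieve the GKE cluster's credentials using the `gcloud` command.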

In [None]:
%%bash

# Name of the GKE cluster to connect to; change this to match your cluster.
export KUBERNETES_CLUSTER_NAME=milvus-cluster
# Fetch kubeconfig credentials for the cluster. GOOGLE_CLOUD_REGION must already be set in the
# environment; ${VAR:?} aborts with a clear message instead of passing an empty --region to gcloud.
gcloud container clusters get-credentials "$KUBERNETES_CLUSTER_NAME" --region "${GOOGLE_CLOUD_REGION:?Set GOOGLE_CLOUD_REGION to the region of your GKE cluster}"

Download the dataset from GitHub.

In [None]:
%%bash

# URL of the CSV file with the list of books used in this example.
export DATASET_PATH=https://raw.githubusercontent.com/GoogleCloudPlatform/kubernetes-engine-samples/refs/heads/main/databases/qdrant/manifests/04-notebook/dataset.csv
# -f makes curl fail on an HTTP error instead of saving the error page as dataset.csv;
# -sS stays quiet but still prints errors; -L follows redirects; -O keeps the remote file name.
curl -fsSLO "$DATASET_PATH"

Create a `.env` file with the environment variables required for connecting to Milvus in a Kubernetes cluster.

In [None]:
%%bash

# Read the load balancer ingress IP of the milvus-ilb service in the milvus namespace.
MILVUS_ENDPOINT=$(kubectl get svc milvus-ilb -n milvus --output jsonpath="{.status.loadBalancer.ingress[0].ip}")

# Stop here if the lookup failed or no IP is assigned yet, rather than writing an
# empty MILVUS_ENDPOINT that only surfaces later as a connection error.
if [ -z "$MILVUS_ENDPOINT" ]; then
  echo "Could not determine the IP of service milvus-ilb in namespace milvus" >&2
  exit 1
fi

echo "MILVUS_ENDPOINT=$MILVUS_ENDPOINT" > .env


Install a Milvus client library:

In [None]:
! pip install pymilvus fastembed python-dotenv

Import Python libraries:

In [None]:
import os
import csv
import json
from pymilvus import MilvusClient, DataType
from fastembed import TextEmbedding
from typing import List
import numpy as np
from dotenv import load_dotenv

Load and prepare data from a CSV file for inserting it into a Milvus collection:

In [None]:
# Read every CSV row into a dict keyed by the header names. The relative path matches where
# the download cell saved the file (the notebook's working directory), newline="" is what the
# csv module expects for file objects, and the context manager closes the file once the rows
# are loaded.
with open("dataset.csv", newline="", encoding="utf-8") as csv_file:
    books = list(csv.DictReader(csv_file))

# Embed the "description" column of every row with the fastembed model.
descriptions = [doc["description"] for doc in books]
embedding_model = TextEmbedding(model_name="BAAI/bge-small-en")
embeddings: List[np.ndarray] = list(embedding_model.embed(descriptions))


Define a Milvus connection; it requires credentials for authentication (the default is `root:Milvus`). Then define the collection schema and the vector index, and create the `books` collection:

In [None]:
# Load the variables from the .env file, then read the Milvus endpoint from the environment.
load_dotenv()
milvus_endpoint = os.getenv("MILVUS_ENDPOINT")
if not milvus_endpoint:
    # Fail here with an actionable message rather than with a TypeError on string
    # concatenation (unset) or a connection error against "http://:19530" (empty).
    raise RuntimeError(
        "MILVUS_ENDPOINT is not set; re-run the cell that writes the .env file "
        "and check that the milvus-ilb service has a load balancer IP assigned."
    )
uri = f"http://{milvus_endpoint}:19530"
client = MilvusClient(
    uri=uri,
    token="root:Milvus"
)

# Collection schema: an auto-generated primary key, the embedding vector, and the
# text fields that the search results return.
schema = MilvusClient.create_schema(
    auto_id=True,
    enable_dynamic_field=True,
)

schema.add_field(field_name="id", datatype=DataType.INT64, is_primary=True)
# dim must equal the length of the vectors produced by the embedding model.
schema.add_field(field_name="vector", datatype=DataType.FLOAT_VECTOR, dim=384)
schema.add_field(field_name="description", datatype=DataType.VARCHAR, max_length=4096)
schema.add_field(field_name="title", datatype=DataType.VARCHAR, max_length=512)
schema.add_field(field_name="author", datatype=DataType.VARCHAR, max_length=512)

# Index the vector field using the cosine metric.
index_params = client.prepare_index_params()
index_params.add_index(
    field_name="vector",
    index_type="AUTOINDEX",
    metric_type="COSINE"
)

client.create_collection(
    collection_name="books",
    schema=schema,
    index_params=index_params
)

Prepare data for uploading:

In [None]:
# Pair each book row with its embedding and store the embedding under the "vector" key,
# the vector field name declared in the collection schema. Building new dicts leaves the
# rows in `books` untouched instead of mutating them through an alias.
documents: list[dict[str, object]] = [
    {**book, "vector": vector} for book, vector in zip(books, embeddings)
]

Insert data into the Milvus collection:

In [None]:
# Upload all prepared documents to the "books" collection; the bare name on the
# last line lets the notebook display the value returned by insert().
insert_result = client.insert(collection_name="books", data=documents)
insert_result

Define the query function. Fastembed converts the text query into an embedding, and Milvus performs a vector search and displays results.

It prints each result separated by a line of dashes, in the following format:

- Title: Title of the book, Author: Author of the book, Vector distance
- Description: As stored in your document's description metadata field

In [None]:
def handle_query(query, limit):
    """Search the "books" collection for entries matching a text query and print them.

    The query text is embedded with ``embedding_model`` and passed to
    ``client.search``. Each returned hit is printed as a title/author/distance
    line, followed by its description and a separator line.

    Args:
        query: Text to search for.
        limit: Value passed to ``client.search`` as ``limit``.
    """
    # A single text is embedded, so the first vector produced is the query vector.
    embedded_query = next(iter(embedding_model.embed([query])))
    search_results = client.search(
        collection_name="books",
        data=[embedded_query],
        anns_field="vector",
        limit=limit,
        output_fields=["description", "title", "author"],
    )

    # Only one query vector was sent, so only the first entry of the results is read.
    hits_for_query = search_results[0]
    for hit in hits_for_query:
        entity = hit["entity"]
        print(f"Title: {entity['title']}, Author: {entity['author']}, distance: {hit['distance']}")
        print(entity["description"])
        print("---------")

Run the query `drama about people and unhappy love`:

In [None]:
# Ask for the two closest matches to the example query.
handle_query("drama about people and unhappy love", limit=2)