In [None]:
from pymilvus import connections

connections.connect(
    alias="default",
    uri="URL",  # Full HTTPS URI
    token="TOKEN",  # Format: user:password
)

print("✅ Connected to Zilliz Cloud Milvus.")

✅ Connected to Zilliz Cloud Milvus.


In [10]:
import os
import numpy as np
from google import genai
from google.genai.types import EmbedContentConfig

# Set up the Gemini client - ensure you have your API key set in environment variables
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    raise ValueError("Please set the GEMINI_API_KEY environment variable")

client = genai.Client(api_key=api_key)
embed_model = "text-embedding-004"

def get_embedding(text):
    """Get text embedding from Gemini API"""
    response = client.models.embed_content(
        model=embed_model,
        contents=[text],
        config=EmbedContentConfig(
            task_type="RETRIEVAL_QUERY" if len(text) < 100 else "RETRIEVAL_DOCUMENT",
            output_dimensionality=768,
        ),
    )
    return np.array(response.embeddings[0].values)

In [11]:
from pymilvus import Collection, FieldSchema, CollectionSchema, DataType

# Define schema
fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
    FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=512),
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=768)
]

schema = CollectionSchema(fields, description="Text + vector collection")
collection = Collection(name="my_text_vectors", schema=schema)

In [12]:
from pymilvus import utility

collections = utility.list_collections()
print("Collections:", collections)

Collections: ['my_text_vectors']


In [13]:
import numpy as np

texts = ["hello world", "zilliz cloud vector", "chatgpt rocks"]
# vectors = np.random.rand(len(texts), 768).tolist()  # Simulate 768-dim vectors
vectors = []
for text in texts:
    vectors.append(get_embedding(text))
# Prepare data: [None for auto_id, texts, vectors]
data = [texts, vectors]
# data
collection.insert(data)
# collection.flush()

(insert count: 3, delete count: 0, upsert count: 0, timestamp: 457800906199007236, success count: 3, err count: 0, cost: 3)

In [26]:
np.shape(vectors[2])  # Check the shape of the first vector

(768,)

In [21]:
len(vectors)

3

In [None]:
collection.create_index(
    field_name="embedding",
    index_params={
        "metric_type": "L2",         # or "COSINE", "IP"
        "index_type": "IVF_FLAT",    # or "HNSW", "IVF_PQ", etc.
        "params": {"nlist": 128}
    }
)

collection.load()

current_search_term = "Hello world"
search_embedding = get_embedding(current_search_term)

search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
results = collection.search(
    search_embedding
    "embedding",
    search_params,
    limit=3,
    output_fields=["text"]
)

for result in results[0]:
    print("Matched text:", result.entity.get("text"), "score:", result.distance)

SyntaxError: invalid syntax. Perhaps you forgot a comma? (2618038713.py, line 17)

In [9]:
from pymilvus import Collection

# Replace with your collection name
collection = Collection("my_text_vectors")
collection.drop()