In [1]:
from pymilvus import MilvusClient
import numpy as np
import os
import cupy as cp
import json
from tqdm import tqdm

In [2]:

client = MilvusClient("./test_milvus/img2img.db")

index_params = client.prepare_index_params()



index_params.add_index(
    field_name="vector", 
    index_type="GPU_CAGRA",
    params={ "build_algo": "NN_DESCENT" }
)

client.create_collection(
    collection_name="img2img",
    dimension=1024,  # The vectors we will use in this demo has 384 dimensions
    metric_type="L2",
    vector_field_name="vector",
    auto_id=True,
    enable_dynamic_field=True,
    index_params=index_params
)



In [3]:
batch_index = 0
emb_dir = os.path.join(f"MDI_RAG_Image2Image_Research/data/cupy_embeddings_level1+/", f"cupy_embeddings_batch_{batch_index}.npy")
info_dir = os.path.join(f"MDI_RAG_Image2Image_Research/data/cupy_infos_level1+/", f"cupy_infos_batch_{batch_index}.json")
print(f"Reading cupy embeddings from {emb_dir}.")
if os.path.exists(emb_dir):
    embeddings = cp.load(os.path.join(emb_dir))

    embeddings = cp.asnumpy(embeddings).astype(np.float32)
else:
    print(f"Embeddings file not found for batch {batch_index}.")

print(f"Reading cupy infos from {info_dir}.")
if os.path.exists(info_dir):
    with open(info_dir, "r") as f:
        infos = json.load(f)
else:
    print(f"Infos file not found for batch {batch_index}.")

Reading cupy embeddings from MDI_RAG_Image2Image_Research/data/cupy_embeddings_level1+/cupy_embeddings_batch_0.npy.
Reading cupy infos from MDI_RAG_Image2Image_Research/data/cupy_infos_level1+/cupy_infos_batch_0.json.


In [4]:
data = []
for i in tqdm(range(len(infos))):
    data.append({"id": i, "vector": embeddings[i], "info": infos[i]})


100%|██████████| 3419683/3419683 [00:02<00:00, 1620448.96it/s]


In [8]:
client.insert(collection_name="img2img", data=data)

Unexpected error: [insert_rows], Error parsing message with type 'milvus.proto.schema.FieldData', <Time: {'RPC start': '2024-11-05 12:30:42.337345', 'Exception': '2024-11-05 12:39:07.634234'}>


MilvusException: <MilvusException: (code=1, message=Unexpected error, message=<Error parsing message with type 'milvus.proto.schema.FieldData'>)>

In [None]:
index = 8460
query = embeddings[index]
info = infos[index]
print(query, info)

In [9]:
for i in tqdm(range(len(embeddings))):
    image_embedding = embeddings[i]
    info = infos[i]
    client.insert("img2img",{"vector": image_embedding, "info": info})
# 8460


100%|██████████| 3419683/3419683 [9:25:40<00:00, 100.76it/s]   


In [10]:
query = np.random.rand(1, 1024).astype('float32')
if query.ndim == 1:
    query = query.reshape(1, -1)
if query.dtype != np.float32:
    query = query.astype('float32')
query

array([[0.72670674, 0.6917435 , 0.00941894, ..., 0.52380115, 0.07424912,
        0.39928713]], dtype=float32)

In [None]:
while True:
    query = np.random.rand(1, 1024).astype('float32')
    results = client.search(
        "img2img",
        data=query,
        output_fields=["info"],
        search_params={"metric_type": "L2"},
    )
    print(results)

KeyboardInterrupt: 

In [None]:

client.insert(collection_name="img2img", records=embeddings.tolist())
print("Insert vectors into Milvus successfully.")

In [None]:

vectors = [[ np.random.uniform(-1, 1) for _ in range(384) ] for _ in range(len(docs)) ]
data = [ {"id": i, "vector": vectors[i], "text": docs[i], "subject": "history"} for i in range(len(vectors)) ]
res = client.insert(
    collection_name="demo_collection",
    data=data
)

res = client.search(
    collection_name="demo_collection",
    data=[vectors[0]],
    filter="subject == 'history'",
    limit=2,
    output_fields=["text", "subject"],
)
print(res)

res = client.query(
    collection_name="demo_collection",
    filter="subject == 'history'",
    output_fields=["text", "subject"],
)
print(res)

res = client.delete(
    collection_name="demo_collection",
    filter="subject == 'history'",
)
print(res)
