In [1]:
import torch
from modelscope import AutoModel

MODEL_NAME = "/home/public/dkx/model/BAAI/BGE-VL-v1.5-zs"

model = AutoModel.from_pretrained(MODEL_NAME, trust_remote_code=True)
model.eval()
model.cuda()

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

LLaVANextForEmbedding(
  (vision_tower): CLIPVisionModel(
    (vision_model): CLIPVisionTransformer(
      (embeddings): CLIPVisionEmbeddings(
        (patch_embedding): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14), bias=False)
        (position_embedding): Embedding(577, 1024)
      )
      (pre_layrnorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (encoder): CLIPEncoder(
        (layers): ModuleList(
          (0-23): 24 x CLIPEncoderLayer(
            (self_attn): CLIPSdpaAttention(
              (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
            )
            (layer_norm1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
            (mlp): CLIPMLP(
              (activation_fn): QuickGELU

In [2]:
from pymilvus import MilvusClient

client = MilvusClient(
    uri="http://localhost:19530"
)

client.load_collection("test")

In [3]:
client.list_collections()

['bar', 'test']

In [4]:
with torch.no_grad():
    model.set_processor(MODEL_NAME)

    # box 34
    box34_inputs = model.data_process(
        text="The crop production data for barley and rye in the specified region reveals notable differences. Barley shows a wider range of production with high variability, indicated by the presence of multiple outliers around 10,000 kilograms per hectare. The median production of barley is higher than that of rye, with rye displaying a more concentrated range of values and fewer outliers. Overall, barley exhibits greater potential for high yield but with more variability, while rye production appears more consistent.",
        q_or_c="q",
        task_instruction="Recommend the most suitable chart with corresponding description for visualizing the information given by the provided text: "
    )

    box34_embs = model(**box34_inputs, output_hidden_states=True)[:, -1, :]

    box34_embs = torch.nn.functional.normalize(box34_embs, dim=-1)

    print(len(box34_embs.cpu().detach().tolist()[0]))

4096


In [5]:
box34_text_search_results = client.search(
    collection_name="test",
    anns_field="text_dense",
    data=box34_embs.cpu().detach().tolist(),
    limit=10,
    search_params={"metric_type": "IP"},
    output_fields=["type", "image_url"],  # specifies fields to be returned
)

for box34_text_search_result in box34_text_search_results[0]:
    print(box34_text_search_result["type"])
    print(box34_text_search_result["image_url"])
    print(box34_text_search_result["distance"])


box
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/box/png/34.png
0.807702898979187
box
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/box/png/386.png
0.604670524597168
violin
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/violin/png/59.png
0.583060622215271
violin
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/violin/png/19.png
0.5746939778327942
box
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/box/png/595.png
0.5625044107437134
violin
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/violin/png/296.png
0.5522609353065491
ridgeline
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/ridgeline/png/3688.png
0.5430629253387451
violin
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/violin/png/667.png
0.5391879081726074
ridgeline
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/ridgeline/png/3034.png
0.5342887043952942
pie
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/pie/png/963.png
0.5233592391014099


In [6]:
box34_img_search_results = client.search(
    collection_name="test",
    anns_field="image_dense",
    data=box34_embs.cpu().detach().tolist(),
    limit=10,
    search_params={"metric_type": "IP"},
    output_fields=["type", "image_url"],  # specifies fields to be returned
)

for box34_img_search_result in box34_img_search_results[0]:
    print(box34_img_search_result["type"])
    print(box34_img_search_result["image_url"])
    print(box34_img_search_result["distance"])

box
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/box/png/386.png
0.37802088260650635
box
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/box/png/725.png
0.3613850772380829
ridgeline
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/ridgeline/png/3034.png
0.34688594937324524
box
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/box/png/34.png
0.34349524974823
stream
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/stream/png/501.png
0.3385471701622009
ridgeline
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/ridgeline/png/3688.png
0.3367108106613159
box
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/box/png/430.png
0.334938108921051
violin
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/violin/png/59.png
0.3343046009540558
violin
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/violin/png/296.png
0.3305954933166504
stream
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/stream/png/615.png
0.3222346305847168


In [15]:
from pymilvus import AnnSearchRequest

# text semantic search (dense)
request_1 = AnnSearchRequest(
    data=box34_embs.cpu().detach().tolist(),
    anns_field="text_dense",
    param={
        "metric_type": "IP"
    },
    limit=10
)

# text-to-image search (multimodal)
request_2 = AnnSearchRequest(
    data=box34_embs.cpu().detach().tolist(),
    anns_field="image_dense",
    param={
        "metric_type": "IP"
    },
    limit=10
)

reqs = [request_1, request_2]

In [16]:
from pymilvus import RRFRanker, WeightedRanker

rrf_ranker = RRFRanker()
weighed_ranker = WeightedRanker(0, 1)

In [17]:
hybrid_search_results = client.hybrid_search(
    collection_name="test",  # target collection
    reqs=reqs,
    ranker=rrf_ranker,
    limit=10,  # number of returned entities
    output_fields=["type", "image_url"],  # specifies fields to be returned
)

In [18]:
for hybrid_search_result in hybrid_search_results[0]:
    print(hybrid_search_result["type"])
    print(hybrid_search_result["image_url"])

box
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/box/png/386.png
box
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/box/png/34.png
violin
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/violin/png/59.png
ridgeline
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/ridgeline/png/3034.png
ridgeline
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/ridgeline/png/3688.png
violin
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/violin/png/296.png
box
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/box/png/725.png
stream
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/stream/png/501.png
violin
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/violin/png/19.png
box
/home/dukaixing/RAG4Ghart/Dataset-ZXQ/sample100/box/png/595.png
