In [4]:
import numpy as np
import faiss                   # make faiss available
import os
import cupy as cp
import time
import json
from tqdm import tqdm

# Raw CuPy embedding dumps: the folder, the full path of every file inside it,
# and the sibling folder that holds the matching metadata.
cupy_dir = "MDI_RAG_Image2Image_Research/data/cupy_embeddings_level1+"
cupy_files = [os.path.join(cupy_dir, name) for name in os.listdir(cupy_dir)]
cupy_info_dir = "MDI_RAG_Image2Image_Research/data/cupy_infos_level1+"

# Index flavour under test; it prefixes every folder and file name used below.
# The other flavour this notebook has been run with is "faiss_flatL2".
cuda_type = "faiss"

# Folder names derived from the selected flavour.
cuda_cache_dir_name = f"{cuda_type}_embeddings_level1+"
cuda_infos_dir_name = f"{cuda_type}_infos_level1+"
cuda_index_dir_name = f"{cuda_type}_index_level1+"

# Locations of the serialized indexes and their metadata, plus the listing of
# index files that later cells use to decide how much to load.
cuda_index_dir = f"MDI_RAG_Image2Image_Research/data/{cuda_index_dir_name}"
cuda_infos_dir = f"MDI_RAG_Image2Image_Research/data/{cuda_infos_dir_name}"
cuda_files = os.listdir(cuda_index_dir)

# Load the stored FAISS indexes and their metadata so they can be queried.
#
# File layout, as encoded in the file names read below:
#   * cuda_type == "faiss": every batch is split into SHARDS_PER_BATCH files,
#       "<cuda_type>_index_batch_<batch>_<shard>.bin"
#       "<cuda_type>_infos_batch_<batch>_<shard>.json"
#   * any other cuda_type: one file per batch, without the shard suffix.
SHARDS_PER_BATCH = 4 if cuda_type == "faiss" else 1

# Maximum number of batches to load. The notebook has always loaded only the
# first batch (previously via an unconditional `break` at the end of each
# loop); keep that as the default. Set to None to load every batch.
MAX_BATCHES = 1

# `cuda_files` lists index *files*, not batches, so divide by the shard count
# (ceiling division: a partially written batch still counts and will fail
# loudly on the missing shard instead of being skipped silently).
# NOTE(review): assumes the index folder contains nothing but index files.
num_batches = -(-len(cuda_files) // SHARDS_PER_BATCH)
if MAX_BATCHES is not None:
    num_batches = min(num_batches, MAX_BATCHES)


def _batch_file_suffixes(batch):
    """Return the file-name suffixes belonging to one batch.

    For the sharded "faiss" layout this is "<batch>_0" ... "<batch>_<S-1>"
    with S = SHARDS_PER_BATCH; for every other layout it is just "<batch>".
    """
    if cuda_type == "faiss":
        return [f"{batch}_{shard}" for shard in range(SHARDS_PER_BATCH)]
    return [f"{batch}"]


# Suffixes of every file to load, in batch order then shard order.
file_suffixes = [
    suffix
    for batch in range(num_batches)
    for suffix in _batch_file_suffixes(batch)
]

# Serialized indexes, one entry per file.
index_list = [
    faiss.read_index(f"{cuda_index_dir}/{cuda_type}_index_batch_{suffix}.bin")
    for suffix in file_suffixes
]

# Metadata for each index; info_list[k] belongs to index_list[k].
info_list = []
for suffix in file_suffixes:
    with open(f"{cuda_infos_dir}/{cuda_type}_infos_batch_{suffix}.json", "r") as f:
        info_list.append(json.load(f))



In [5]:
# Copy every loaded CPU index onto GPU device 0, sharing one resources handle,
# then display the vector count of the last index that was moved.
res = faiss.StandardGpuResources()
gpu_index_list = []
for cpu_index in index_list:
    gpu_index = faiss.index_cpu_to_gpu(res, 0, cpu_index)
    gpu_index_list.append(gpu_index)
gpu_index.ntotal

854923

In [15]:
# Display the GPU index objects currently held in gpu_index_list.
gpu_index_list

[<faiss.swigfaiss_avx512.GpuIndexIVFFlat; proxy of <Swig Object of type 'faiss::gpu::GpuIndexIVFFlat *' at 0x7fe92d34cae0> >,
 <faiss.swigfaiss_avx512.GpuIndexIVFFlat; proxy of <Swig Object of type 'faiss::gpu::GpuIndexIVFFlat *' at 0x7fe92d34dfb0> >,
 <faiss.swigfaiss_avx512.GpuIndexIVFFlat; proxy of <Swig Object of type 'faiss::gpu::GpuIndexIVFFlat *' at 0x7fe92d5b76c0> >,
 <faiss.swigfaiss_avx512.GpuIndexIVFFlat; proxy of <Swig Object of type 'faiss::gpu::GpuIndexIVFFlat *' at 0x7fe92d5b7d20> >]

In [None]:
# Benchmark: mean wall-clock latency of a top-k search for several query batch
# sizes, using randomly generated float32 queries.
NUM_TRIALS = 1000  # timed searches per batch size
QUERY_DIM = 1024   # presumably the dimensionality of the indexed embeddings — confirm against index.d
TOP_K = 10         # neighbours requested per query
SEED = 0           # fixed seed so every run times the same sequence of queries

n_list = [1024, 100, 10, 1]
query_rng = np.random.default_rng(SEED)

# Decide once which indexes a single "search" covers: every loaded shard for
# the sharded "faiss" layout, otherwise only the first index.
if cuda_type == "faiss":
    search_targets = gpu_index_list
else:
    search_targets = [gpu_index_list[0]]

for n in n_list:
    time_list = []
    for _ in tqdm(range(NUM_TRIALS)):
        # Query generation stays outside the timed region on purpose.
        query = query_rng.random((n, QUERY_DIM), dtype=np.float32)
        # perf_counter is monotonic and high-resolution, which matters for the
        # sub-millisecond intervals measured here; time.time() is neither.
        begin_time = time.perf_counter()
        for target in search_targets:
            # D and I are overwritten on every shard: this cell measures time
            # only and does not merge per-shard results.
            D, I = target.search(query, TOP_K)
        end_time = time.perf_counter()
        time_list.append(end_time - begin_time)
    print(f"n={n}", np.mean(time_list))
print(cuda_type)


  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [00:10<00:00, 92.97it/s]


n=1024 0.004198521614074707


100%|██████████| 1000/1000 [00:02<00:00, 461.26it/s]


n=100 0.0015241003036499023


100%|██████████| 1000/1000 [00:01<00:00, 780.45it/s]


n=10 0.001194572925567627


100%|██████████| 1000/1000 [00:00<00:00, 1474.89it/s]

n=1 0.0006620974540710449
faiss





In [23]:
# Display `I`, the second value returned by whichever `.search(...)` call ran
# last in this kernel (FAISS returns the neighbour ids in that position).
I

array([[1425265, 1751998, 1537612, 1537613, 2570457,   38942,   37593,
          38022,   38945, 2569697]])

In [19]:
# Create a fresh GPU resources handle and place only the first loaded CPU
# index on the chosen GPU device.
res = faiss.StandardGpuResources()
gpu_device_id = 0
first_cpu_index = index_list[0]
gpu_index = faiss.index_cpu_to_gpu(res, gpu_device_id, first_cpu_index)

In [18]:
# Time a single top-10 search on `gpu_index`.
# NOTE: this cell relies on `query` and `gpu_index` already existing in the
# kernel (both are created by other cells).
# perf_counter is used because it is monotonic and high-resolution, unlike
# time.time(), which follows the adjustable system clock.
begin_time = time.perf_counter()
D, I = gpu_index.search(query, 10)
end_time = time.perf_counter()
print(f"Query time: {end_time - begin_time}")
# Release resources: drop this name's reference to the GPU index. The index is
# only freed if nothing else still references it. After this line the cell
# cannot be re-run until `gpu_index` is created again.
del gpu_index


Query time: 0.05280613899230957
