In [1]:
import os
import numpy as np
import faiss

In [3]:
# ===================== Configuration =====================
# Where the serialized FAISS indexes are written; created if it does not exist yet.
INDEX_DIR = "./indexes"
os.makedirs(name=INDEX_DIR, exist_ok=True)

# Where the precomputed embedding arrays (.npy files) are read from.
EMBEDDING_DIR = "./embeddings"

In [6]:
def _load_embeddings(filename):
    """Load a 2-D embedding matrix from EMBEDDING_DIR in the layout FAISS needs.

    Args:
        filename: Name of the ``.npy`` file inside ``EMBEDDING_DIR``.

    Returns:
        A C-contiguous float32 array of shape (n_vectors, dim).

    Raises:
        ValueError: If the stored array is not 2-D.
    """
    embs = np.load(os.path.join(EMBEDDING_DIR, filename))
    if embs.ndim != 2:
        raise ValueError(
            f"{filename}: expected a 2-D (n_vectors, dim) array, got shape {embs.shape}"
        )
    # FAISS requires float32 and C-contiguous memory; astype() alone does not
    # guarantee the latter (e.g. for Fortran-ordered input).
    return np.ascontiguousarray(embs, dtype="float32")


def _build_flat_ip_index(embs, index_path):
    """Build an exact inner-product index over unit-length vectors and save it.

    Args:
        embs: C-contiguous float32 array of shape (n_vectors, dim). It is
            L2-normalized IN PLACE so that inner product equals cosine
            similarity.
        index_path: Destination file for the serialized index.

    Returns:
        The populated ``faiss.IndexFlatIP``.
    """
    # Inner product is only cosine similarity for unit vectors. Normalizing is
    # a no-op (up to float rounding) when the embeddings are already normalized.
    faiss.normalize_L2(embs)
    index = faiss.IndexFlatIP(embs.shape[1])
    index.add(embs)
    faiss.write_index(index, index_path)
    return index


print("Loading embeddings...")
text_embs = _load_embeddings("text_embeddings.npy")
image_embs = _load_embeddings("image_embeddings.npy")

dim = text_embs.shape[1]
print(f"Text embeddings:    {text_embs.shape}")
print(f"Image embeddings:   {image_embs.shape}")
print(f"Dimension:  {dim}")

# Both indexes are meant to share one embedding space; fail early with a clear
# message instead of an opaque FAISS assertion inside add().
if image_embs.shape[1] != dim:
    raise ValueError(
        f"Dimension mismatch: text embeddings have dim {dim}, "
        f"image embeddings have dim {image_embs.shape[1]}"
    )

# Build Flat indexes (exact search; fast enough at the ~1000-vector scale).
# NOTE: query vectors must also be L2-normalized at search time for the
# returned scores to be true cosine similarities (the ranking is unaffected
# by the query's norm).
print("\nBuilding Flat index for text...")
text_index = _build_flat_ip_index(text_embs, os.path.join(INDEX_DIR, "text_flat.index"))

print("Building Flat index for image...")
image_index = _build_flat_ip_index(image_embs, os.path.join(INDEX_DIR, "image_flat.index"))

print(f"\n✓ Indexes saved to {INDEX_DIR}/")
print(f"  - text_flat.index ({text_embs.shape[0]} vectors)")
print(f"  - image_flat.index ({image_embs.shape[0]} vectors)")


Loading embeddings...
Text embeddings:    (1000, 512)
Image embeddings:   (1000, 512)
Dimension:  512

Building Flat index for text...
Building Flat index for image...

✓ Indexes saved to ./indexes/
  - text_flat.index (1000 vectors)
  - image_flat.index (1000 vectors)
