# Setup

In [None]:
# Notebook-level parameters. Each one is left as None here so that the
# following setup cell can substitute its built-in default.
# NOTE(review): this looks like a parameters cell meant to be overridden by a
# notebook runner (e.g. papermill) — confirm.
# Dimensionality passed to the FAISS index constructor; defaults to 512.
feature_shape = None
# Root directory holding the per-part feature sub-directories; defaults to
# '<parent of cwd>/clip/CLIPv2_features'.
features_dir = None
# Output file name for the saved index; defaults to 'faiss_clipv2_cosine.bin'.
bin_name = None

In [None]:
import os

# Resolve paths relative to the directory the notebook is run from.
dir_path = os.getcwd()
parent_dir_path = os.path.dirname(dir_path)

# Any parameter left unset (falsy) falls back to its default value.
feature_shape = feature_shape or 512
features_dir = features_dir or f'{parent_dir_path}/clip/CLIPv2_features'
bin_name = bin_name or 'faiss_clipv2_cosine.bin'

In [None]:
# Install FAISS into the notebook environment.
# NOTE(review): faiss-cpu and faiss-gpu both appear to ship the same `faiss`
# module, so installing both in one environment may conflict — confirm and
# keep only the build that matches the target machine.
! pip install faiss-cpu faiss-gpu

In [None]:
import os
import glob
import faiss
import numpy as np
from tqdm import tqdm

# Indexing

In [None]:
def create_faiss_indexes(cpu_bin_name, gpu_bin_name, features_dir, feature_shape):
    """
    Build flat inner-product FAISS indexes from saved features and save them.

    Every ``*.npy`` file found one directory level below ``features_dir`` is
    loaded, cast to float32 and appended to a CPU ``IndexFlatIP``. If a GPU
    index can be created on device 0, the same vectors are added to it as
    well and it is converted back to a CPU index before being written.

    NOTE(review): ``IndexFlatIP`` ranks by inner product; that only matches
    cosine similarity if the stored vectors are L2-normalised — confirm that
    the feature files are normalised upstream.

    Parameters:
    - cpu_bin_name: Path of the file the CPU index is written to
    - gpu_bin_name: Path of the file the GPU-built index is written to
      (only written when GPU index creation succeeded)
    - features_dir: Directory whose sub-directories contain the .npy files
    - feature_shape: Dimensionality of each feature vector

    Returns:
    - None (the indexes are saved to disk)
    """
    cpu_index = faiss.IndexFlatIP(feature_shape)

    # The GPU index stays None unless every step of its creation succeeds.
    gpu_index = None
    try:
        gpu_resources = faiss.StandardGpuResources()
        gpu_candidate = faiss.index_cpu_to_gpu(
            gpu_resources, 0, faiss.IndexFlatIP(feature_shape))
        print("GPU index creation is available")
        gpu_index = gpu_candidate
    except Exception as err:
        # Best effort: fall back to building the CPU index only.
        print(f"GPU index creation not available: {err}")

    # Indexes that receive each batch of vectors, CPU first.
    targets = [cpu_index] if gpu_index is None else [cpu_index, gpu_index]

    # Walk the parts and their files in sorted order so that vector ids are
    # assigned deterministically.
    for data_part in tqdm(sorted(os.listdir(features_dir)), desc="Processing data parts"):
        pattern = os.path.join(features_dir, data_part, '*.npy')
        for npy_path in tqdm(sorted(glob.glob(pattern)), desc=f"Processing {data_part}"):
            vectors = np.load(npy_path).astype(np.float32)
            for index in targets:
                index.add(vectors)

    faiss.write_index(cpu_index, cpu_bin_name)
    print(f"CPU FAISS index saved to {cpu_bin_name}")

    if gpu_index is None:
        return

    # A GPU index is copied back to host memory before it is written.
    faiss.write_index(faiss.index_gpu_to_cpu(gpu_index), gpu_bin_name)
    print(f"GPU FAISS index saved to {gpu_bin_name}")

In [None]:
# Build and save the indexes. The function defined in this notebook is
# `create_faiss_indexes` and it takes separate CPU and GPU output paths, so
# the GPU file name is derived from `bin_name` by inserting a `_gpu` suffix
# before the extension (e.g. 'faiss_clipv2_cosine_gpu.bin').
_bin_root, _bin_ext = os.path.splitext(bin_name)
gpu_bin_name = f'{_bin_root}_gpu{_bin_ext}'
create_faiss_indexes(bin_name, gpu_bin_name, features_dir, feature_shape)