In [1]:
!pip install faiss-cpu numpy

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m23.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [2]:
!pip install openai==0.28

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.70.0
    Uninstalling openai-1.70.0:
      Successfully uninstalled openai-1.70.0
Successfully installed openai-0.28.0


In [3]:
import os

import faiss
import numpy as np
import openai

In [5]:
# Take the key from the OPENAI_API_KEY environment variable so the secret never
# has to be written into (and committed with) the notebook. The placeholder is
# kept only as a fallback and is not a usable key.
openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY_HERE")

In [6]:
# Helper: Get embedding using OpenAI
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the OpenAI embedding of ``text`` as a 1-D float32 NumPy array.

    Args:
        text: The string to embed. It is sent as a single-element batch.
        model: Name of the OpenAI embedding model to use. Defaults to
            "text-embedding-ada-002", the model this notebook was written for.

    Returns:
        np.ndarray with dtype float32 built from the first (and only)
        embedding in the API response.
    """
    response = openai.Embedding.create(input=[text], model=model)
    # Convert to float32 up front: the vectors are handed to FAISS later on.
    return np.array(response['data'][0]['embedding'], dtype=np.float32)

In [7]:
# Example texts: a tiny demo corpus. Each text's position in this list is the
# id used by the lookup map and by the search results later in the notebook.
texts = [
    "FAISS is a library for efficient similarity search.",
    "It is developed by Facebook AI Research.",
    "It supports cosine and L2 distance search.",
    "You can use FAISS with OpenAI embeddings."
]

In [8]:
# Embed every text (one API call each) and pack the vectors into a single
# 2-D array with one row per text, in the same order as `texts`.
embeddings = np.array(list(map(get_embedding, texts)))

In [9]:
embeddings

array([[-0.02997606,  0.02110047, -0.01385765, ...,  0.00743819,
        -0.00234973, -0.05118818],
       [-0.00988787,  0.00257791,  0.00713675, ..., -0.01343943,
        -0.01611655, -0.03309409],
       [ 0.00019641,  0.02353237, -0.00626444, ...,  0.00275607,
        -0.02853837, -0.02821943],
       [-0.01981817, -0.00651386,  0.01091175, ...,  0.00290081,
         0.01450751, -0.03783847]], dtype=float32)

In [10]:
# L2-normalize each embedding row in place so that the inner-product index
# (IndexFlatIP) used below scores by cosine similarity.
faiss.normalize_L2(embeddings)  # needed for cosine scores; skip only if raw inner-product ranking is wanted

In [11]:
# Embedding dimensionality = number of columns in the embeddings matrix;
# it is used to size the FAISS index.
dimension = embeddings.shape[1]
print(dimension)

1536


In [12]:
# Flat inner-product index sized to the embedding dimension. On the
# L2-normalized vectors used in this notebook, inner product equals cosine similarity.
index = faiss.IndexFlatIP(dimension)

In [13]:
index

<faiss.swigfaiss_avx2.IndexFlatIP; proxy of <Swig Object of type 'faiss::IndexFlatIP *' at 0x7adb9953d9e0> >

In [14]:
# Add the normalized embedding vectors to the index, one row per text,
# in the same order as `texts`.
index.add(embeddings)

In [15]:
# Lookup table from a text's position in `texts` to the text itself, used to
# turn the integer ids returned by a search back into readable strings.
text_id_map = dict(enumerate(texts))

In [16]:
text_id_map

{0: 'FAISS is a library for efficient similarity search.',
 1: 'It is developed by Facebook AI Research.',
 2: 'It supports cosine and L2 distance search.',
 3: 'You can use FAISS with OpenAI embeddings.'}

In [17]:
# Query example: embed the question, then look up its nearest neighbours
query = "What is FAISS?"
query_vector = get_embedding(query)
# (1, dim) view of the query vector. Normalizing the view in place also
# normalizes query_vector, since both share the same buffer.
query_matrix = query_vector.reshape(1, -1)
faiss.normalize_L2(query_matrix)  # Normalize for cosine
# Search top 2 most similar
k = 2
distances, indices = index.search(query_matrix, k)


In [18]:
# Display results
# FAISS fills unused result slots with index -1 when k is larger than the
# number of stored vectors; skip those instead of failing with KeyError on
# text_id_map[-1].
for rank, (idx, score) in enumerate(zip(indices[0], distances[0]), start=1):
    if idx < 0:
        continue
    print(f"Result {rank}: {text_id_map[int(idx)]} (Score: {score:.4f})")

Result 1: FAISS is a library for efficient similarity search. (Score: 0.8826)
Result 2: You can use FAISS with OpenAI embeddings. (Score: 0.8335)
