In [3]:
import faiss
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

# Load the FAISS index
index = faiss.read_index("151vector.faiss")

# Check if the index supports direct reconstruction
if hasattr(index, "reconstruct"):
    num_vectors = index.ntotal
    vectors = np.empty((num_vectors, index.d), dtype="float32")
    for i in range(num_vectors):
        vectors[i] = faiss.vector_to_array(index.reconstruct(i))
else:
    # If reconstruction is not supported, try approximate extraction using stored centroids
    if hasattr(index, "pq"):
        vectors = index.pq.centroids.T  # Extract product quantizer centroids
    else:
        raise RuntimeError("Cannot extract vectors. Ensure the index is compatible or use original data.")

# Perform dimensionality reduction using t-SNE
tsne = TSNE(n_components=2, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

# Visualize the results
plt.figure(figsize=(10, 8))
plt.scatter(reduced_vectors[:, 0], reduced_vectors[:, 1], s=10, alpha=0.7)
plt.title("FAISS Vectors Visualized with t-SNE", fontsize=14)
plt.xlabel("Component 1")
plt.ylabel("Component 2")
plt.show()


RuntimeError: Error in virtual void faiss::Index::reconstruct(idx_t, float *) const at /Users/runner/miniforge3/conda-bld/faiss-split_1723208685316/work/faiss/Index.cpp:54: reconstruct not implemented for this type of index

In [4]:
import faiss

# Load the FAISS index
index = faiss.read_index("151vector.faiss")

# Get basic metrics
print("Index type:", type(index))  # Type of the FAISS index
print("Total number of vectors:", index.ntotal)  # Total number of vectors in the index
print("Vector dimensionality:", index.d)  # Dimensionality of vectors in the index


Index type: <class 'faiss.swigfaiss.IndexIDMap'>
Total number of vectors: 150
Vector dimensionality: 384


In [5]:
import faiss
import numpy as np

# Load the FAISS index
index = faiss.read_index("151vector.faiss")

# Generate some random query vectors (use real data if available)
query_vectors = np.random.random((5, index.d)).astype("float32")  # 5 queries

# Perform a search on the index
k = 10  # Number of nearest neighbors to retrieve
distances, indices = index.search(query_vectors, k)

# Display results
print("Query Results:")
for i, (d, idx) in enumerate(zip(distances, indices)):
    print(f"Query {i+1}:")
    print("  Nearest neighbor distances:", d)
    print("  Nearest neighbor indices:", idx)


Query Results:
Query 1:
  Nearest neighbor distances: [117.697655 117.85494  118.0235   118.14925  118.18328  118.19527
 118.24588  118.29262  118.37459  118.40091 ]
  Nearest neighbor indices: [ 61 139 102   2  20  74 110  81   9 134]
Query 2:
  Nearest neighbor distances: [135.36436 135.38535 135.41605 135.43025 135.56174 135.69043 135.69205
 135.72473 135.7445  135.7641 ]
  Nearest neighbor indices: [110  85 123  14 104  79 139 143 121 144]
Query 3:
  Nearest neighbor distances: [129.57281 129.82973 129.89774 129.91023 129.95175 129.95514 129.97098
 129.97417 130.01773 130.04318]
  Nearest neighbor indices: [ 41  35  52 139  37  48  22  20  32  61]
Query 4:
  Nearest neighbor distances: [135.46259 135.50572 135.57841 135.5999  135.6352  135.69493 135.74667
 135.78978 135.7948  135.85426]
  Nearest neighbor indices: [116 127 148  60 113  40 137  10  62 129]
Query 5:
  Nearest neighbor distances: [130.43816 130.49734 130.50299 130.57205 130.59286 130.61615 130.63475
 130.68912 130.719

In [6]:
# Assuming you have ground truth neighbors for each query
ground_truth = [[0, 1, 2], [3, 4, 5], ...]  # Example ground truth indices

# Calculate recall
recall = 0
for i, gt in enumerate(ground_truth):
    retrieved = set(indices[i])
    recall += len(set(gt) & retrieved) / len(gt)

recall /= len(ground_truth)
print("Recall:", recall)


TypeError: 'ellipsis' object is not iterable

In [8]:
if isinstance(index, faiss.IndexIVF):
    print("Number of centroids:", index.nlist)

if isinstance(index, faiss.IndexHNSW):
    print("HNSW graph size:", index.hnsw.nb_neighbors())


In [9]:
import faiss
import numpy as np

# Load the FAISS index
index = faiss.read_index("151vector.faiss")

# Basic metrics
print("Index type:", type(index))
print("Total number of vectors:", index.ntotal)
print("Vector dimensionality:", index.d)

# Generate synthetic query vectors for evaluation
query_vectors = np.random.random((5, index.d)).astype("float32")  # 5 queries
k = 10  # Number of nearest neighbors

# Perform a search
distances, indices = index.search(query_vectors, k)

# Display query results
print("\nQuery Results:")
for i, (d, idx) in enumerate(zip(distances, indices)):
    print(f"Query {i+1}:")
    print("  Nearest neighbor distances:", d)
    print("  Nearest neighbor indices:", idx)


Index type: <class 'faiss.swigfaiss.IndexIDMap'>
Total number of vectors: 150
Vector dimensionality: 384

Query Results:
Query 1:
  Nearest neighbor distances: [128.36996 128.759   128.91304 128.91353 128.94388 129.01703 129.04842
 129.0509  129.10396 129.10602]
  Nearest neighbor indices: [ 15 127 132  28  11 142  91  37 129 126]
Query 2:
  Nearest neighbor distances: [131.49994 131.54587 131.59146 131.60416 131.64996 131.68205 131.71709
 131.75531 131.766   131.79218]
  Nearest neighbor indices: [  0  43  91 128  18 134 119  90 104  13]
Query 3:
  Nearest neighbor distances: [134.60004 134.8996  135.00937 135.11104 135.26534 135.29013 135.33966
 135.34982 135.36504 135.44463]
  Nearest neighbor indices: [126  91 110 117  79  76  56 116  98   9]
Query 4:
  Nearest neighbor distances: [127.33923 127.63579 127.64788 127.8627  127.93192 128.03268 128.11331
 128.13553 128.1389  128.17932]
  Nearest neighbor indices: [56 79 49 96  6 30 67 15 29 13]
Query 5:
  Nearest neighbor distances: [1

In [10]:
# Load the index
index = faiss.read_index("151vector.faiss")

# Retrieve the mapped IDs
id_map = faiss.IDMap(index)
all_ids = id_map.id_map

print("Mapped IDs:", all_ids[:10])  # Print the first 10 IDs


AttributeError: module 'faiss' has no attribute 'IDMap'

In [15]:
import faiss
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

# Load the index and extract vectors
index = faiss.read_index("ideas1.faiss")
num_vectors = index.ntotal
vectors = np.empty((num_vectors, index.d), dtype="float32")
ids = []

for i in range(num_vectors):
    vectors[i] = faiss.vector_to_array(index.reconstruct(i))
    ids.append(i)

# Dimensionality reduction using t-SNE
tsne = TSNE(n_components=2, random_state=42)
reduced_vectors = tsne.fit_transform(vectors)

# Plot the results
plt.figure(figsize=(10, 8))
plt.scatter(reduced_vectors[:, 0], reduced_vectors[:, 1], s=30, alpha=0.7, label="Vectors")
plt.title("FAISS Vector Visualization with t-SNE", fontsize=14)
plt.xlabel("t-SNE Component 1")
plt.ylabel("t-SNE Component 2")
plt.legend()
plt.show()


RuntimeError: Error in virtual void faiss::Index::reconstruct(idx_t, float *) const at /Users/runner/miniforge3/conda-bld/faiss-split_1723208685316/work/faiss/Index.cpp:54: reconstruct not implemented for this type of index

In [12]:
import faiss

# Load the FAISS index
index = faiss.read_index("151vector.faiss")

# Check if the index is an IDMap
if isinstance(index, faiss.IndexIDMap):
    # Retrieve all IDs from the index
    ids = faiss.vector_to_array(index.id_map)
    print("Mapped IDs (first 10):", ids[:10])
else:
    print("Index is not an IndexIDMap.")


Mapped IDs (first 10): [0 1 2 3 4 5 6 7 8 9]


In [13]:
import pandas as pd
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

# Step 1: Load the CSV file
file_path = "151_ideas_updated2.csv"
df = pd.read_csv(file_path)

# Extract the "Ideas" column
ideas = df["Ideas"].tolist()

# Step 2: Convert text to embeddings
model = SentenceTransformer("all-MiniLM-L6-v2")  # Load a pre-trained embedding model
embeddings = model.encode(ideas, convert_to_numpy=True)

# Step 3: Create a FAISS index
dimension = embeddings.shape[1]  # Dimensionality of the embeddings
index = faiss.IndexFlatL2(dimension)  # Use L2 distance for similarity
id_map = faiss.IndexIDMap(index)  # Wrap the index with ID mapping

# Step 4: Add embeddings and IDs to the index
ids = np.arange(len(ideas))  # Generate unique IDs for each idea
id_map.add_with_ids(embeddings, ids)

# Step 5: Save the FAISS index
faiss.write_index(id_map, "ideas_vector_db.faiss")
print(f"Created and saved FAISS index with {id_map.ntotal} vectors.")

# Optional: Query the index
def query_index(query_text, top_k=5):
    query_vector = model.encode([query_text], convert_to_numpy=True)
    distances, indices = id_map.search(query_vector, top_k)
    results = [{"idea": ideas[idx], "distance": dist} for idx, dist in zip(indices[0], distances[0])]
    return results

# Example query
example_query = "How to focus on self-expression?"
results = query_index(example_query)
print("\nQuery Results:")
for result in results:
    print(f"Distance: {result['distance']:.4f} | Idea: {result['idea']}")


Created and saved FAISS index with 150 vectors.

Query Results:
Distance: 0.8613 | Idea: 2) Full Expression - it takes a lot of effort for one to understand who they are when they are comfortable and how to channel the most real expressions of themselves what holds people back? Shyness, distraction (inability to focus on that which they want express)
Distance: 1.1096 | Idea: 16) games – Be a good lil sociopath path and self-monitor as they say “fake it b4 u make it" - so much is a game, but it’s usually played with people we are not so close to, so maybe we can transcend games but the give and take of life seems so entrenched... to see how you can improve your self-monitoring consult Snyder’s guide to self-monitoring (SELF-MONITORING SCALE by Mark Snyder (1974)) Self – Monitor http://en.wikipedia.org/wiki/Self-monitoring Snyder’s scale - http://faculty.washington.edu/janegf/selfmonitoring.htm Sociopath – be a good sociopath
Distance: 1.1305 | Idea: 54) Inability to step Back -> Why? 1)

In [14]:
# Load the saved FAISS index
index = faiss.read_index("ideas_vector_db.faiss")

# Reconstruct vectors by ID
def reconstruct_vector(vector_id):
    try:
        reconstructed_vector = index.reconstruct(vector_id)
        return reconstructed_vector
    except Exception as e:
        print(f"Error reconstructing vector with ID {vector_id}: {e}")
        return None

# Example: Reconstruct a vector
example_id = 0  # Replace with the ID you want to reconstruct
reconstructed = reconstruct_vector(example_id)

if reconstructed is not None:
    print(f"Reconstructed vector for ID {example_id}: {reconstructed}")
else:
    print(f"Failed to reconstruct vector for ID {example_id}.")


Error reconstructing vector with ID 0: Error in virtual void faiss::Index::reconstruct(idx_t, float *) const at /Users/runner/miniforge3/conda-bld/faiss-split_1723208685316/work/faiss/Index.cpp:54: reconstruct not implemented for this type of index
Failed to reconstruct vector for ID 0.
