In [None]:
import ollama
import numpy as np

# Example text data
documents = [
    "This is the first document.",
    "Here is another document.",
    "And this is the third one."
]

# Generate embeddings using Ollama
embeddings = []
for doc in documents:
    response = ollama.embeddings(model="mxbai-embed-large", prompt=doc)
    print(response["embedding"])
    embeddings.append(response["embedding"])

# Convert embeddings to a NumPy array
embeddings = np.array(embeddings).astype('float32')

[-0.21331888437271118, -0.6940878033638, 0.7370022535324097, -0.3593367040157318, -0.6735249757766724, 0.02634437382221222, 0.0209670290350914, -0.130447655916214, 0.5675534009933472, 0.45190566778182983, 0.003338925540447235, 0.026013150811195374, 0.37982985377311707, 0.6866794228553772, -0.5592450499534607, 0.5542436838150024, -0.8488186001777649, 0.3293175995349884, -1.097079873085022, 0.08267776668071747, -0.28860437870025635, 0.03149156644940376, -1.0443240404129028, -0.723651647567749, -0.03741026669740677, 0.601622998714447, 0.16328386962413788, 0.24140837788581848, 1.4273380041122437, 0.39425432682037354, 0.20545519888401031, 0.06289847195148468, -0.09996592998504639, -0.48504072427749634, 0.23483119904994965, -0.5288522243499756, 0.5995486974716187, -1.1251288652420044, -0.10660718381404877, -0.47898727655410767, 0.24975642561912537, 0.21671059727668762, 1.0436409711837769, -0.33702531456947327, -1.3878086805343628, -0.27991563081741333, -0.2782095670700073, -0.434293091297149

In [5]:
import faiss

# Define the dimensionality of the embeddings
dimension = embeddings.shape[1]  # Number of features in each embedding

print(f"dimension={dimension}")
# Create a FAISS index
index = faiss.IndexFlatL2(dimension)  # L2 distance for similarity search

# Add embeddings to the index
index.add(embeddings)

dimension=1024


In [7]:
# Query embedding (generate an embedding for the query)
query_text = "Find a document similar to this."
query_embedding = np.array(ollama.embeddings(model="mxbai-embed-large", prompt=query_text)["embedding"]).astype('float32')

# Search for the top-k most similar embeddings
k = 2  # Number of nearest neighbors to retrieve
distances, indices = index.search(np.array([query_embedding]), k)

# Print results
print("Indices of similar documents:", indices)
print("Distances to similar documents:", distances)

Indices of similar documents: [[1 0]]
Distances to similar documents: [[117.83384 195.73122]]


In [8]:
# Save the index to disk
faiss.write_index(index, "faiss_index.index")

# Load the index from disk
loaded_index = faiss.read_index("faiss_index.index")