In [8]:
import ollama
import numpy as np

# Toy corpus: four one-sentence documents that get embedded and indexed below.
documents = [
    "Python is a programming language that lets you work quickly.",
    "Machine learning automates model building through data analysis.",
    "Artificial Intelligence (AI) is intelligence demonstrated by machines.",
    (
        "Natural language processing (NLP) is a field of AI "
        "that focuses on human language interaction."
    ),
]

# Generate embeddings using Ollama
def get_embeddings(documents, embedding_model="mxbai-embed-large"):
    """Embed every document with an Ollama model and stack the results.

    Args:
        documents: Iterable of texts; each one is sent to Ollama as its own
            prompt, in iteration order.
        embedding_model: Name of the Ollama embedding model to query.

    Returns:
        A float32 NumPy array built from the ``"embedding"`` entry of each
        response, one entry per document in input order.
    """
    vectors = [
        ollama.embeddings(model=embedding_model, prompt=text)["embedding"]
        for text in documents
    ]
    # Cast to float32 after stacking (the index built later in this notebook
    # is fed this array directly).
    return np.array(vectors).astype("float32")

# Embed the whole corpus; get_embeddings yields one vector per document, in input order.
embeddings = get_embeddings(documents)



In [9]:
import faiss

# Fail early with a readable message: an empty corpus yields a 1-D array, on
# which `shape[1]` would otherwise raise an opaque IndexError.
assert embeddings.ndim == 2 and embeddings.shape[0] > 0, (
    "expected a non-empty 2-D embeddings array of shape (n_documents, dimension), "
    f"got shape {embeddings.shape}"
)

# Define the dimensionality of the embeddings
dimension = embeddings.shape[1]  # Number of features in each embedding

print(f"dimension={dimension}")
# Create a FAISS index
index = faiss.IndexFlatL2(dimension)  # L2 distance for similarity search

# Add embeddings to the index
index.add(embeddings)

# The index was created fresh above, so it must now hold exactly one vector per row.
assert index.ntotal == embeddings.shape[0], "FAISS index size does not match embeddings"

dimension=1024


In [14]:
# Free-text search query; it is embedded further down and matched against the index.
query = "What is Natural Language Processing?"

def get_embedding(text, embedding_model="mxbai-embed-large") -> np.ndarray:
    """Embed a single text with an Ollama model.

    Args:
        text: The text to send to Ollama as the prompt.
        embedding_model: Name of the Ollama embedding model to query.

    Returns:
        A float32 NumPy array with a single row holding the ``"embedding"``
        entry of the response (the vector is wrapped in an outer list so the
        result is a one-row matrix rather than a flat vector).
    """
    vector = ollama.embeddings(model=embedding_model, prompt=text)["embedding"]
    return np.array([vector]).astype('float32')


query_embedding = get_embedding(query)

# Search for the top-k most similar embeddings
k = 2  # Number of nearest neighbors to retrieve
distances, indices = index.search(query_embedding, k)

# Print results (`query` is the search string assigned at the top of this cell;
# it is deliberately not re-assigned here so the printed term always matches
# the text that was actually embedded).
print(f"Indices of similar documents for search term:\n\t '{query}':\t", indices)
for i in indices[0]:
    # FAISS pads the result with -1 when fewer than k neighbours exist. Without
    # this guard, documents[-1] would silently print the last document instead.
    if i < 0:
        continue
    print(f"Document {i}: {documents[i]}")

print("Distances to similar documents:", distances)

Indices of similar documents for search term:
	 'What is Natural Language Processing?':	 [[3 2]]
Document 3: Natural language processing (NLP) is a field of AI that focuses on human language interaction.
Document 2: Artificial Intelligence (AI) is intelligence demonstrated by machines.
Distances to similar documents: [[ 81.82834 277.91656]]


In [8]:
# Single source of truth for the on-disk location, so the save and the load
# below cannot drift apart.
INDEX_PATH = "faiss_index.index"

# Save the index to disk
faiss.write_index(index, INDEX_PATH)

# Load the index from disk
loaded_index = faiss.read_index(INDEX_PATH)

# Round-trip sanity check: the reloaded index must report the same number of
# vectors and the same dimensionality as the one that was written.
assert loaded_index.ntotal == index.ntotal, "reloaded index has a different vector count"
assert loaded_index.d == index.d, "reloaded index has a different dimensionality"