In [None]:
import os
import faiss
import numpy as np
import pandas as pd
from dotenv import load_dotenv

load_dotenv();

from langchain_openai import OpenAIEmbeddings


# Toy corpus used throughout the notebook. The list position of each sentence
# doubles as its document id: later cells embed the list as-is and map search
# hits back to text with `texts[row]`, so the order must not change between
# building the index and querying it.
texts = [
    "The sun was shining brightly in the clear blue sky",
    "The cat sat on the windowsill, watching the birds outside",
    "The city was bustling with people, cars, and noise",
    "The teacher wrote complex equations on the blackboard",
    "The company will announce its quarterly earnings next week",
]

In [None]:
# Model & Client
# Both settings come from the environment (populated by load_dotenv above);
# a missing OPENAI_BASE_URL raises KeyError right here.
# The API key is handed over as a zero-argument callable, so os.environ is
# read when that callable is invoked rather than on this line.
# NOTE(review): confirm the installed langchain_openai accepts a callable api_key.
client = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    base_url=os.environ["OPENAI_BASE_URL"],
    api_key=lambda: os.environ["OPENAI_API_KEY"]
)

# Convert texts to embeddings
# Row i of the result is expected to belong to texts[i]; the search cell
# relies on that when it looks hits up with texts[row].
doc_embeddings = client.embed_documents(texts)

# Build index
# `df` is kept as a convenient tabular view of the raw embeddings.
df = pd.DataFrame(doc_embeddings)

# FAISS's Python API is documented for C-contiguous float32 matrices. The
# embeddings arrive as Python floats (float64 once tabulated), so convert
# explicitly instead of handing FAISS a DataFrame and depending on whatever
# implicit coercion the installed release happens to perform.
doc_vectors = np.ascontiguousarray(df.to_numpy(), dtype=np.float32)

n_dim = doc_vectors.shape[1]
index = faiss.IndexFlatL2(n_dim)  # exact (brute-force) L2 search, no training step
index.add(doc_vectors)


In [None]:
# Convert query to embeddings
search_results = 5  # number of nearest neighbours requested from the index
query = "Some question about my pet"
query_embeddings = client.embed_query(query)

# FAISS expects a 2-D float32 matrix with one row per query; build it
# explicitly rather than passing the default float64 array.
query_vectors = np.ascontiguousarray([query_embeddings], dtype=np.float32)

# Query results
distances, indices = index.search(query_vectors, k=search_results)

# When k is larger than the number of indexed vectors, FAISS pads the result
# with label -1. Indexing texts[-1] would silently show the LAST document as a
# bogus hit, so drop the padding before display.
hits = [
    (row, distance)
    for row, distance in zip(indices[0], distances[0], strict=True)
    if row >= 0
]

# Display results
print(f"Query: '{query}'\nDocument ranking:")
max_len = max(len(text) for text in texts)  # pad so the distance column lines up
for i, (row, distance) in enumerate(hits):
    print(f"  {i}. {texts[row]:<{max_len + 3}} (distance: {distance:.4f})")
