In [1]:
# %pip install faiss-cpu langchain langchain_openai langchain_community python-dotenv

In [2]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain.schema.document import Document
import os
from dotenv import load_dotenv

# Pull settings from a .env file, if one exists, before reading the key.
load_dotenv()

# OpenAIEmbeddings is constructed below without an explicit key, so the key
# has to come from the environment. Fail fast with a readable message when it
# is missing: assigning the result of os.getenv() straight into os.environ
# raises an opaque TypeError when the variable is unset (os.environ only
# accepts str values), and is a redundant self-assignment when it is set.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in your shell or add it to a .env file."
    )

# Initialize the embeddings class
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Embed a single query and show only the first few components of the vector
query = "Hello, world!"
vector = embeddings.embed_query(query)
print(vector[:5])


[-0.005823844112455845, -0.024085838347673416, -0.022070934996008873, 0.02761966735124588, -0.0027666164096444845]


In [3]:

# Small sample corpus: one plain-text string per document
documents = [
    "Alice works in finance",
    "Bob is a database administrator",
    "Carl manages Bob and Alice",
]

# Wrap every raw string in a Document so it can be handed to a vector store
document_objects = list(map(lambda text: Document(page_content=text), documents))

# Embed all texts in one batch call, then report how many vectors came back
# and how many components the first one has
vectors = embeddings.embed_documents(
    [item.page_content for item in document_objects]
)
print(f"{len(vectors)} {len(vectors[0])}")


3 3072


In [4]:

# Build the FAISS store from the Document objects, using `embeddings` to
# vectorize them
db = FAISS.from_documents(documents=document_objects, embedding=embeddings)

query = "Tell me about Alice"

# Plain similarity search: matching documents only
docs = db.similarity_search(query=query)

# Same search, but each hit is returned as a (Document, score) pair.
# NOTE(review): in the recorded output the closest match has the smallest
# score, so the score looks like a distance (lower = more similar); confirm
# against the FAISS vector store documentation.
docs_and_scores = db.similarity_search_with_score(query=query)
print(docs_and_scores)


[(Document(page_content='Alice works in finance'), 0.7962496), (Document(page_content='Carl manages Bob and Alice'), 1.248132), (Document(page_content='Bob is a database administrator'), 1.6588588)]
