In [11]:
# %% [markdown]
# # Creating Vector Embeddings
# ![Embedding Visualization](visuals/embedding_vis.png)

# %%
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
import pandas as pd
import plotly.express as px

# %%
def visualize_embeddings(embeddings, texts):
    df = pd.DataFrame({
        'text': [t[:50]+"..." for t in texts],
        'dim1': [e[0] for e in embeddings],
        'dim2': [e[1] for e in embeddings]
    })
    
    fig = px.scatter(df, x='dim1', y='dim2', text='text', 
                    title="2D Projection of Document Embeddings")
    fig.update_traces(textposition='top center')
    fig.write_image("visuals/embedding_visualization.png")
    fig.show()

# %%
# Example Usage
documents = [...] # From extract.ipynb
embeddings = OpenAIEmbeddings()

# Create FAISS index
db = FAISS.from_documents(documents, embeddings)

# Sample visualization
sample_texts = [doc.page_content[:200] for doc in documents[:5]]
sample_embeddings = embeddings.embed_documents(sample_texts)
visualize_embeddings(sample_embeddings, sample_texts)

# %%
# Save the vector store
db.save_local("faiss_store")

# Example Output
print("Vector store created with:")
print(f"- {len(documents)} documents")
print(f"- {len(db.index_to_docstore_id)} vectors")
print(f"- Dimension: {db.index.d}")

  embeddings = OpenAIEmbeddings()


ValidationError: 1 validation error for OpenAIEmbeddings
  Value error, Did not find openai_api_key, please add an environment variable `OPENAI_API_KEY` which contains it, or pass `openai_api_key` as a named parameter. [type=value_error, input_value={'model_kwargs': {}, 'cli...20, 'http_client': None}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/value_error