# Embeddings and Vector Database Setup

Required:
- OpenAI API key in `.env` (`OPENAI_API_KEY`)
- Pinecone API key and environment name in `.env` (`PINECONE_API_KEY`, `PINECONE_ENV`)
- `customers-100.csv` file in the current directory

In [None]:
import os
from dotenv import load_dotenv
import pinecone
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import CSVLoader

# Pull the variables from the local `.env` file into the process environment
# before any of them are read below.
load_dotenv()

# Each value is None when the corresponding variable is not set.
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
PINECONE_ENV = os.environ.get('PINECONE_ENV')
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

## Understanding Embeddings

In [None]:
# Shared embedding client; the later cells reuse this same object.
embeddings = OpenAIEmbeddings()

# Turn one sample sentence into its embedding vector.
text = "Software Engineer with 5 years experience"
vector = embeddings.embed_query(text)

# Report the vector length and a short preview of its leading values.
print(
    f"Vector dimension: {len(vector)}",
    f"First 5 values: {vector[:5]}",
    sep="\n",
)

## Initialize Pinecone

In [None]:
# Connect the Pinecone client using the credentials read from the environment.
# NOTE(review): `pinecone.init` is the module-level API of older
# pinecone-client releases — confirm the installed version still provides it.
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)

index_name = 'employee-index'

# Create the index only when it is missing, so re-running this cell reuses it.
# NOTE(review): dimension=1536 presumably has to equal the length of the
# vectors produced by `embeddings` — confirm against the "Vector dimension"
# value printed by the embeddings cell.
if index_name not in pinecone.list_indexes():
    pinecone.create_index(name=index_name, dimension=1536, metric='cosine')

# Handle used by the following cells for upsert / query / delete calls.
index = pinecone.Index(index_name)

## Load and Store Employee Data

In [None]:
# Load CSV data into LangChain documents (page_content + metadata per entry).
loader = CSVLoader(
    file_path="customers-100.csv",
    csv_args={
        'delimiter': ',',
        'quotechar': '"'
    }
)
documents = loader.load()

# Embed all rows with a single batched call instead of one request per row.
# The guard keeps an empty CSV from triggering an embedding request at all.
texts = [doc.page_content for doc in documents]
vectors = embeddings.embed_documents(texts) if texts else []

# One (id, values, metadata) tuple per row. IDs follow the row order
# ("emp0", "emp1", ...); the original text is kept in the metadata so that
# search results can display it.
records = [
    (f"emp{i}", row_vector, {"text": doc.page_content, **doc.metadata})
    for i, (doc, row_vector) in enumerate(zip(documents, vectors))
]

# Upsert in fixed-size batches to keep each request small while avoiding
# one network round-trip per record.
UPSERT_BATCH_SIZE = 100
for start in range(0, len(records), UPSERT_BATCH_SIZE):
    index.upsert(records[start:start + UPSERT_BATCH_SIZE])

print(f"Stored {len(documents)} employee records")

## Search Examples

In [None]:
def search_employees(query, top_k=3):
    """Print the stored records most similar to a free-text query.

    The query is embedded with the module-level ``embeddings`` client and
    sent to the module-level Pinecone ``index``. For every returned match
    the score (two decimals) and the ``text`` metadata field are printed.

    Args:
        query: Natural-language search string.
        top_k: Maximum number of matches requested from the index.

    Returns:
        None. Results are written to standard output only.
    """
    response = index.query(
        vector=embeddings.embed_query(query),
        top_k=top_k,
        include_metadata=True,
    )

    for hit in response['matches']:
        print(f"Score: {hit['score']:.2f}")
        employee_text = hit['metadata']['text']
        print(f"Employee Data: {employee_text}\n")

# Run the sample searches: each entry pairs a heading with the query to send.
for heading, query in (
    ("Searching for engineers:", "Find me software engineers"),
    ("\nSearching for managers:", "Show me managers"),
):
    print(heading)
    search_employees(query)

## Update and Delete Operations

In [None]:
# Update: embed the replacement text and upsert it under the existing id
# "emp0"; the metadata written here contains only the new text.
new_text = "Updated employee information for emp0"
new_vector = embeddings.embed_query(new_text)
index.upsert(vectors=[("emp0", new_vector, {"text": new_text})])

# Delete: remove the record stored under id "emp1".
index.delete(ids=["emp1"])

# Inspect the index after the changes above.
stats = index.describe_index_stats()
print("Current index stats:", stats)