# Embedding Generation and Analysis

This notebook demonstrates how to generate embeddings for furniture products using sentence-transformers and visualize the embedding space.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sentence_transformers import SentenceTransformer
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import cosine_similarity
import warnings
# Silence every warning so library notices do not clutter the cell outputs
warnings.simplefilter('ignore')

# Plot appearance: apply the seaborn-flavoured matplotlib style first, then
# set the "husl" colour palette (the palette call must come after the style).
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")


## 1. Load Model and Generate Embeddings


In [None]:
# Instantiate the sentence-transformers encoder identified by `model_name`;
# the name is printed first so the load step is visible in the cell output.
model_name = "all-MiniLM-L6-v2"
print(f"Loading model: {model_name}")
model = SentenceTransformer(model_name_or_path=model_name)

# Build a small synthetic furniture catalogue, seeded for reproducibility.
# NOTE(review): this sample was described as the same one used in the previous
# notebook. The generation below keeps name/category/description consistent
# per product, so confirm whether that notebook needs the same change.
np.random.seed(42)
n_products = 50  # Smaller sample for demonstration
categories = ['sofa', 'chair', 'table', 'bed', 'desk', 'storage']

# Draw ONE category per product and derive every text field from it. Sampling
# the category independently for the name, the category column and the
# description produces rows such as a "Sofa 1" labelled "chair" and described
# as a table, which makes the combined text meaningless as an embedding input.
product_categories = np.random.choice(categories, n_products)

# Number products within their own category so names are unique
# ("Sofa 1", "Sofa 2", ...).
seen_per_category = {}
product_names = []
for cat in product_categories:
    seen_per_category[cat] = seen_per_category.get(cat, 0) + 1
    product_names.append(f"{cat.title()} {seen_per_category[cat]}")

data = {
    'id': range(1, n_products + 1),
    'name': product_names,
    'category': product_categories,
    # Prices are normally distributed around 500 and clipped to [50, 2000]
    'price': np.random.normal(500, 200, n_products).clip(50, 2000),
    'description': [f"Beautiful {cat} perfect for your home" for cat in product_categories],
}

df = pd.DataFrame(data)
print(f"Dataset shape: {df.shape}")
df.head()


In [None]:
# Build one text string per product: name, category and description joined
# by single spaces.
df['text_for_embedding'] = df['name'].str.cat([df['category'], df['description']], sep=" ")

# Encode every product text with the sentence-transformer model
print("Generating embeddings...")
embeddings = model.encode(df['text_for_embedding'].to_list())
print(f"Embedding shape: {embeddings.shape}")
print(f"Embedding dimension: {embeddings.shape[1]}")

# Keep each vector alongside its product row as a plain Python list
df['embedding'] = embeddings.tolist()
