# Furniture Product Dataset Exploration

This notebook explores the furniture product dataset to understand the data structure, distributions, and characteristics.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
# Silence library warnings so they do not clutter cell output.
# NOTE(review): this hides every warning category (deprecations included) for the whole
# kernel session; consider narrowing it once the noisy source is known.
warnings.filterwarnings('ignore')

# Set style for better plots
# matplotlib >= 3.6 names the seaborn look 'seaborn-v0_8'; older releases only know
# 'seaborn'. Pick whichever this installation provides so the cell runs on both.
_seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_seaborn_style)
sns.set_palette("husl")


## 1. Load and Inspect Dataset


In [None]:
# Load the dataset (assuming CSV format)
# Replace with your actual dataset path
dataset_path = "../../backend/datasets/furniture_products.csv"

try:
    df = pd.read_csv(dataset_path)
    print("Dataset loaded successfully!")
    print(f"Shape: {df.shape}")
except FileNotFoundError:
    # Fall back to a small synthetic catalogue so the rest of the notebook can still run.
    print("Dataset file not found. Creating sample data for demonstration...")

    # Create sample furniture dataset
    np.random.seed(42)  # fixed seed keeps the sample identical across runs
    n_products = 100

    categories = ['sofa', 'chair', 'table', 'bed', 'desk', 'storage']

    # Draw each product's category exactly once and derive every text field from it,
    # so name, category, description and image_url all describe the same kind of item.
    sample_categories = [str(cat) for cat in np.random.choice(categories, n_products)]

    # Number products within their own category ("Sofa 1", "Sofa 2", ...) so that
    # names are unique and agree with the category column.
    seen_per_category = {}
    sample_names = []
    for cat in sample_categories:
        seen_per_category[cat] = seen_per_category.get(cat, 0) + 1
        sample_names.append(f"{cat.title()} {seen_per_category[cat]}")

    data = {
        'id': range(1, n_products + 1),
        'name': sample_names,
        'category': sample_categories,
        # Normally distributed prices, clipped into [50, 2000] and rounded to cents.
        'price': np.random.normal(500, 200, n_products).clip(50, 2000).round(2),
        'description': [f"Beautiful {cat} perfect for your home" for cat in sample_categories],
        'image_url': [f"https://via.placeholder.com/300x200?text={cat.title()}" for cat in sample_categories]
    }

    df = pd.DataFrame(data)
    print(f"Sample dataset created with {df.shape[0]} products")
