# ASOS Product Catalog EDA

This notebook focuses on the **Product Dimension** of the dataset.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sqlalchemy
import sys
import os

# Add project root to path
sys.path.append('../')
from src.config import Config

# Database Connection
# create_engine() is lazy: it only builds the Engine object and does not
# contact the database. Open (and immediately release) one connection so the
# success message reflects a real round-trip to the server.
try:
    engine = sqlalchemy.create_engine(Config().DATABASE_URL)
    with engine.connect():
        pass
    print("Database Connection Successful")
except Exception as e:
    print(f"Database Connection Failed: {e}")
    # Re-raise instead of swallowing: the data-loading cell needs a working
    # `engine`, so continuing would only fail later with a less helpful error.
    raise

## 1. Load Product Data

In [None]:
# Pull one row per priced product, joined to its brand and category names.
# Products without a base price are excluded in SQL so the frame needs no
# further null filtering on that column.
query = """
SELECT 
    p.name, p.base_price, 
    b.brand_name, c.category_name, 
    p.num_sizes, p.num_images
FROM dim_product p
JOIN dim_brand b ON p.brand_id = b.brand_id
JOIN dim_category c ON p.category_id = c.category_id
WHERE p.base_price IS NOT NULL
"""
df = pd.read_sql(sql=query, con=engine)

# Preview the first rows as the cell output.
df.head()

## 2. Price Distribution

In [None]:
# Histogram (30 bins) of product base prices with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(df['base_price'], bins=30, kde=True, ax=ax)
# '\u00a3' is the pound sign. It is written as an escape so the title cannot
# be corrupted into mojibake if the notebook file is re-encoded.
ax.set_title('Product Price Distribution (\u00a3)')
ax.set_xlabel('Base price (\u00a3)')
ax.set_ylabel('Number of products')
plt.show()

## 3. Top Brands

In [None]:
# Count products per brand and keep the (up to) ten most frequent brands;
# value_counts() already returns them in descending order.
top_brands = df['brand_name'].value_counts().head(10)

# Horizontal bars: product counts on the x-axis, brand names on the y-axis.
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(x=top_brands.values, y=top_brands.index, ax=ax)
# Use the actual number of brands shown so the title stays truthful when the
# catalog contains fewer than ten brands.
ax.set_title(f'Top {len(top_brands)} Brands by Product Count')
ax.set_xlabel('Number of products')
ax.set_ylabel('Brand')
plt.show()