# 📦 Product Catalog Analysis — EDA, Pricing, Ratings, Profitability

In [None]:
# STEP 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
# Apply the white-grid seaborn theme to every plot produced below.
sns.set(style="whitegrid")
# Never truncate columns when a DataFrame is displayed.
pd.options.display.max_columns = None

In [None]:
# STEP 2: Load Dataset
# Read the product catalog; launch_date is parsed into datetimes on load.
df = pd.read_csv(
    'product_data.csv',
    parse_dates=['launch_date'],
)

# Structural overview: dimensions, per-column dtype / non-null info, first rows.
print('Shape:', df.shape)
df.info()
df.head()

In [None]:
# STEP 3: Data Quality Check
# Per-column count of missing values. The '\n' escape puts the table on its
# own line (a literal line break inside the quotes is a syntax error).
print('Missing Values:\n', df.isnull().sum())
# Number of rows that exactly duplicate an earlier row.
print('Duplicate Rows:', df.duplicated().sum())
# Summary statistics for all columns (numeric and non-numeric), one row per column.
df.describe(include='all').T

In [None]:
# STEP 4: Derived Metrics
# Profit margin as a percentage of price, rounded to 2 decimals.
# A zero price would make the division yield +/-inf (or NaN for 0/0), which
# then distorts every later mean and correlation, so treat it as missing.
safe_price = df['price'].replace(0, np.nan)
df['profit_margin'] = ((df['price'] - df['cost']) / safe_price * 100).round(2)
# Whole days elapsed between launch_date and the moment this cell runs.
df['age_days'] = (pd.Timestamp('today') - df['launch_date']).dt.days
df.head()

In [None]:
# STEP 5: Product Category Distribution
# Order the bars from the most to the least frequent category.
category_order = df['product_category'].value_counts().index

plt.figure(figsize=(7, 4))
sns.countplot(
    data=df,
    x='product_category',
    order=category_order,
    palette='Set2',
)
plt.title('Product Distribution by Category')
plt.xticks(rotation=45)  # tilt the category labels
plt.show()

In [None]:
# STEP 6: Price & Cost Analysis
# Histogram of prices (30 bins) with a KDE curve overlaid.
plt.figure(figsize=(8, 5))
sns.histplot(data=df, x='price', bins=30, kde=True, color='skyblue')
plt.title('Price Distribution')
plt.show()

# Cost against price, one point per product, coloured by category.
plt.figure(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x='cost',
    y='price',
    hue='product_category',
    alpha=0.7,
)
plt.title('Cost vs Price by Category')
plt.show()

In [None]:
# STEP 7: Profit Margin & Brand Insights
# Spread of profit margin within each product category.
plt.figure(figsize=(8, 5))
sns.boxplot(
    data=df,
    x='product_category',
    y='profit_margin',
    palette='coolwarm',
)
plt.title('Profit Margin by Product Category')
plt.xticks(rotation=45)
plt.show()

# Mean profit margin per brand, highest first; keep the ten best.
avg_margin_by_brand = df.groupby('brand')['profit_margin'].mean()
ranked_brands = avg_margin_by_brand.sort_values(ascending=False)
top_brands = ranked_brands.head(10)
top_brands.plot.bar(figsize=(8, 5), color='teal')
plt.title('Top 10 Brands by Avg Profit Margin')
plt.show()

In [None]:
# STEP 8: Ratings & Warranty
# Histogram of ratings (20 bins) with a KDE curve overlaid.
plt.figure(figsize=(8,5))
sns.histplot(df['rating'], bins=20, kde=True, color='gold')
plt.title('Product Rating Distribution')
plt.show()

# Spread of ratings within each product category.
plt.figure(figsize=(8,5))
sns.boxplot(x='product_category', y='rating', data=df, palette='pastel')
plt.title('Ratings by Product Category')
plt.xticks(rotation=45)
plt.show()

# Warranty period against rating, coloured by category.
# An explicit figure keeps this plot the same size as the others in this
# step; without it the plot falls back to matplotlib's default figure size.
plt.figure(figsize=(8,5))
sns.scatterplot(x='warranty_period', y='rating', data=df, hue='product_category', alpha=0.6)
plt.title('Warranty vs Rating')
plt.show()

In [None]:
# STEP 9: Stock & Product Age
# Product age in days against quantity in stock, coloured by category.
plt.figure(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x='age_days',
    y='stock_quantity',
    hue='product_category',
    alpha=0.6,
)
plt.title('Product Age vs Stock Quantity')
plt.show()

# Pairwise correlations between the numeric business metrics.
metric_columns = [
    'price',
    'cost',
    'profit_margin',
    'stock_quantity',
    'rating',
    'warranty_period',
]
corr = df[metric_columns].corr()

plt.figure(figsize=(7, 5))
sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

In [None]:
# STEP 10: Profitability Segmentation
# Margin bands in percent: [0, 20], (20, 40], (40, 60], (60, 80], (80, 100].
bins = [0, 20, 40, 60, 80, 100]
labels = ['Low', 'Below Avg', 'Moderate', 'High', 'Premium']
segments = pd.cut(df['profit_margin'], bins=bins, labels=labels, include_lowest=True)
# A product whose cost exceeds its price has a negative margin, which lies
# outside every bin: pd.cut leaves it as NaN and the count plot would silently
# drop it. Give those products an explicit 'Loss' segment, ordered first.
segments = segments.cat.add_categories(['Loss'])
segments = segments.cat.reorder_categories(['Loss'] + labels, ordered=True)
segments[df['profit_margin'] < 0] = 'Loss'
df['profit_segment'] = segments

plt.figure(figsize=(8,5))
sns.countplot(x='profit_segment', data=df, palette='crest')
plt.title('Product Profitability Segments')
plt.show()

In [None]:
# STEP 11: Key Insights
# Hand-written takeaways, printed as a bulleted list.
# The dashes were mis-encoded (UTF-8 read as a legacy code page) and printed
# as garbage; they are restored to proper em dashes here.
insights = [
    'Top 10 brands by profit margin identified — helps pricing strategy',
    'Warranty correlates positively with product rating in some categories',
    'Profit margins vary widely across categories — possible over/underpricing',
    'Older products tend to have higher stock — potential dead inventory',
    'Correlation analysis aids cross-functional optimization (cost, stock, profit)',
]
for insight in insights:
    print('-', insight)

In [None]:
# Save processed output
# Write the DataFrame to CSV without the row-index column.
df.to_csv(
    'product_analysis_output.csv',
    index=False,
)