# Supermart Grocery Sales – EDA

This notebook explores the Supermart grocery sales dataset: a first look at the raw data, total sales by category and by product, total profit by region, and the monthly sales trend.

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the raw sales CSV, parse the order date, and derive calendar columns.
# NOTE(review): the path is absolute; consider a configurable data directory.
df = pd.read_csv(
    r"/mnt/data/Supermart-Grocery-Sales-Retail-Analytics/data/raw/supermart_sales.csv"
).assign(**{
    # Entries are evaluated in order, so 'Year' and 'Month' are computed from
    # the already-parsed 'Order Date' column.
    'Order Date': lambda frame: pd.to_datetime(frame['Order Date']),
    'Year': lambda frame: frame['Order Date'].dt.year,
    'Month': lambda frame: frame['Order Date'].dt.month,
})

# Quick look: dimensions and the first few rows.
print("Rows, Columns:", df.shape)
display(df.head())

# Descriptive statistics over every column, numeric or not.
print("\nSummary:")
display(df.describe(include='all'))


In [None]:

# Sales by Category
# Total the Sales column within each Category, ordered largest to smallest.
cat_sales = (
    df.groupby('Category')['Sales']
      .sum()
      .sort_values(ascending=False)
)
cat_sales.plot.bar(title='Total Sales by Category')
plt.tight_layout()
plt.show()


In [None]:

# Monthly Sales Trend
# Bucket orders at month-end frequency and total the Sales in each bucket.
# The frequency is given as an offset object instead of the string alias 'M':
# pandas 2.2+ deprecates 'M' in favour of 'ME', while 'ME' is rejected by older
# releases. The offset object yields the same bins on both.
monthly = (
    df.set_index('Order Date')
      .resample(pd.offsets.MonthEnd())['Sales']
      .sum()
)
ax = monthly.plot(title='Monthly Sales Trend')
# Label both axes so the figure can be read without the surrounding code.
ax.set_xlabel('Order Date')
ax.set_ylabel('Sales')
plt.tight_layout()
plt.show()


In [None]:

# Top 10 Products by Sales
# Sum Sales per product and keep the ten largest totals, in descending order.
top_products = (
    df.groupby('Product Name')['Sales']
      .sum()
      .sort_values(ascending=False)
      .head(10)
)
ax = top_products.plot(kind='barh', title='Top 10 Products by Sales')
# barh draws the first row at the bottom of the axis; flip the y-axis so the
# best-selling product is the top bar and the ranking reads top-down. The
# top_products Series itself keeps its descending order.
ax.invert_yaxis()
ax.set_xlabel('Sales')
plt.tight_layout()
plt.show()


In [None]:

# Profit by Region
# Total the Profit column within each Region, ordered largest to smallest.
region_profit = (
    df.groupby('Region')['Profit']
      .sum()
      .sort_values(ascending=False)
)
region_profit.plot.bar(title='Total Profit by Region')
plt.tight_layout()
plt.show()
