In [2]:
import numpy as np
import pandas as pd

In [12]:
# Load the raw order data from the project-relative CSV file.
df = pd.read_csv(filepath_or_buffer='data/SuperStoreOrders.csv')

In [5]:
# Display the column labels of the loaded frame (bare expression -> rich cell output).
df.columns 

Index(['order_id', 'order_date', 'ship_date', 'ship_mode', 'customer_name',
       'segment', 'state', 'country', 'market', 'region', 'product_id',
       'category', 'sub_category', 'product_name', 'sales', 'quantity',
       'discount', 'profit', 'shipping_cost', 'order_priority', 'year'],
      dtype='object')

In [16]:
# Parse both date columns with identical settings: per-value format
# inference ('mixed'), day-first interpretation, and unparseable entries
# coerced to NaT rather than raising.
for _date_col in ('order_date', 'ship_date'):
    df[_date_col] = pd.to_datetime(
        df[_date_col],
        format='mixed',
        dayfirst=True,
        errors='coerce',
    )


In [21]:
# Convert `sales` to a numeric dtype.
# NOTE(review): assumes the raw column may contain thousands separators such
# as "1,234" -- confirm against the CSV. With errors='coerce' such strings
# would silently become NaN and disappear from every downstream sum, so the
# grouping commas are removed first. Only a comma followed by exactly three
# digits is stripped; anything else that is not a valid number is still
# coerced to NaN, as before. Already-numeric input is unaffected, so the cell
# stays safe to re-run.
df['sales'] = pd.to_numeric(
    df['sales']
      .astype(str)
      .str.strip()
      .str.replace(r',(?=\d{3}(?:\D|$))', '', regex=True),
    errors='coerce',
)

In [22]:
# ---- Week-wise Sales ----
# Sum `sales` per weekly ('W') bin of `order_date`, then append the
# bin-over-bin percentage change of those totals.
weekly_sales = df.groupby(pd.Grouper(key='order_date', freq='W'))['sales'].sum().reset_index()
weekly_sales = weekly_sales.assign(
    pct_change_week=weekly_sales['sales'].pct_change().mul(100)  # percentage change
)



In [24]:
# ---- Month-wise Sales ----
# Sum `sales` per monthly ('ME') bin of `order_date`, then append the
# bin-over-bin percentage change of those totals.
monthly_sales = df.groupby(pd.Grouper(key='order_date', freq='ME'))['sales'].sum().reset_index()
monthly_sales = monthly_sales.assign(
    pct_change_month=monthly_sales['sales'].pct_change().mul(100)
)

In [27]:
# Derive calendar year and month from the parsed order date.
# `year` replaces the column of the same name that came with the CSV.
df['year'], df['month'] = df['order_date'].dt.year, df['order_date'].dt.month

In [29]:
# Week number inside the month: days 1-7 -> 1, 8-14 -> 2, ..., 29-31 -> 5.
df['nth_week'] = df['order_date'].dt.day.sub(1).floordiv(7).add(1)

In [30]:

# --- Aggregate sales ---
# One row per (year, month, nth_week) holding the summed sales, ordered
# chronologically.
weekly_in_month = (
    df.groupby(['year', 'month', 'nth_week'], as_index=False)['sales']
      .sum()
      .sort_values(by=['year', 'month', 'nth_week'])
)


In [31]:
# --- Calculate nth-week MoM percentage change ---
def _nth_week_mom_pct_change(frame):
    """Percentage change of `sales` versus the same nth_week one month earlier.

    A plain ``groupby('nth_week').pct_change()`` compares each row with the
    previous *existing* row of that nth_week. When a month has no such row
    (nth_week 5 covers days 29-31, so a 28-day February never has one) the
    result would silently span two or more months while still being labelled
    month-over-month. Those rows are masked to NaN here.

    Parameters
    ----------
    frame : DataFrame with `year`, `month`, `nth_week` and `sales` columns
        and a unique index.

    Returns
    -------
    Series aligned to ``frame.index``; NaN where there is no row for the
    immediately preceding calendar month.
    """
    # Sort locally so the result does not depend on the caller's row order.
    ordered = frame.sort_values(['year', 'month', 'nth_week'])
    # Linear month counter: consecutive calendar months differ by exactly 1,
    # including the December -> January transition.
    month_index = ordered['year'] * 12 + ordered['month']
    months_since_prev = month_index - month_index.groupby(ordered['nth_week']).shift()
    pct = ordered.groupby('nth_week')['sales'].pct_change() * 100
    return pct.where(months_since_prev == 1)


# Assignment aligns on the index, so the original row order is preserved.
weekly_in_month['pct_change_mom'] = _nth_week_mom_pct_change(weekly_in_month)


In [32]:
# Persist the nth-week table (row index omitted) for downstream use.
weekly_in_month.to_csv(path_or_buf="metrics/nth_week_monthly_change.csv", index=False)

In [34]:
# ---- Combine metrics ----
# `weekly_sales` / `monthly_sales` were built with pd.Grouper(freq='W') and
# pd.Grouper(freq='ME'), whose bins are labelled by their RIGHT edge: the
# `order_date` column therefore holds the week-ending Sunday and the month-end
# date. The exported columns are named *_start, so convert the labels to the
# real start of each period instead of exporting end dates under a start name.
metrics = pd.DataFrame({
    # Monday that opens the Monday-Sunday week ending on the label.
    'week_start': weekly_sales['order_date'] - pd.Timedelta(days=6),
    'weekly_sales': weekly_sales['sales'],
    'pct_change_week': weekly_sales['pct_change_week'],
})

metrics_month = pd.DataFrame({
    # First day of the month the label belongs to.
    'month_start': monthly_sales['order_date'].dt.to_period('M').dt.start_time,
    'monthly_sales': monthly_sales['sales'],
    'pct_change_month': monthly_sales['pct_change_month'],
})

# Optional: Save both metrics to separate CSVs
metrics.to_csv("metrics/weekly_metrics.csv", index=False)
metrics_month.to_csv("metrics/monthly_metrics.csv", index=False)

In [35]:
# Goal: compute each product's share of its sub-category's total sales, per week

In [36]:
# Step 1: Weekly sales by product (within sub_category)
# One row per (week bin, category, sub_category, product_name) with summed sales.
weekly_product_sales = (
    df.groupby(
        [
            pd.Grouper(key='order_date', freq='W'),
            'category',
            'sub_category',
            'product_name',
        ]
    )['sales']
    .agg('sum')
    .reset_index()
)

In [37]:

# Step 2: Weekly total sales per sub_category
# Same weekly bins as the product-level table, aggregated one level up; the
# total is stored as `subcat_sales` so it cannot clash with `sales` on merge.
weekly_subcat_sales = (
    df.groupby(
        [
            pd.Grouper(key='order_date', freq='W'),
            'category',
            'sub_category',
        ]
    )['sales']
    .agg('sum')
    .reset_index(name='subcat_sales')
)

In [38]:
# Step 3: Merge & compute share %
# Attach the matching sub-category weekly total to every product row.
weekly_share = pd.merge(
    weekly_product_sales,
    weekly_subcat_sales,
    how='left',
    on=['order_date', 'category', 'sub_category'],
)

In [39]:
# Product sales as a percentage of the sub-category total for the same week.
weekly_share['pct_share'] = weekly_share['sales'].div(weekly_share['subcat_sales']).mul(100)

# Save result
weekly_share.to_csv(path_or_buf="metrics/weekly_product_share.csv", index=False)