In [None]:
# Imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Apply seaborn's "whitegrid" style globally so every figure below shares it.
sns.set(style="whitegrid")

In [None]:
# Load data (adjust path if running from a different working directory)
path = '../data/raw/superstore.csv'
df = pd.read_csv(path, low_memory=False)

# Parse dates explicitly instead of via `parse_dates`: read_csv does not raise on
# unparseable dates, it silently leaves the column as object dtype, which would
# let bad rows through and break the `.dt` accessor in later cells.
# errors='coerce' turns unparseable values into NaT so they are dropped below.
df['Order Date'] = pd.to_datetime(df['Order Date'], errors='coerce')

# Coerce Sales to numeric (unparseable values become NaN), then drop rows
# missing either Order Date or Sales.
df['Sales'] = pd.to_numeric(df['Sales'], errors='coerce')
df = df.dropna(subset=['Order Date', 'Sales'])
df.head()

## Aggregate to Monthly Level
Create a `Month` column (each order date truncated to the first day of its month) and sum `Sales` per month.

In [None]:
# Monthly aggregation: tag every order with the first day of its month,
# then total Sales per month in chronological order.
order_month = df['Order Date'].dt.to_period('M').dt.to_timestamp()
df['Month'] = order_month

monthly = (
    df.groupby('Month')['Sales']
    .sum()
    .reset_index()
    .sort_values('Month')
)
monthly.head()

In [None]:
# Monthly total sales overlaid with a trailing 3-month moving average
fig, ax = plt.subplots(figsize=(12, 5))

months = monthly['Month']
sales = monthly['Sales']
# Trailing (not centered) window; the first two months have no full window and are NaN.
trailing_avg = sales.rolling(window=3, center=False).mean()

ax.plot(months, sales, marker='o', label='Monthly Sales')
ax.plot(months, trailing_avg, label='3-month MA', linestyle='--')

ax.set_title('Monthly Total Sales')
ax.set_ylabel('Sales (USD)')
ax.set_xlabel('Month')
ax.legend()
ax.tick_params(axis='x', labelrotation=45)

fig.tight_layout()
plt.show()

## Month-over-Month Change
Compute percent change to highlight growth/decline between months.

In [None]:
# Month-over-month change in percent; the first month has no predecessor, so it is NaN.
mom_fraction = monthly['Sales'].pct_change()
monthly['pct_change'] = mom_fraction * 100

# Show the first year of results
monthly[['Month', 'Sales', 'pct_change']].head(12)

In [None]:
# Plot percent change (bar)
# seaborn places bars on a categorical axis, so raw Timestamps would be printed
# as full datetime strings on every tick. Format them as compact 'YYYY-MM'
# labels on a copy, leaving `monthly` itself untouched.
pct_plot_data = monthly.assign(Month=monthly['Month'].dt.strftime('%Y-%m'))

plt.figure(figsize=(12,4))
sns.barplot(x='Month', y='pct_change', data=pct_plot_data, color='steelblue')
plt.xticks(rotation=45)
plt.xlabel('Month')
plt.ylabel('Percent Change (%)')
plt.title('Month-over-Month % Change in Sales')
plt.tight_layout()
plt.show()

## Seasonal / Category Breakdown (optional)
You can decompose by `Category` or `Segment` to see which groups drive monthly trends. The cell below shows how to compute monthly series per `Category`.

In [None]:
# Monthly sales by Category (one row per month, one column per Category)
# Name the grouping key 'Month' so the resulting index is labelled directly.
order_month = df['Order Date'].dt.to_period('M').dt.to_timestamp().rename('Month')

# Sum Sales per (Month, Category), then spread Category across columns.
# unstack by level name avoids positional column lookups (after reset_index the
# columns are named, so pivot(index=0, columns=1) raises KeyError).
# fill_value=0 marks month/category combinations with no sales as zero.
monthly_cat = (
    df.groupby([order_month, 'Category'])['Sales']
    .sum()
    .unstack('Category', fill_value=0)
)
monthly_cat.head()

In [None]:
# Stacked area chart: one band per Category, stacked on top of each other
fig, ax = plt.subplots(figsize=(12, 5))
monthly_cat.plot.area(ax=ax, cmap='tab20')

ax.set_title('Monthly Sales by Category (stacked)')
ax.set_xlabel('Month')
ax.set_ylabel('Sales (USD)')
# Anchor the legend just outside the right edge so it does not cover the data.
ax.legend(title='Category', loc='upper left', bbox_to_anchor=(1.02, 1))

fig.tight_layout()
plt.show()

## Interpretation & Next Steps
- The line plot shows the overall sales trend, and the 3-month rolling average smooths month-to-month noise to reveal the underlying direction.
- Percent-change bars highlight months with large increases or drops, which merit investigation (promotions, seasonality, data issues).
- The stacked area by `Category` shows the contribution of each category to monthly totals.

Next possible analyses:
- Decompose time series into trend/seasonal/residual components (statsmodels)
- Drill into `State` or `Region` to find geographic drivers
- Compare year-over-year (seasonality) by aligning months across years
- Export `monthly` and `monthly_cat` to CSV in `data/processed/` for reporting.