# 01 — Data Prep & EDA

Explore the sales time series: trends, seasonality, promotions, and holidays.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from pathlib import Path

# Synthetic sales data, located relative to the notebook's working directory.
DATA_PATH = Path('../data/sales_synthetic.csv')

# Read the CSV with 'Date' parsed as datetimes, order the rows
# chronologically, then promote 'Date' to the index so the frame can be
# resampled by time.
df = (
    pd.read_csv(DATA_PATH, parse_dates=['Date'])
    .sort_values(by='Date')
    .set_index('Date')
)

# Quick sanity check: first rows followed by summary statistics.
print(df.head(), df.describe(), sep='\n')

# Raw series, one point per row of the input.
ax = df['Sales'].plot(title='Daily Sales')
ax.set_ylabel('Sales')
plt.show()

# Smoothed view: mean of 'Sales' within each weekly ('W') resampling bin.
weekly_mean = df['Sales'].resample('W').mean()
ax = weekly_mean.plot(title='Weekly Avg Sales')
ax.set_ylabel('Sales')
plt.show()

# Additive decomposition of 'Sales' into trend, seasonal and residual parts.
# period=7 treats every 7 consecutive observations as one cycle, which is a
# weekly pattern when the rows are daily.
# NOTE(review): this presumes one row per calendar day with no gaps or
# missing values in 'Sales' -- confirm against the input data.
res = seasonal_decompose(df['Sales'], model='additive', period=7)

# One figure per component, shown in the order trend -> seasonal -> residual.
for component, title in (
    (res.trend, 'Trend'),
    (res.seasonal, 'Seasonality (Weekly)'),
    (res.resid, 'Residuals'),
):
    component.plot(title=title)
    plt.show()

In [None]:
# Save prepared series for modeling.
# The output is written next to the input file; deriving its location from
# DATA_PATH keeps the data directory defined in exactly one place instead of
# repeating it as a second hard-coded string.
PREPARED_PATH = DATA_PATH.with_name('prepared_sales.csv')

# Work on a copy so later changes to `prepared` do not touch `df`.
prepared = df.copy()
prepared.to_csv(PREPARED_PATH)  # the 'Date' index is written as the first column

# Last expression of the cell: displays the final rows as a visual check.
prepared.tail()