# Time Series Analysis — Practical Notebook

Generated: 2025-09-02 02:01 UTC

In [None]:
import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# Plot styling applied to figures drawn after this call.
sns.set_theme(context='notebook', style='whitegrid', palette='deep')

# Seeded generator: every random draw below goes through RNG so reruns
# of the notebook from the top reproduce the same synthetic data.
RNG = np.random.default_rng(42)

# Column being forecast, and the regressor columns passed as `exog`.
TARGET = 'revenue'
EXOG = ['promo', 'holiday']

## Generate synthetic data (replace with your CSV if needed)

In [None]:
# Synthetic daily series: linear trend + weekly and annual sinusoids
# + promo/holiday uplifts + Gaussian noise.
days = 620
idx = pd.date_range('2023-01-01', periods=days, freq='D')
trend = np.linspace(100, 170, days)
weekly = 10 * np.sin(2 * np.pi * idx.dayofweek.to_numpy() / 7)
annual = 5 * np.sin(2 * np.pi * idx.dayofyear.to_numpy() / 365.25)
# NOTE: the order of the RNG draws (noise, promo, nan_days) determines the
# generated values; keep it fixed so the data stays reproducible.
noise = RNG.normal(0, 6, days)
promo = (RNG.random(days) < 0.08).astype(int)   # promo on roughly 8% of days
holiday = ((idx.month == 12) & (idx.day >= 20)).astype(int)  # Dec 20-31 flag
revenue = trend + weekly + annual + 8 * promo + 12 * holiday + noise

ts = pd.DataFrame({'revenue': revenue, 'promo': promo, 'holiday': holiday}, index=idx)

# Knock out 10 random days to simulate small gaps in the target.
nan_days = RNG.choice(ts.index, size=10, replace=False)
ts.loc[nan_days, 'revenue'] = np.nan

# Regularise the index BEFORE imputing, so any date that asfreq has to
# insert (relevant when a real CSV replaces the synthetic data) is imputed too.
ts = ts.sort_index().asfreq('D')

# Fill the gaps by time-weighted interpolation. Interpolation must run first:
# a preceding ffill() would already fill every interior gap with a flat
# carry-forward value and leave interpolate() with nothing to do.
# ffill()/bfill() afterwards only cover gaps at the very end/start of the
# series, which have no neighbour on one side to interpolate from.
ts['revenue'] = ts['revenue'].interpolate(method='time').ffill().bfill()
ts.head()

## EDA, STL, Stationarity, Baselines, SARIMAX (condensed)

In [None]:
# Raw target series over time.
fig, ax = plt.subplots(1, 1, figsize=(12, 4))
ax.plot(ts.index, ts[TARGET])
ax.set_title('Daily revenue')
plt.show()

# STL
# period=7 is one week on the daily index; robust=True is passed through to
# statsmodels' STL. Note that `stl` holds the fitted result, not the STL object.
stl = STL(ts[TARGET], period=7, robust=True).fit()
stl.plot()
plt.show()

# ADF/KPSS
# Only the first two entries of each result are printed (test statistic and
# p-value, per the statsmodels documentation). The two tests are printed
# side by side so they can be read together.
print('ADF:', adfuller(ts[TARGET].dropna())[:2])
print('KPSS:', kpss(ts[TARGET].dropna(), regression='c', nlags='auto')[:2])
# Baselines
# Hold out the final 60 days: rows up to and including `cut` are training
# data, rows from the following day onwards are the validation window.
cut = ts.index.max() - pd.Timedelta(days=60)
train, valid = ts.loc[:cut], ts.loc[cut+pd.Timedelta(days=1):]
h = len(valid)  # forecast horizon, in days

# Naive: repeat the last training observation across the whole horizon.
naive = pd.Series(train[TARGET].iloc[-1], index=valid.index)
# Seasonal naive: tile the last 7 training values and trim to the horizon.
# The validation window starts the day after training ends, so each forecast
# day lines up with the same weekday from that last week.
seasonal_naive = pd.Series((train[TARGET].iloc[-7:].tolist() * (h//7 + 1))[:h], index=valid.index)
# 7-day mean: flat forecast at the average of the last training week.
sma7 = pd.Series(train[TARGET].iloc[-7:].mean(), index=valid.index)

# Every line is labeled and a legend is drawn; without them the three dashed
# baselines cannot be told apart on the figure.
fig, ax = plt.subplots(1, 1, figsize=(12, 4))
ax.plot(train[TARGET], label='train')
ax.plot(valid[TARGET], label='actual')
ax.plot(naive, '--', label='naive')
ax.plot(seasonal_naive, '--', label='seasonal naive')
ax.plot(sma7, '--', label='SMA(7)')
ax.set_title('Baseline forecasts')
ax.legend()
plt.show()
# SARIMAX
# (1,1,1) non-seasonal terms, (1,1,1) seasonal terms with a period of 7,
# and the EXOG columns as regressors. The stationarity and invertibility
# constraints on the estimated parameters are switched off.
mod = sm.tsa.statespace.SARIMAX(
    train[TARGET],
    exog=train[EXOG],
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 7),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
res = mod.fit(disp=False)

# Forecast over the validation window; the regressor values for those
# dates are supplied from `valid`.
fc = res.get_forecast(steps=len(valid), exog=valid[EXOG])
mean_fc = fc.predicted_mean
conf = fc.conf_int()

# Actuals against the forecast; the first and second columns of `conf`
# are used as the edges of the shaded band.
fig, ax = plt.subplots(1, 1, figsize=(12, 4))
ax.plot(train[TARGET], label='train')
ax.plot(valid[TARGET], label='actual')
ax.plot(mean_fc, label='SARIMAX')
ax.fill_between(conf.index, conf.iloc[:, 0], conf.iloc[:, 1], alpha=.2)
ax.legend()
plt.show()