# 📊 Discount % — Lag Pattern Analysis (Weekly)
Explores short-term, monthly, biannual and annual lag correlations in the discount percent time series.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Notebook-wide plot styling: 120-dpi figures, no top/right spines,
# and a faint grid on every axes.
plt.rcParams['figure.dpi'] = 120
plt.rcParams['axes.spines.top'] = False
plt.rcParams['axes.spines.right'] = False
plt.rcParams['axes.grid'] = True
plt.rcParams['grid.alpha'] = 0.3

## 1. Resample & Build Lag DataFrame

In [None]:
# Collapse the observations to one mean discount value per week and drop the
# weeks whose mean is NaN.
# NOTE(review): after dropna() the lags below are positional, so shift(k)
# means "k rows back"; that equals "k weeks back" only if no week was
# dropped — confirm the weekly series is gap-free.
ts = df_index_dateSeen.resample("W")["discount_percent"].mean().dropna()

# Weekly-meaningful lags:
# 1-4     → short-term memory
# 8,12-14 → ~monthly / quarterly cycles
# 26      → biannual
# 30,52   → long-term / annual
lags = [1, 2, 3, 4, 8, 12, 13, 14, 26, 30, 52]

# One column per lag, all sharing the weekly index. The first k rows of
# lag_k are NaN because there is nothing earlier to shift in.
lag_df = pd.DataFrame(
    {"current": ts, **{f"lag_{k}": ts.shift(k) for k in lags}}
)

print(f"Time series length : {len(ts)} weeks")
print(f"Date range         : {ts.index.min().date()} → {ts.index.max().date()}")
lag_df.tail()

## 2. Correlation Table — Which Lags Matter Most?

In [None]:
# Print one row per lag: the Pearson correlation between the current value and
# the value `lag` rows earlier, plus a coarse strength label
# (|r| >= 0.7 strong, |r| >= 0.4 moderate, otherwise weak).
print(f"{'Lag':<10} {'Correlation':>12}  {'Weeks Behind':>14}  Strength")
print("=" * 52)
for lag in lags:
    corr = lag_df["current"].corr(lag_df[f"lag_{lag}"])
    if pd.isna(corr):
        # No correlation could be computed (e.g. the series is shorter than
        # the lag); say so rather than reporting it as a weak relationship.
        strength = "n/a"
    elif abs(corr) >= 0.7:
        strength = "★★★ Strong"
    elif abs(corr) >= 0.4:
        strength = "★★  Moderate"
    else:
        strength = "★   Weak"
    # Keep the " wks" suffix inside the 14-character column so the Strength
    # values line up under their header.
    weeks_behind = f"{lag} wks"
    print(f"lag_{lag:<6} {corr:>12.4f}  {weeks_behind:>14}  {strength}")
print("=" * 52)

## 3. Raw Time Series

In [None]:
# Line chart of the weekly mean discount over the whole date range.
fig, ax = plt.subplots(figsize=(14, 4))
ts.plot(ax=ax, linewidth=1.2, color='steelblue')

# Blank out the x-axis label; label the y-axis and title the chart.
ax.set(xlabel="", ylabel="Discount %")
ax.set_title("Weekly Mean Discount % Over Time", fontsize=13)

fig.tight_layout()
plt.show()

## 4. Lag Scatter Plots (with Regression Lines)

In [None]:
# Grid of scatter plots, one panel per lag: x = value `lag` rows earlier,
# y = current value, with a least-squares line and Pearson r in the title.
n_lags = len(lags)
cols   = 4
rows   = (n_lags + cols - 1) // cols  # ceiling division: enough rows for all lags

fig, axes = plt.subplots(rows, cols, figsize=(cols * 4, rows * 3.5))
axes = axes.flatten()

for ax, lag in zip(axes, lags):
    lag_col = f"lag_{lag}"
    # Keep only the rows where both the current and the lagged value exist.
    temp = lag_df[["current", lag_col]].dropna()
    corr = temp["current"].corr(temp[lag_col])

    ax.scatter(temp[lag_col], temp["current"],
               alpha=0.5, edgecolors="k", linewidths=0.3,
               color="steelblue", s=30)

    # Regression line. A line needs at least two points, and np.polyfit
    # raises on an empty sample (a lag longer than the series), so skip the
    # fit in that case instead of aborting the whole figure.
    if len(temp) >= 2:
        m, b = np.polyfit(temp[lag_col], temp["current"], 1)
        x_   = np.linspace(temp[lag_col].min(), temp[lag_col].max(), 100)
        ax.plot(x_, m * x_ + b, color="crimson", linewidth=1.5)

    # Colour-code title by correlation strength
    title_color = "green" if abs(corr) >= 0.7 else "darkorange" if abs(corr) >= 0.4 else "gray"
    ax.set_title(f"lag_{lag}  (r = {corr:.3f})", fontsize=9, color=title_color)
    ax.set_xlabel(lag_col, fontsize=8)
    ax.set_ylabel("current", fontsize=8)
    ax.tick_params(labelsize=7)

# Hide the panels of the last row that have no lag assigned to them.
for unused_ax in axes[n_lags:]:
    unused_ax.set_visible(False)

plt.suptitle("Lag Scatter Plots — Discount % (Weekly)\n"
             "Green title = strong | Orange = moderate | Gray = weak",
             fontsize=12, y=1.02)
plt.tight_layout()
plt.show()

## 5. ACF & PACF — Up to 60 Lags
> **ACF**: total correlation at each lag.  
> **PACF**: direct effect after removing shorter-lag influence.  
> Spikes outside the shaded band are statistically significant.

In [None]:
# Autocorrelation (top) and partial autocorrelation (bottom) of the weekly series.
# statsmodels' PACF raises ValueError once the requested lag count reaches
# roughly half the sample size, so cap the 60 requested lags for short series.
# Series of 122 weeks or more still get the full 60 lags.
max_lag = max(1, min(60, len(ts) // 2 - 1))

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 7))
plot_acf(ts, lags=max_lag, ax=ax1,
         title=f"ACF  — up to {max_lag} lags (weeks)", color="steelblue")
plot_pacf(ts, lags=max_lag, ax=ax2,
          title=f"PACF — up to {max_lag} lags (weeks)", method="ywm", color="crimson")
plt.tight_layout()
plt.show()

## 6. Rolling 26-Week Correlation — Are Relationships Stable?
> A flat line = stable pattern. Wild swings = the lag relationship breaks down over time.

In [None]:
# Lags to track over time, each paired with the colour of its line.
rolling_lags   = [1, 4, 12, 52]
rolling_colors = ["steelblue", "orange", "green", "crimson"]

fig, ax = plt.subplots(figsize=(14, 4))

for lag, shade in zip(rolling_lags, rolling_colors):
    name = f"lag_{lag}"
    # Skip any lag that has no column in lag_df.
    if name not in lag_df.columns:
        continue
    # Pearson r between current and lagged values over a 26-row window.
    rolling_corr = lag_df["current"].rolling(26).corr(lag_df[name])
    rolling_corr.plot(ax=ax, label=f"lag_{lag}", color=shade, linewidth=1.5)

# Reference lines: zero correlation, then the +/-0.7 "strong" thresholds.
ax.axhline(0, color="black", linewidth=0.8, linestyle="--")
for threshold in (0.7, -0.7):
    ax.axhline(threshold, color="green", linewidth=0.6, linestyle=":", alpha=0.7)

ax.set_title("Rolling 26-Week Correlation: Current vs Selected Lags", fontsize=12)
ax.set_ylabel("Pearson r")
ax.legend(title="Lag")
plt.tight_layout()
plt.show()

## 7. Correlation Heatmap — All Lags at a Glance

In [None]:
import seaborn as sns

# Pairwise correlation of "current" and every lag column.
# dropna() keeps only the rows where all lag columns are populated, so every
# cell is computed on the same rows. The values can therefore differ from
# per-lag correlations computed on each pair's own overlapping rows.
corr_matrix = lag_df.dropna().corr()

fig, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(
    corr_matrix,
    annot=True, fmt=".2f",       # print each coefficient to two decimals
    cmap="RdYlGn",
    center=0, vmin=-1, vmax=1,   # fix the colour scale to the full [-1, 1] range
    linewidths=0.5,
    ax=ax
)
ax.set_title("Correlation Heatmap — Current + All Lags", fontsize=13)
plt.tight_layout()
plt.show()

## 8. Final Lag DataFrame

In [None]:
# Display the complete lag table: "current" plus one lag_<k> column per lag.
lag_df