<a href="https://colab.research.google.com/github/comparativechrono/Rephasing-of-Seasonal-Birth-Rates-in-the-United-Kingdom-/blob/main/Supplemental_material/SI_figures_1_and_2_and_table_S2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
Reproduces SI figures 1 and 2 and table S2.
Requires: pandas, numpy, matplotlib, statsmodels, scipy
Place `environmental.csv` in the working directory.
"""
import pandas as pd, numpy as np, matplotlib.pyplot as plt, statsmodels.api as sm
from statsmodels.tsa.seasonal import STL
from statsmodels.stats.diagnostic import breaks_cusumolsresid
from scipy.stats import f

# 1 Load data -------------------------------------------------------------
df = pd.read_csv('environmental.csv')

# Two-digit years are ambiguous: '%y' can resolve an early-century label to
# the 2000s (e.g. '38' -> 2038).  Any parsed year later than this cutoff is
# treated as belonging to the previous century.
_CENTURY_CUTOFF_YEAR = 2025


def parse(s):
    """Convert a 'Mon-yy' date label into a Timestamp in the intended century.

    Parameters
    ----------
    s : str
        Date label in '%b-%y' form, as stored in the 'Date' column.

    Returns
    -------
    pandas.Timestamp
        The parsed date, moved back 100 years when the parsed year is
        greater than ``_CENTURY_CUTOFF_YEAR``.
    """
    # Parse once and reuse the result instead of re-parsing for the year
    # test and for each branch.
    ts = pd.to_datetime(s, format='%b-%y')
    if ts.year > _CENTURY_CUTOFF_YEAR:
        return ts - pd.DateOffset(years=100)
    return ts


df['Date'] = df['Date'].apply(parse)
# Chronological DatetimeIndex for the downstream time-series steps.
df = df.set_index('Date').sort_index()

# 2 STL decomposition -----------------------------------------------------
# Extract the seasonal component of each series with a robust STL fit.
period = 12  # observations per seasonal cycle (date labels are month-year)
seasonals = {}
for _col in ('BR', 'photoperiod', 'light', 'temp'):
    _stl_fit = STL(df[_col], period=period, robust=True).fit()
    seasonals[_col] = _stl_fit.seasonal
# One column per variable; rows with a missing value in any column are dropped.
dfs = pd.DataFrame(seasonals).dropna()

# Figure 1 – birth-season amplitude ---------------------------------------
# Amplitude per calendar year = max minus min of the seasonal BR component.
_br_by_year = dfs['BR'].groupby(dfs.index.year)
amp = _br_by_year.max() - _br_by_year.min()

plt.figure(facecolor='white')
plt.plot(amp.index, amp.values)
plt.axvline(1975, ls='--', c='r')  # dashed red reference line at 1975
plt.title('Figure 1  Annual amplitude of birth seasonality')
plt.xlabel('Year')
plt.ylabel('Peak‑to‑trough amplitude')
plt.tight_layout()
plt.show()

# 3 Static regression set-up ----------------------------------------------
# Response: seasonal component of BR.
y = dfs['BR']
# Regressors: seasonal photoperiod and temperature components, with a
# constant column added by statsmodels for the intercept.
X = sm.add_constant(dfs[['photoperiod', 'temp']])

# Figure 2 – Chow scan ----------------------------------------------------
# For each candidate month, compare the pooled OLS fit with two separate
# fits (observations up to and including the candidate vs. after it) and
# record the Chow F statistic and its p-value.
scan_dates = pd.date_range('1965-01', '1985-12', freq='MS')
F_vals, p_vals = [], []

# Pooled (no-break) regression over the whole sample.
full_res = sm.OLS(y, X).fit()
rss_full = (full_res.resid**2).sum()
n, k = X.shape  # n observations, k regressors (constant included)

# Loop invariants: denominator degrees of freedom and the guard band.
# Candidates within five years of either end of the sample are skipped
# (presumably so both sub-regressions keep enough observations).
_df_denom = n - 2*k
_scan_lo = y.index.min() + pd.DateOffset(years=5)
_scan_hi = y.index.max() - pd.DateOffset(years=5)

for dt in scan_dates:
    if dt <= _scan_lo or dt >= _scan_hi:
        F_vals.append(np.nan)
        p_vals.append(np.nan)
        continue
    pre, post = y.index <= dt, y.index > dt
    rss_split = ((sm.OLS(y[pre], X.loc[pre]).fit().resid**2).sum()
                 + (sm.OLS(y[post], X.loc[post]).fit().resid**2).sum())
    F = ((rss_full - rss_split) / k) / (rss_split / _df_denom)
    F_vals.append(F)
    # Survival function rather than 1 - cdf: for large F the cdf rounds to
    # 1.0 and the subtraction would report the p-value as exactly 0.
    p_vals.append(f.sf(F, k, _df_denom))
Chow = pd.DataFrame({'F': F_vals, 'p': p_vals}, index=scan_dates)

plt.figure(facecolor='white')
plt.plot(Chow.index, Chow['F'])
plt.axvline(pd.Timestamp('1975-01'), ls='--', c='r')  # reference date
# Mark the date with the largest F; skipped when every candidate fell in
# the guard band, since idxmax has no valid answer on an all-NaN column.
if Chow['F'].notna().any():
    plt.axvline(Chow['F'].idxmax(), ls='--', c='g')
plt.title('Figure 2  Chow F‑statistic scan')
plt.xlabel('Breakpoint')
plt.ylabel('F')
plt.tight_layout()
plt.show()
print('\nTop Chow candidates:\n', Chow.sort_values('F', ascending=False).head(5))
