In [None]:
import pandas as pd
from pathlib import Path
from statsmodels.tsa.stattools import adfuller, kpss
import numpy as np

In [3]:
# Base folders; every input path below is composed from these.
DATA_DIR = Path("../data")
EXOG_DIR = DATA_DIR / "exogenous"
ELECTRICITY_DIR = EXOG_DIR / "electricity"
STOCKS_DIR = EXOG_DIR / "stocks"

# Default sample window handed to prepare_exog (ISO dates).
SAMPLE_START, SAMPLE_END = "2021-05-21", "2025-04-30"

# Fuel / carbon price workbook.
FUELS_XLSX = EXOG_DIR / "fuels" / "Co2CoalTTF.xlsx"

# Electricity-market inputs (policy dummy, load forecast, scheduled exchanges).
IBERIAN_EXC = ELECTRICITY_DIR / "iberian_exception.parquet"
LOAD_DAILY = ELECTRICITY_DIR / "entsoe_load_forecast_daily_20210521_20250430.parquet"
FLOW_DAILY = ELECTRICITY_DIR / "entsoe_scheduled_exchanges_daily_20210521_20250430.parquet"

# Stock index files.
CAC = STOCKS_DIR / "cac_eur_pts.parquet"
IBEX = STOCKS_DIR / "ibex_eur_pts.parquet"
PSI = STOCKS_DIR / "psi_eur_pts.parquet"


def prepare_exog(
    fuels_path: Path = FUELS_XLSX,
    iberian_path: Path = IBERIAN_EXC,
    load_path: Path = LOAD_DAILY,
    flow_path: Path = FLOW_DAILY,
    cac_path: Path = CAC,
    ibex_path: Path = IBEX,
    psi_path: Path = PSI,
    sample_start: str = SAMPLE_START,
    sample_end: str = SAMPLE_END,
    ffill_load_flows: bool = False,
) -> pd.DataFrame:
    """
    Assemble all exogenous inputs into one frame with a row per calendar day.

    Steps: read each source, reshape load/flows to wide format, outer-merge
    everything on ``date``, align to the daily calendar
    ``[sample_start, sample_end]`` and apply the fill rules below.

    Expected input columns (as read by this function):
      * fuels workbook: ``date`` plus price columns; ``year``/``month``/``day``
        are dropped if present.
      * Iberian exception: ``date_local`` (or ``date``) and ``iberian_exception``.
      * load: ``date_local``, ``area``, ``value_mean``.
      * flows: ``date_local``, ``from_area``, ``to_area``, ``value_mean``.
      * stocks: a date column (``date``/``Date``/first column) and a value
        column (first match among common close/value names, otherwise the
        first numeric column).

    Fill rules:
      * ``iberian_exception``: missing days become 0.
      * ``TTF``, ``co2``, ``coal`` and the three stock indices: forward-filled.
        The fill is seeded with observations dated before ``sample_start``
        (when the files contain any), so a sample that starts on a day
        without a quote does not begin with NaN.
      * load / flow columns: left as NaN unless ``ffill_load_flows`` is True.

    Parameters
    ----------
    fuels_path, iberian_path, load_path, flow_path, cac_path, ibex_path, psi_path
        Locations of the input files.
    sample_start, sample_end
        Bounds of the daily calendar (both included).
    ffill_load_flows
        Also forward-fill the ``load_mean_mw_*`` / ``flow_mean_mw_*`` columns.

    Returns
    -------
    pd.DataFrame
        Columns ``date, year, month, day, iberian_exception`` followed by all
        remaining columns in merge order.

    Raises
    ------
    ValueError
        If the merged sources contain more than one row for the same date.
    """

    # --- Fuels -------------------------------------------------------------------
    fuels = pd.read_excel(fuels_path)
    fuels["date"] = pd.to_datetime(fuels["date"])
    fuels = fuels.sort_values("date").drop(columns=["year", "month", "day"], errors="ignore")

    # --- Iberian exception --------------------------------------------------------
    ie = pd.read_parquet(iberian_path)
    ie_date_col = "date_local" if "date_local" in ie.columns else "date"
    ie["date"] = pd.to_datetime(ie[ie_date_col])
    ie = ie[["date", "iberian_exception"]].sort_values("date")

    # --- Load (pivot to wide) -----------------------------------------------------
    load = pd.read_parquet(load_path)
    load["date"] = pd.to_datetime(load["date_local"])
    load_wide = (
        load.pivot(index="date", columns="area", values="value_mean")
            .rename(columns=lambda a: f"load_mean_mw_{a.lower()}")
            .reset_index()
    )

    # --- Flows (pivot to wide) ----------------------------------------------------
    flow = pd.read_parquet(flow_path)
    flow["date"] = pd.to_datetime(flow["date_local"])
    flow["pair"] = flow["from_area"].str.lower() + "_" + flow["to_area"].str.lower()
    flow_wide = (
        flow.pivot(index="date", columns="pair", values="value_mean")
            .rename(columns=lambda p: f"flow_mean_mw_{p}")
            .reset_index()
    )

    # --- Stocks (infer columns robustly) -----------------------------------------
    def _read_stock(path: Path, outcol: str) -> pd.DataFrame:
        """Read one index file and return it as ['date', outcol], sorted by date."""
        s = pd.read_parquet(path)
        date_col = "date" if "date" in s.columns else ("Date" if "Date" in s.columns else s.columns[0])
        s[date_col] = pd.to_datetime(s[date_col])

        for vc in ["close", "Close", "adjclose", "Adj Close", "value", "Value"]:
            if vc in s.columns:
                val_col = vc
                break
        else:
            # No well-known value column: fall back to the first numeric one.
            val_col = s.select_dtypes("number").columns[0]

        return s[[date_col, val_col]].rename(columns={date_col: "date", val_col: outcol}).sort_values("date")

    cac = _read_stock(cac_path, "cac_eur_pts")
    ibex = _read_stock(ibex_path, "ibex_eur_pts")
    psi = _read_stock(psi_path, "psi_eur_pts")

    # --- Merge ALL (outer) first --------------------------------------------------
    exog = fuels
    for part in (ie, load_wide, flow_wide, cac, ibex, psi):
        exog = exog.merge(part, on="date", how="outer")

    exog["date"] = pd.to_datetime(exog["date"])

    # Reindexing needs unique dates; fail early with a message that names them.
    duplicated = exog["date"].duplicated(keep=False)
    if duplicated.any():
        examples = exog.loc[duplicated, "date"].drop_duplicates().head(5).tolist()
        raise ValueError(
            f"Duplicate dates after merging the sources (e.g. {examples}); "
            "each input must hold at most one row per date."
        )

    indexed = exog.sort_values("date").set_index("date")

    # --- Reindex to full DAILY calendar for the sample ----------------------------
    full_idx = pd.date_range(sample_start, sample_end, freq="D")

    # Keep pre-sample observations around while filling so the first sample days
    # can inherit the last known quote; they are cut off again right after.
    if len(full_idx):
        history_idx = indexed.index[indexed.index < full_idx[0]]
    else:
        history_idx = indexed.index[:0]
    filled = indexed.reindex(history_idx.union(full_idx))

    # --- Fill rules ---------------------------------------------------------------
    # Forward fill *prices / indices / fuels* (typical for non-trading days / weekends)
    ffill_cols = [
        c for c in ["TTF", "co2", "coal", "cac_eur_pts", "ibex_eur_pts", "psi_eur_pts"]
        if c in filled.columns
    ]
    if ffill_load_flows:
        # Optional policy: by default load/flow gaps stay NaN so they remain visible.
        ffill_cols += [
            c for c in filled.columns
            if isinstance(c, str) and c.startswith(("load_mean_mw_", "flow_mean_mw_"))
        ]
    filled[ffill_cols] = filled[ffill_cols].ffill()

    exog = filled.reindex(full_idx).rename_axis("date").reset_index()

    # Dummy: missing days -> 0
    exog["iberian_exception"] = exog["iberian_exception"].fillna(0).astype(int)

    # Calendar breakdown
    exog["year"] = exog["date"].dt.year
    exog["month"] = exog["date"].dt.month
    exog["day"] = exog["date"].dt.day

    # Put date columns first
    first = ["date", "year", "month", "day", "iberian_exception"]
    rest = [c for c in exog.columns if c not in first]
    return exog[first + rest]

In [4]:
# Build the exogenous panel with the default paths and sample window,
# then show both ends of the date range.
exog = prepare_exog()
print(exog.head(), exog.tail(), sep="\n")

        date  year  month  day  iberian_exception        TTF    co2  \
0 2021-05-21  2021      5   21                  0  24.790001  51.29   
1 2021-05-22  2021      5   22                  0  24.790001  51.29   
2 2021-05-23  2021      5   23                  0  24.790001  51.29   
3 2021-05-24  2021      5   24                  0  24.760000  52.68   
4 2021-05-25  2021      5   25                  0  26.190001  51.24   

        coal  load_mean_mw_es  load_mean_mw_fr  load_mean_mw_pt  \
0  69.997537     27481.958333     47206.250000      5501.958333   
1  69.997537     24528.333333     43120.833333      4970.875000   
2  69.997537     22572.791667     41512.500000      4588.625000   
3  69.954973     26303.541667     41808.333333      5414.458333   
4  69.959184     27226.625000     49120.833333      5588.791667   

   flow_mean_mw_es_fr  flow_mean_mw_es_pt  flow_mean_mw_fr_es  \
0                 0.0          811.558333         3013.416667   
1                 0.0         1609.21250

In [5]:
# Persist the merged daily panel (row index not written).
all_exog_path = Path("../data/exogenous/all_exog.parquet")
exog.to_parquet(all_exog_path, index=False)


## Helper: run ADF + KPSS and summarize

In [None]:
def unit_root_tests(series, name, adf_reg="c", kpss_reg="c", alpha=0.05):
    """
    Run ADF (H0: unit root) and KPSS (H0: stationary) on a 1D series.

    Missing values are dropped and the rest is cast to float before testing.

    Parameters
    ----------
    series : array-like
        Observations in time order.
    name : str
        Label stored under ``"series"`` in the result.
    adf_reg, kpss_reg : str
        Passed as ``regression=`` to ``adfuller`` / ``kpss``.
    alpha : float, default 0.05
        Significance level used by the combined decision rule. The KPSS
        p-value comes from a look-up table (see the interpolation warnings the
        test emits), so very small or very large ``alpha`` values cannot be
        resolved by the KPSS half of the rule.

    Returns
    -------
    dict
        Keys ``series, nobs, adf_stat, adf_pvalue, kpss_stat, kpss_pvalue,
        decision``. ``decision`` is one of:
          * ``"no data"`` – nothing left after dropping NaNs;
          * ``"constant series"`` – a single distinct value, tests not run;
          * ``"stationary (no unit root)"`` – ADF rejects, KPSS does not;
          * ``"unit root / non-stationary"`` – KPSS rejects, ADF does not;
          * ``"inconclusive / mixed"`` – anything else.
        Statistics and p-values are NaN whenever the tests were not run.
    """
    s = pd.Series(series).dropna().astype(float)

    def _result(decision, adf=(np.nan, np.nan), kpss_=(np.nan, np.nan)):
        # Single place that fixes the key set and order of the returned dict.
        return {
            "series": name,
            "nobs": int(len(s)),
            "adf_stat": adf[0],
            "adf_pvalue": adf[1],
            "kpss_stat": kpss_[0],
            "kpss_pvalue": kpss_[1],
            "decision": decision,
        }

    if s.empty:
        return _result("no data")

    # Neither test is defined for a series without variation; report it
    # instead of letting the test routines fail.
    if s.nunique() <= 1:
        return _result("constant series")

    # ADF: H0 = unit root
    adf_res = adfuller(s, regression=adf_reg, autolag="AIC")
    adf_stat, adf_pvalue = adf_res[0], adf_res[1]

    # KPSS: H0 = stationary, lag length chosen automatically
    kpss_res = kpss(s, regression=kpss_reg, nlags="auto")
    kpss_stat, kpss_pvalue = kpss_res[0], kpss_res[1]

    # Combine both tests: a clear verdict needs the two tests to agree.
    if (adf_pvalue < alpha) and (kpss_pvalue > alpha):
        decision = "stationary (no unit root)"
    elif (adf_pvalue > alpha) and (kpss_pvalue < alpha):
        decision = "unit root / non-stationary"
    else:
        decision = "inconclusive / mixed"

    return _result(decision, (adf_stat, adf_pvalue), (kpss_stat, kpss_pvalue))


def unit_root_table(df, cols, adf_reg="c", kpss_reg="c"):
    """
    Apply unit_root_tests to multiple columns of a DataFrame.

    Parameters
    ----------
    df : pd.DataFrame
        Frame holding the series to test.
    cols : iterable of str
        Column names to test, one output row per name (in the given order).
    adf_reg, kpss_reg : str
        Forwarded unchanged to ``unit_root_tests``.

    Returns
    -------
    pd.DataFrame
        Columns ``series, nobs, adf_stat, adf_pvalue, kpss_stat, kpss_pvalue,
        decision``. An empty ``cols`` gives an empty frame with these columns.
    """
    out_columns = [
        "series",
        "nobs",
        "adf_stat",
        "adf_pvalue",
        "kpss_stat",
        "kpss_pvalue",
        "decision",
    ]
    results = [
        unit_root_tests(df[col], col, adf_reg=adf_reg, kpss_reg=kpss_reg)
        for col in cols
    ]
    # Passing the column list to the constructor fixes the layout even when
    # there are no result rows.
    return pd.DataFrame(results, columns=out_columns)


## Apply to endogenous variables (rolling TSI)

In [7]:
import pandas as pd
from pathlib import Path

# Folder with the rolling TSI parquet files; differenced outputs go to a subfolder of it.
BASE_TSI_DIR = Path("../data/endogenous/prices")
OUT_DIR = BASE_TSI_DIR / "differenced"

# series name -> parquet file; every file is named rolling_<series name>_recalculated.parquet
tsi_files = {
    series: BASE_TSI_DIR / f"rolling_{series}_recalculated.parquet"
    for series in (
        "tsi_mhar_recov_neg",
        "tsi_mhar_recov_pos",
        "tsi_mhar_recov",
        "tsi_mhar_revar",
    )
}

def load_tsi_df(path: Path, series_name: str) -> pd.DataFrame:
    """
    Load a rolling TSI parquet file and return it as ['date', series_name].

    The date is taken from the ``date`` column when the file has one,
    otherwise from the index (moved out with ``reset_index``). The value is
    the first column that is not the date (expected: 'TSI_recalculated').
    Any further columns are not returned.

    Parameters
    ----------
    path : Path
        Parquet file to read.
    series_name : str
        Name given to the value column in the result.

    Returns
    -------
    pd.DataFrame
        Two columns, ``date`` (datetime) and ``series_name``, sorted by date
        with a fresh 0..n-1 index.

    Raises
    ------
    IndexError
        If the file has no column besides the date.
    """
    df = pd.read_parquet(path)

    if "date" in df.columns:
        # Use the named column wherever it sits; do not assume it comes first.
        date_col = "date"
    else:
        # Date lives in the index: after reset_index it is the first column.
        df = df.reset_index()
        date_col = df.columns[0]

    value_candidates = [c for c in df.columns if c != date_col]
    if not value_candidates:
        raise IndexError(f"{path}: no value column found next to the date column")
    value_col = value_candidates[0]

    out = df[[date_col, value_col]].rename(columns={date_col: "date", value_col: series_name})
    out["date"] = pd.to_datetime(out["date"])
    return out.sort_values("date").reset_index(drop=True)


In [8]:
# Level tests (constant-only ADF and KPSS) for each rolling TSI series.
# One single-row table per series is printed and collected for the summary.
all_tsi_results = []
combined = None  # wide frame with one column per TSI series, outer-joined on date

for series_name, fpath in tsi_files.items():
    print(f"\n=== {series_name} ===")
    df_tsi = load_tsi_df(fpath, series_name)

    tsi_unit_root = unit_root_table(df_tsi, [series_name], adf_reg="c", kpss_reg="c")
    # Tag the row with the series key right after the "series" column.
    tsi_unit_root.insert(1, "tsi_type", series_name)
    print(tsi_unit_root.round(4))
    all_tsi_results += [tsi_unit_root]

    # First series starts the combined frame, later ones are merged onto it.
    combined = df_tsi if combined is None else combined.merge(df_tsi, on="date", how="outer")

# Stack the per-series rows into one summary table.
tsi_unit_root_all = pd.concat(all_tsi_results, ignore_index=True)
print("\n=== Unit-root tests for ALL rolling TSI series ===", tsi_unit_root_all.round(4), sep="\n")



=== tsi_mhar_recov_neg ===
               series            tsi_type  nobs  adf_stat  adf_pvalue  \
0  tsi_mhar_recov_neg  tsi_mhar_recov_neg  1077   -0.7278      0.8394   

   kpss_stat  kpss_pvalue                    decision  
0     3.7854         0.01  unit root / non-stationary  

=== tsi_mhar_recov_pos ===


look-up table. The actual p-value is smaller than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")
look-up table. The actual p-value is smaller than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")


               series            tsi_type  nobs  adf_stat  adf_pvalue  \
0  tsi_mhar_recov_pos  tsi_mhar_recov_pos  1077   -0.6529      0.8586   

   kpss_stat  kpss_pvalue                    decision  
0     4.1123         0.01  unit root / non-stationary  

=== tsi_mhar_recov ===
           series        tsi_type  nobs  adf_stat  adf_pvalue  kpss_stat  \
0  tsi_mhar_recov  tsi_mhar_recov  1077   -1.3489      0.6065     3.9541   

   kpss_pvalue                    decision  
0         0.01  unit root / non-stationary  

=== tsi_mhar_revar ===
           series        tsi_type  nobs  adf_stat  adf_pvalue  kpss_stat  \
0  tsi_mhar_revar  tsi_mhar_revar  1077    -2.148      0.2256     3.4167   

   kpss_pvalue                    decision  
0         0.01  unit root / non-stationary  

=== Unit-root tests for ALL rolling TSI series ===
               series            tsi_type  nobs  adf_stat  adf_pvalue  \
0  tsi_mhar_recov_neg  tsi_mhar_recov_neg  1077   -0.7278      0.8394   
1  tsi_mh

look-up table. The actual p-value is smaller than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")
look-up table. The actual p-value is smaller than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")


In [9]:
# 1) one wide frame holding every TSI series, outer-joined on date
dfs = [load_tsi_df(path, name) for name, path in tsi_files.items()]

tsi_all = dfs[0]
for df in dfs[1:]:
    tsi_all = tsi_all.merge(df, on="date", how="outer")

tsi_all = tsi_all.sort_values("date").reset_index(drop=True)

tsi_cols = [c for c in tsi_all.columns if c != "date"]

# 2) first differences, appended as d_<series> next to the levels
diff_cols = ["d_" + c for c in tsi_cols]
tsi_diff = tsi_all.join(tsi_all[tsi_cols].diff().add_prefix("d_"))

# 3) unit-root tests on the differenced series (constant-only ADF and KPSS)
tsi_diff_unit_root = unit_root_table(tsi_diff, diff_cols, adf_reg="c", kpss_reg="c")

print("\n=== Unit-root tests for FIRST DIFFERENCES of TSI ===", tsi_diff_unit_root.round(4), sep="\n")


=== Unit-root tests for FIRST DIFFERENCES of TSI ===
                 series  nobs  adf_stat  adf_pvalue  kpss_stat  kpss_pvalue  \
0  d_tsi_mhar_recov_neg  1076  -13.9681         0.0     0.4728       0.0478   
1  d_tsi_mhar_recov_pos  1076  -11.4653         0.0     0.6835       0.0150   
2      d_tsi_mhar_recov  1076   -5.0242         0.0     0.5882       0.0237   
3      d_tsi_mhar_revar  1076  -11.8078         0.0     0.1067       0.1000   

                    decision  
0       inconclusive / mixed  
1       inconclusive / mixed  
2       inconclusive / mixed  
3  stationary (no unit root)  


look-up table. The actual p-value is greater than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")


In [10]:
# --- build combined endogenous DF (all TSI series, outer-joined on date) ---
dfs = []
for name, path in tsi_files.items():
    dfs.append(load_tsi_df(path, name))

tsi_all = dfs[0]
for df in dfs[1:]:
    tsi_all = tsi_all.merge(df, on="date", how="outer")

tsi_all = tsi_all.sort_values("date").reset_index(drop=True)

# --- first differences (only the d_<series> columns are kept, plus date) ---
tsi_cols = [c for c in tsi_all.columns if c != "date"]
tsi_diff = tsi_all[["date"]].copy()

for col in tsi_cols:
    tsi_diff[f"d_{col}"] = tsi_all[col].diff()

# drop rows in which every difference is NaN (always true for the first row)
tsi_diff = tsi_diff.dropna(how="all", subset=[c for c in tsi_diff.columns if c != "date"]).reset_index(drop=True)

# save -- create the target folder first so the write also works on a fresh checkout
OUT_DIR.mkdir(parents=True, exist_ok=True)
endog_path = OUT_DIR / "endogenous_tsi_diff.parquet"
tsi_diff.to_parquet(endog_path, index=False)
print(f"Saved differenced endogenous TSI to: {endog_path}")
print(tsi_diff.head())

Saved differenced endogenous TSI to: ..\data\endogenous\prices\differenced\endogenous_tsi_diff.parquet
        date  d_tsi_mhar_recov_neg  d_tsi_mhar_recov_pos  d_tsi_mhar_recov  \
0 2022-05-21              0.565159              0.353851          0.318845   
1 2022-05-22              0.028779              0.053486          0.018242   
2 2022-05-23              0.017773              0.001561          0.009926   
3 2022-05-24             -0.010183             -0.026224         -0.020802   
4 2022-05-25              0.021521             -0.015650         -0.001137   

   d_tsi_mhar_revar  
0          0.464115  
1          0.090025  
2         -0.035204  
3         -0.033416  
4         -0.004788  


## Apply to exogenous variables

In [11]:
# Merged daily exogenous panel (input) and its regression-ready counterpart
# (output), both in the same folder.
EXOG_IN_PATH = Path("../data/exogenous/all_exog.parquet")
EXOG_OUT_PATH = EXOG_IN_PATH.with_name("all_exog_ready.parquet")

In [14]:
exog = pd.read_parquet(EXOG_IN_PATH)

# Calendar helper columns are not regressors; tolerate files that lack them
# (the column-selection step further down treats them as optional too).
exog = exog.drop(columns=["year", "month", "day"], errors="ignore")

# Normalize date column: accept a couple of alternative names
if "date" not in exog.columns:
    for alt in ["date_local", "Date"]:
        if alt in exog.columns:
            exog = exog.rename(columns={alt: "date"})
            break
    else:
        raise KeyError(
            f"No date column found in {EXOG_IN_PATH}; expected 'date', 'date_local' or 'Date'"
        )

# Strip any time-of-day so each row is stamped at midnight
exog["date"] = pd.to_datetime(exog["date"]).dt.normalize()
exog = exog.sort_values("date").reset_index(drop=True)

print("Exog date range:", exog["date"].min().date(), "→", exog["date"].max().date())
print("Exog columns:", list(exog.columns))

Exog date range: 2021-05-21 → 2025-04-30
Exog columns: ['date', 'iberian_exception', 'TTF', 'co2', 'coal', 'load_mean_mw_es', 'load_mean_mw_fr', 'load_mean_mw_pt', 'flow_mean_mw_es_fr', 'flow_mean_mw_es_pt', 'flow_mean_mw_fr_es', 'flow_mean_mw_pt_es', 'cac_eur_pts', 'ibex_eur_pts', 'psi_eur_pts']


In [16]:
# ---------------------------------------------------------------------
# Pick the columns that go through the stationarity screen
# ---------------------------------------------------------------------
# Dummies are kept in levels and never unit-root tested.
EXOG_DUMMY_COLS = [c for c in ["iberian_exception"] if c in exog.columns]

# Date / calendar columns are not regressors.
drop_non_reg = {"date", "year", "month", "day"}
numeric_cols = [
    c
    for c, dtype in exog.dtypes.items()
    if c not in drop_non_reg and pd.api.types.is_numeric_dtype(dtype)
]

# Everything numeric except the dummies is a test candidate.
test_cols = [c for c in numeric_cols if not (c in EXOG_DUMMY_COLS)]

print("\nTesting these exogenous columns (levels):", test_cols)
print("Keeping these as dummies/levels:", EXOG_DUMMY_COLS)


Testing these exogenous columns (levels): ['TTF', 'co2', 'coal', 'load_mean_mw_es', 'load_mean_mw_fr', 'load_mean_mw_pt', 'flow_mean_mw_es_fr', 'flow_mean_mw_es_pt', 'flow_mean_mw_fr_es', 'flow_mean_mw_pt_es', 'cac_eur_pts', 'ibex_eur_pts', 'psi_eur_pts']
Keeping these as dummies/levels: ['iberian_exception']


In [17]:
# ---------------------------------------------------------------------
# Level tests, then split columns into "difference" vs "keep in levels"
# ---------------------------------------------------------------------
alpha = 0.05

# NOTE(review): ADF runs with a constant only ("c") while KPSS allows a
# trend ("ct") -- confirm this asymmetry is intended.
exog_unit_root = unit_root_table(exog, test_cols, adf_reg="c", kpss_reg="ct")
print("\n=== Unit-root tests for exogenous variables (levels) ===", exog_unit_root.round(4), sep="\n")

# Conservative rule: a column counts as non-stationary as soon as ONE test
# says so (ADF cannot reject a unit root, or KPSS rejects stationarity).
is_nonstationary = exog_unit_root["adf_pvalue"].gt(alpha) | exog_unit_root["kpss_pvalue"].lt(alpha)

nonstat_cols = exog_unit_root["series"][is_nonstationary].tolist()
stat_cols = exog_unit_root["series"][~is_nonstationary].tolist()

print("\nNon-stationary (difference these):", nonstat_cols)
print("Stationary (keep in levels):", stat_cols)

look-up table. The actual p-value is smaller than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")
look-up table. The actual p-value is smaller than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")
look-up table. The actual p-value is smaller than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")
look-up table. The actual p-value is greater than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")
look-up table. The actual p-value is smaller than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")
look-up table. The actual p-value is greater than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")
look-up table. The actual p-value is smaller than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")
look-up table. The actual p-value is greater than the p-value returned.

  kpss_res = kpss(s, reg


=== Unit-root tests for exogenous variables (levels) ===
                series  nobs  adf_stat  adf_pvalue  kpss_stat  kpss_pvalue  \
0                  TTF  1441   -2.2543      0.1871     0.4505       0.0100   
1                  co2  1441   -3.1521      0.0229     0.7558       0.0100   
2                 coal  1441   -1.5538      0.5068     0.5172       0.0100   
3      load_mean_mw_es  1441   -4.2663      0.0005     0.1914       0.0192   
4      load_mean_mw_fr  1441   -3.0405      0.0313     0.1390       0.0629   
5      load_mean_mw_pt  1441   -3.6957      0.0042     0.1168       0.1000   
6   flow_mean_mw_es_fr  1441   -5.3140      0.0000     0.3887       0.0100   
7   flow_mean_mw_es_pt  1441   -3.8108      0.0028     0.1009       0.1000   
8   flow_mean_mw_fr_es  1441   -5.6050      0.0000     0.4854       0.0100   
9   flow_mean_mw_pt_es  1441   -4.4612      0.0002     0.0918       0.1000   
10         cac_eur_pts  1439   -2.0120      0.2813     0.3313       0.0100   
11    

In [18]:
# ---------------------------------------------------------------------
# Assemble the regression-ready exogenous frame
# ---------------------------------------------------------------------
# Column order: date | stationary columns in levels | d_<col> first
# differences of the non-stationary columns | dummies as integers.
X = pd.concat(
    [
        exog[["date"]],
        exog[stat_cols],
        exog[nonstat_cols].diff().add_prefix("d_"),
        exog[EXOG_DUMMY_COLS].fillna(0).astype(int),
    ],
    axis=1,
)


In [19]:
# Re-test the differenced columns to confirm differencing made them stationary
diff_cols = ["d_" + c for c in nonstat_cols]
if len(diff_cols) > 0:
    exog_diff_unit_root = unit_root_table(X, diff_cols, adf_reg="c", kpss_reg="c")
    print(
        "\n=== Unit-root tests for differenced exogenous variables (only those differenced) ===",
        exog_diff_unit_root.round(4),
        sep="\n",
    )

# Keep only rows where every regressor is present: this removes the NaN that
# differencing puts in the first row and any day with a gap in some column.
reg_cols = X.columns.drop("date").tolist()
X_out = X.dropna(subset=reg_cols).reset_index(drop=True)

print("\nRegression-ready exog shape:", X_out.shape)
print(X_out.head())

look-up table. The actual p-value is greater than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")
look-up table. The actual p-value is greater than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")
look-up table. The actual p-value is greater than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")
look-up table. The actual p-value is greater than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")
look-up table. The actual p-value is greater than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")
look-up table. The actual p-value is greater than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")
look-up table. The actual p-value is greater than the p-value returned.

  kpss_res = kpss(s, regression=kpss_reg, nlags="auto")
look-up table. The actual p-value is greater than the p-value returned.

  kpss_res = kpss(s, reg


=== Unit-root tests for differenced exogenous variables (only those differenced) ===
                 series  nobs  adf_stat  adf_pvalue  kpss_stat  kpss_pvalue  \
0                 d_TTF  1440  -13.3411         0.0     0.0708          0.1   
1                 d_co2  1440  -34.5145         0.0     0.0833          0.1   
2                d_coal  1440   -7.9576         0.0     0.1348          0.1   
3     d_load_mean_mw_es  1440  -10.1641         0.0     0.0229          0.1   
4  d_flow_mean_mw_es_fr  1440  -11.2544         0.0     0.1489          0.1   
5  d_flow_mean_mw_fr_es  1440  -21.2319         0.0     0.1522          0.1   
6         d_cac_eur_pts  1438  -40.3445         0.0     0.0343          0.1   
7        d_ibex_eur_pts  1438  -26.0292         0.0     0.2892          0.1   
8         d_psi_eur_pts  1438  -38.0469         0.0     0.0165          0.1   

                    decision  
0  stationary (no unit root)  
1  stationary (no unit root)  
2  stationary (no unit root)  

In [20]:
# ---------------------------------------------------------------------
# Save
# ---------------------------------------------------------------------
# Create the destination folder if it does not exist yet, then write the
# regression-ready frame (row index not written) and report where it went.
EXOG_OUT_PATH.parent.mkdir(parents=True, exist_ok=True)
X_out.to_parquet(EXOG_OUT_PATH, index=False)
print(f"\nSaved regression-ready exogenous variables to: {EXOG_OUT_PATH}")


Saved regression-ready exogenous variables to: ..\data\exogenous\all_exog_ready.parquet


In [21]:
# Preview the first rows of the regression-ready exogenous frame.
X_out.head()

Unnamed: 0,date,load_mean_mw_fr,load_mean_mw_pt,flow_mean_mw_es_pt,flow_mean_mw_pt_es,d_TTF,d_co2,d_coal,d_load_mean_mw_es,d_flow_mean_mw_es_fr,d_flow_mean_mw_fr_es,d_cac_eur_pts,d_ibex_eur_pts,d_psi_eur_pts,iberian_exception
0,2021-05-24,41808.333333,5414.458333,1462.979167,33.15,-0.030001,1.39,-0.042563,3730.75,0.0,358.241667,-18.220215,3.200195,0.0,0
1,2021-05-25,49120.833333,5588.791667,834.070833,433.145833,1.43,-1.44,0.00421,923.083333,0.0,-432.154167,1.330078,-11.799805,-1.380371,0
2,2021-05-26,48747.916667,5594.833333,873.566667,206.879167,0.24,1.57,0.508413,203.5,0.0,331.120833,44.109863,-10.800781,28.550293,0
3,2021-05-27,48145.833333,5569.125,881.154167,219.420833,-1.23,-1.05,-0.01156,189.541667,0.0,183.1625,48.399902,38.5,-6.570312,0
4,2021-05-28,46372.916667,5564.666667,701.925,195.375,-0.27,-0.79,0.181423,-125.25,0.0,-40.758333,0.0,0.0,0.0,0
