 # Realized (Co)Variances & Semi(Co)Variances – Summary Check



 Input: intraday prices.

 Output: summary statistics of:

   - daily realized variances/covariances (ReCov)

   - daily positive semicovariances (ReCov⁺)

   - daily negative semicovariances (ReCov⁻)

 for each series.

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the long-format price table (one row per timestamp and area).
file_path = '../data/endogenous/prices/filtered_data.parquet'
try:
    # Keep the try body to the read itself so only load failures are reported here.
    df = pd.read_parquet(file_path)
except Exception as e:
    # Report the problem, then re-raise: without `df` every later cell would
    # fail with a NameError that hides the real cause.
    print(f"Error loading Europe dataset: {e}")
    raise

print(f"Europe dataset loaded from {file_path}.")
print(df.head())

# Order rows by area, then chronologically within each area.
df = df.sort_values(["Area", "Start DateTime"])

Europe dataset loaded from ../data/endogenous/prices/filtered_data.parquet.
       Start DateTime    Area          Sequence  Day-ahead Price (EUR/MWh)  \
0 2021-05-21 00:00:00  BZN|ES  Without sequence                      80.85   
1 2021-05-21 01:00:00  BZN|ES  Without sequence                      79.46   
2 2021-05-21 02:00:00  BZN|ES  Without sequence                      79.18   
3 2021-05-21 03:00:00  BZN|ES  Without sequence                      79.18   
4 2021-05-21 04:00:00  BZN|ES  Without sequence                      79.46   

         TimeDiff  NonHourly  
0             NaT      False  
1 0 days 01:00:00      False  
2 0 days 01:00:00      False  
3 0 days 01:00:00      False  
4 0 days 01:00:00      False  


In [3]:
# Reshape the long table to wide form:
#   rows    = timestamps ("Start DateTime")
#   columns = markets ("Area"), e.g. BZN|ES, BZN|FR, BZN|PT
#   cells   = day-ahead price
prices = df.pivot(
    index="Start DateTime",
    columns="Area",
    values="Day-ahead Price (EUR/MWh)",
)
prices = prices.sort_index()  # chronological row order

print(prices.head())

Area                 BZN|ES  BZN|FR  BZN|PT
Start DateTime                             
2021-05-21 00:00:00   80.85   37.24   80.85
2021-05-21 01:00:00   79.46   22.90   79.46
2021-05-21 02:00:00   79.18   14.29   79.18
2021-05-21 03:00:00   79.18    7.83   79.18
2021-05-21 04:00:00   79.46    7.08   79.46


 ## 0. Compute simple returns from prices

 $$r_{ji} = P_{ji} - P_{j,i-1}$$

    "Simple returns" here are first differences of the price level, r_t = P_t - P_{t-1}, computed per column (not percentage returns).

    prices: wide DataFrame, index = timestamps, columns = markets.

In [4]:
def compute_simple_returns(prices: pd.DataFrame) -> pd.DataFrame:
    """
    First-difference the prices column by column: r_t = P_t - P_{t-1}.

    prices: wide DataFrame, index = timestamps, columns = markets.

    Rows are sorted by index before differencing. Any row containing a NaN
    (including the first row, which has no predecessor) or an infinite
    difference is dropped. A 'Date' column holding the calendar date of each
    timestamp is appended to the result.
    """
    changes = prices.sort_index().diff()
    # Treat +/-inf like missing values so a single dropna removes both.
    finite = changes.replace([np.inf, -np.inf], np.nan).dropna(how="any")
    return finite.assign(Date=finite.index.date)

# Build the intraday return table (market columns + 'Date') and preview it.
returns = compute_simple_returns(prices)
print(returns.head())

Area                 BZN|ES  BZN|FR  BZN|PT        Date
Start DateTime                                         
2021-05-21 01:00:00   -1.39  -14.34   -1.39  2021-05-21
2021-05-21 02:00:00   -0.28   -8.61   -0.28  2021-05-21
2021-05-21 03:00:00    0.00   -6.46    0.00  2021-05-21
2021-05-21 04:00:00    0.28   -0.75    0.28  2021-05-21
2021-05-21 05:00:00   -0.28    9.61   -0.28  2021-05-21


 ## 1. Realized covariance matrices (ReCov)

 $$ReCov_d = \sum_{i \in d} r_i r_i'$$


In [5]:
def daily_realized_cov(rets: pd.DataFrame) -> dict:
    """
    Compute daily realized covariance matrices:
        R_d = sum_t r_{d,t} r_{d,t}' = X_d' X_d,
    where X_d stacks the return vectors of day d as rows.

    rets: DataFrame of simple returns + 'Date' column.
    Returns:
        dict[date -> (N x N) DataFrame], keyed by pd.to_datetime(date),
        with the market columns of `rets` as both index and columns.
    Raises:
        KeyError if `rets` has no 'Date' column.
    """
    value_cols = rets.columns.drop("Date")
    cov_dict = {}

    for day, grp in rets.groupby("Date"):
        x = grp[value_cols].to_numpy(dtype=float)  # (T_d x N)
        # One matrix product replaces the per-row sum of outer products.
        cov_dict[pd.to_datetime(day)] = pd.DataFrame(
            x.T @ x, index=value_cols, columns=value_cols
        )

    return cov_dict

cov_matrices = daily_realized_cov(returns)

# Print the first five daily matrices as a quick sanity check.
for day in list(cov_matrices)[:5]:
    cov = cov_matrices[day]
    print(f"Covariance matrix for {day.date()}:")
    print(cov)

Covariance matrix for 2021-05-21:
Area      BZN|ES     BZN|FR    BZN|PT
Area                                 
BZN|ES  900.7055   610.1282  849.2810
BZN|FR  610.1282  3308.8123  396.1419
BZN|PT  849.2810   396.1419  856.6119
Covariance matrix for 2021-05-22:
Area      BZN|ES     BZN|FR    BZN|PT
Area                                 
BZN|ES  638.1532   546.4453  638.1532
BZN|FR  546.4453  2775.8191  546.4453
BZN|PT  638.1532   546.4453  638.1532
Covariance matrix for 2021-05-23:
Area      BZN|ES     BZN|FR    BZN|PT
Area                                 
BZN|ES  787.1981   596.2553  787.1981
BZN|FR  596.2553  3423.8128  596.2553
BZN|PT  787.1981   596.2553  787.1981
Covariance matrix for 2021-05-24:
Area      BZN|ES     BZN|FR    BZN|PT
Area                                 
BZN|ES  865.9875   770.1017  865.9875
BZN|FR  770.1017  3088.8052  770.1017
BZN|PT  865.9875   770.1017  865.9875
Covariance matrix for 2021-05-25:
Area      BZN|ES     BZN|FR    BZN|PT
Area                            

 ## 2. Realized semicovariances (ReCov⁺, ReCov⁻)



 Split returns into positive/negative parts and build:

* $ReCov^{+}_d$  (from positive moves),

* $ReCov^{-}_d$  (from negative moves).

In [6]:
def daily_semicovariances(rets: pd.DataFrame):
    """
    Compute daily positive and negative semicovariance matrices.

    For each day d, with r the return vector at one timestamp:
        - rp = max(r, 0)   (element-wise positive part)
        - rn = min(r, 0)   (element-wise negative part)
        - ReCov⁺ := sum_t (rp_t rp_t' + rp_t rn_t')
        - ReCov⁻ := sum_t (rn_t rn_t' + rn_t rp_t')

    Because rp + rn = r, ReCov⁺ + ReCov⁻ equals sum_t r_t r_t' (the realized
    covariance). The mixed terms make the individual matrices non-symmetric
    in general: entry (i, j) is driven by the sign of market i's return.

    rets: DataFrame of simple returns + 'Date' column.
    Returns:
        pos_dict, neg_dict : dict[date -> (N x N) DataFrame]
    Raises:
        KeyError if `rets` has no 'Date' column.
    """
    value_cols = rets.columns.drop("Date")
    pos_dict = {}
    neg_dict = {}

    for day, grp in rets.groupby("Date"):
        x = grp[value_cols].to_numpy(dtype=float)  # (T_d x N)
        x_pos = np.clip(x, 0, None)   # positive parts
        x_neg = np.clip(x, None, 0)   # negative parts

        # Matrix products replace the per-row sums of outer products.
        mat_pos = x_pos.T @ x_pos + x_pos.T @ x_neg
        mat_neg = x_neg.T @ x_neg + x_neg.T @ x_pos

        date = pd.to_datetime(day)
        pos_dict[date] = pd.DataFrame(mat_pos, index=value_cols, columns=value_cols)
        neg_dict[date] = pd.DataFrame(mat_neg, index=value_cols, columns=value_cols)

    return pos_dict, neg_dict
pos_semicov, neg_semicov = daily_semicovariances(returns)

# Show both semicovariance matrices for the first five days.
for day in list(pos_semicov)[:5]:
    stamp = day.date()
    print(f"Positive Semicovariance matrix for {stamp}:")
    print(pos_semicov[day])
    print(f"Negative Semicovariance matrix for {stamp}:")
    print(neg_semicov[day])

Positive Semicovariance matrix for 2021-05-21:
Area      BZN|ES     BZN|FR    BZN|PT
Area                                 
BZN|ES  522.6702   326.9917  504.0117
BZN|FR  274.1078  2006.6808  141.4650
BZN|PT  497.8713   179.1050  505.2022
Negative Semicovariance matrix for 2021-05-21:
Area      BZN|ES     BZN|FR    BZN|PT
Area                                 
BZN|ES  378.0353   283.1365  345.2693
BZN|FR  336.0204  1302.1315  254.6769
BZN|PT  351.4097   217.0369  351.4097
Positive Semicovariance matrix for 2021-05-22:
Area      BZN|ES     BZN|FR    BZN|PT
Area                                 
BZN|ES  439.2888   260.7214  439.2888
BZN|FR  212.1457  1800.8101  212.1457
BZN|PT  439.2888   260.7214  439.2888
Negative Semicovariance matrix for 2021-05-22:
Area      BZN|ES    BZN|FR    BZN|PT
Area                                
BZN|ES  198.8644  285.7239  198.8644
BZN|FR  334.2996  975.0090  334.2996
BZN|PT  198.8644  285.7239  198.8644
Positive Semicovariance matrix for 2021-05-23:
Area      

 ## 3. Convert daily matrices → vech time series

 One column per variance/covariance series, with markets in alphabetical order (ES, FR, FR-ES, PT, PT-ES, PT-FR, ...)


In [7]:

def cov_dict_to_vech_df(cov_dict: dict) -> pd.DataFrame:
    """
    Flatten a dict of daily covariance matrices into a DataFrame where
    each column is a variance or covariance series (vech of each matrix).

    Markets are taken from the first matrix and sorted; every matrix is
    re-indexed to that order before flattening.

    Ordering (row by row through the lower triangle, diagonal first):
        [Var(m1),
         Var(m2), Cov(m2,m1),
         Var(m3), Cov(m3,m1), Cov(m3,m2),
         ...]
    Variance columns are labelled "mi"; covariance columns "mi-mj" and hold
    entry [mi, mj] of the matrix (row mi, column mj).

    Returns:
        DataFrame indexed by date (sorted), one column per vech entry.
    Raises:
        ValueError if `cov_dict` is empty (no matrix to infer markets from).
    """
    if not cov_dict:
        raise ValueError("cov_dict is empty: cannot infer the market labels.")

    # Take markets from the first matrix and fix an ordering
    example_mat = next(iter(cov_dict.values()))
    markets_sorted = sorted(example_mat.columns)

    # (row, col) positions in output order: diagonal entry of each row first,
    # then that row's entries below the diagonal. Labels and values are both
    # derived from this one list so they cannot get out of step.
    positions = [
        (i, j)
        for i in range(len(markets_sorted))
        for j in (i, *range(i))
    ]
    labels = [
        markets_sorted[i] if i == j else f"{markets_sorted[i]}-{markets_sorted[j]}"
        for i, j in positions
    ]
    row_idx = [i for i, _ in positions]
    col_idx = [j for _, j in positions]

    dates = sorted(cov_dict)
    rows = [
        cov_dict[date].loc[markets_sorted, markets_sorted].to_numpy()[row_idx, col_idx]
        for date in dates
    ]

    return pd.DataFrame(np.vstack(rows), index=pd.to_datetime(dates), columns=labels)

# Flatten the daily ReCov matrices into one row per day and preview the result.
print("Vech DataFrame of daily covariance matrices:")
vech_cov_df = cov_dict_to_vech_df(cov_matrices)
print(vech_cov_df.head())

Vech DataFrame of daily covariance matrices:
              BZN|ES     BZN|FR  BZN|FR-BZN|ES    BZN|PT  BZN|PT-BZN|ES  \
2021-05-21  900.7055  3308.8123       610.1282  856.6119       849.2810   
2021-05-22  638.1532  2775.8191       546.4453  638.1532       638.1532   
2021-05-23  787.1981  3423.8128       596.2553  787.1981       787.1981   
2021-05-24  865.9875  3088.8052       770.1017  865.9875       865.9875   
2021-05-25  262.7337  1546.6911       341.5632  245.3007       251.8947   

            BZN|PT-BZN|FR  
2021-05-21       396.1419  
2021-05-22       546.4453  
2021-05-23       596.2553  
2021-05-24       770.1017  
2021-05-25       320.1967  


 ## 4. Summary statistics helper (per series)



In [8]:

def summary_table(df: pd.DataFrame) -> pd.DataFrame:
    """
    Per-column summary statistics, one row per column of `df`.

    Output columns, in order:
        Mean, SD, Min, Q1, Median, Q3, Max
    """
    # describe() name -> display name; insertion order is the output order.
    display_names = {
        "mean": "Mean",
        "std": "SD",
        "min": "Min",
        "25%": "Q1",
        "50%": "Median",
        "75%": "Q3",
        "max": "Max",
    }
    stats = df.describe(percentiles=[0.25, 0.5, 0.75]).transpose()
    stats = stats.rename(columns=display_names)
    return stats[list(display_names.values())]

# NOTE(review): only the header is printed here; no summary_table(...) call follows in this cell.
print("Summary statistics for returns:")

Summary statistics for returns:


 ## 5. Main wrapper: from prices → summaries for ReCov, ReCov⁺, ReCov⁻


In [9]:

def realized_summary_from_prices(prices: pd.DataFrame):
    """
    Run the whole chain on a wide price table:
        prices → simple returns → daily ReCov / ReCov⁺ / ReCov⁻ →
        vech time series → summary statistics.

    Input:
        prices: wide DataFrame with intraday prices.
    Returns (7-tuple, in this order):
        cov_df, semi_pos_df, semi_neg_df,
        cov_summary, semi_pos_summary, semi_neg_summary,
        comparison (the three summaries concatenated side by side under the
        top-level column keys "ReCov", "ReCov_plus", "ReCov_minus")
    """
    rets = compute_simple_returns(prices)

    # Daily matrices: full realized covariance, then the two semicovariances.
    daily_cov = daily_realized_cov(rets)
    daily_pos, daily_neg = daily_semicovariances(rets)

    # Flatten each family of matrices to one vech row per day.
    vech_frames = {
        "ReCov": cov_dict_to_vech_df(daily_cov),
        "ReCov_plus": cov_dict_to_vech_df(daily_pos),
        "ReCov_minus": cov_dict_to_vech_df(daily_neg),
    }

    # Summary statistics per series, keyed like the vech frames.
    summaries = {label: summary_table(frame) for label, frame in vech_frames.items()}

    # Side-by-side panel: one column group per measure.
    comparison = pd.concat(summaries, axis=1)

    return (*vech_frames.values(), *summaries.values(), comparison)


In [10]:
# --- 1. Load prices in wide format -----------------------------------------
df_raw = pd.read_parquet("../data/endogenous/prices/filtered_data.parquet")

prices = (
    df_raw
    .sort_values(["Area", "Start DateTime"])
    .pivot(index="Start DateTime",
           columns="Area",
           values="Day-ahead Price (EUR/MWh)")
    .sort_index()
)

# --- 2. Run the realized measures pipeline ---------------------------------
(
    cov_df,
    semi_pos_df,
    semi_neg_df,
    cov_summary,
    semi_pos_summary,
    semi_neg_summary,
    comparison,
) = realized_summary_from_prices(prices)

# --- 3. Split ReVar (diagonal) and define ReCov summary (vars + covars) ----
# Classify vech columns by membership in the market list rather than by
# searching for "-": a market name may itself contain a hyphen, which would
# make its variance column look like a covariance.
market_names = set(prices.columns)
var_cols = [c for c in cov_df.columns if c in market_names]      # BZN|ES, BZN|FR, ...
cov_cols = [c for c in cov_df.columns if c not in market_names]  # BZN|FR-BZN|ES, ...

# ReVar summary (only diagonal entries)
revar_summary = cov_summary.loc[var_cols]

# ReCov summary INCLUDING both variances and covariances
recov_summary = cov_summary  # <- now includes vars + covars

# (Optional) If you still want off-diagonal-only summary:
recov_offdiag_summary = cov_summary.loc[cov_cols]

print("=== Realized variances (ReVar: diagonal) – summary ===")
print(revar_summary)

print("\n=== Realized (co)variances (ReCov: vars + covars) – summary ===")
print(recov_summary)

# --- 4. ReCov⁺ and ReCov⁻ summaries (all entries) --------------------------
print("\n=== Positive semicovariances (ReCov⁺) – summary ===")
print(semi_pos_summary)

print("\n=== Negative semicovariances (ReCov⁻) – summary ===")
print(semi_neg_summary)

# --- 5. Sum ReCov⁺ + ReCov⁻ and compare to ReCov ---------------------------
# The two semicovariance series should add back up to ReCov entry by entry.
semi_sum_df = semi_pos_df + semi_neg_df
semi_sum_summary = summary_table(semi_sum_df)

print("\n=== Sum of ReCov⁺ and ReCov⁻ (ReCov⁺ + ReCov⁻) – summary ===")
print(semi_sum_summary)

print("\nCheck: ReCov (raw) vs ReCov⁺ + ReCov⁻ (raw)")
print("Allclose on time series (vech):",
      np.allclose(cov_df.values, semi_sum_df.values))

summary_diff = cov_summary - semi_sum_summary
print("\nMax absolute difference between ReCov summary and (ReCov⁺ + ReCov⁻) summary:")
print(summary_diff.abs().max())

=== Realized variances (ReVar: diagonal) – summary ===
                Mean             SD       Min         Q1     Median  \
BZN|ES   5070.049812    5352.802490   10.4895  1818.3048  3452.1524   
BZN|FR  18392.401123  308355.502613  131.1228  2791.6638  5358.4056   
BZN|PT   4879.048279    5244.339929    9.6059  1742.1937  3299.5098   

                Q3           Max  
BZN|ES   6456.6980  5.304283e+04  
BZN|FR  10003.2682  1.169718e+07  
BZN|PT   6169.5043  5.304283e+04  

=== Realized (co)variances (ReCov: vars + covars) – summary ===
                       Mean             SD        Min         Q1     Median  \
BZN|ES          5070.049812    5352.802490    10.4895  1818.3048  3452.1524   
BZN|FR         18392.401123  308355.502613   131.1228  2791.6638  5358.4056   
BZN|FR-BZN|ES   3315.075506    4034.631019 -3817.8318  1109.5449  2367.6053   
BZN|PT          4879.048279    5244.339929     9.6059  1742.1937  3299.5098   
BZN|PT-BZN|ES   4803.624908    5196.194109  -665.4657  1713.

In [11]:
### Save realized measures for spillover *recalculation*

import os
import json

# Directory where we want to cache inputs for the spillover-recalc notebook
OUT_DIR = "../data/endogenous/prices"
os.makedirs(OUT_DIR, exist_ok=True)

# 1) Identify variance series (diagonal) from vech(ReCov)
#    Variance columns carry the bare market name. Match them against the
#    market list instead of testing for "-", because a market name may itself
#    contain a hyphen.
market_names = set(prices.columns)
var_cols  = sorted(c for c in cov_df.columns if c in market_names)  # deterministic order

# 2) Daily realized variances (ReVar) as an N-column DF
revar_df = cov_df[var_cols].copy()
revar_df.index.name = "Date"

# 3) Save ReVar and full ReCov vech
revar_path      = os.path.join(OUT_DIR, "revar_daily_recalculated.parquet")
recov_vech_path = os.path.join(OUT_DIR, "recov_vech_daily_recalculated.parquet")

revar_df.to_parquet(revar_path)
cov_df.to_parquet(recov_vech_path)

# 4) Save ReCov⁺ and ReCov⁻ vech
recov_pos_vech_path = os.path.join(OUT_DIR, "recov_pos_vech_daily_recalculated.parquet")
recov_neg_vech_path = os.path.join(OUT_DIR, "recov_neg_vech_daily_recalculated.parquet")

semi_pos_df.to_parquet(recov_pos_vech_path)
semi_neg_df.to_parquet(recov_neg_vech_path)

# 5) Save common vech labels (order of columns)
vech_labels_path = os.path.join(OUT_DIR, "recov_vech_labels_recalculated.json")
with open(vech_labels_path, "w", encoding="utf-8") as f:
    json.dump(list(cov_df.columns), f)

print("Saved realized measures for spillover recalculation:")
print(f" - ReVar daily (diag only):        {revar_path}")
print(f" - ReCov vech daily:               {recov_vech_path}")
print(f" - ReCov⁺ vech daily:              {recov_pos_vech_path}")
print(f" - ReCov⁻ vech daily:              {recov_neg_vech_path}")
print(f" - Vech labels JSON:               {vech_labels_path}")


Saved realized measures for spillover recalculation:
 - ReVar daily (diag only):        ../data/endogenous/prices\revar_daily_recalculated.parquet
 - ReCov vech daily:               ../data/endogenous/prices\recov_vech_daily_recalculated.parquet
 - ReCov⁺ vech daily:              ../data/endogenous/prices\recov_pos_vech_daily_recalculated.parquet
 - ReCov⁻ vech daily:              ../data/endogenous/prices\recov_neg_vech_daily_recalculated.parquet
 - Vech labels JSON:               ../data/endogenous/prices\recov_vech_labels_recalculated.json
