# Temporally Trace Carbon Emissions in Pumped Hydro Storage Plants

This notebook temporally traces carbon emissions of a pumped hydro storage (PHS) plant using time series of production and filling level data. This data is provided by Energy Quantified and fetched via their Python client.

The plants in use are the Kruonis plant in Lithuania and the Čierny Váh plant in Slovakia.

| Plant | LT Kruonis | SK Čierny Váh |
|---|---|---|
| Max. Capacity | 900 MW | 735.16 MW |
| Efficiency | 74 % | 73 % |
| Max. Storage | 10,800 MWh | 4,000 MWh | 
| Avg. CI in Area | 105 kgCO2eq/MWh | 162 kgCO2eq/MWh |

In [None]:
import sys
import os

# required to use src module from parent directory
sys.path.append(os.path.abspath(os.path.join(os.path.abspath(''), '..')))
print(f"Appended parent directory to sys.path: {os.path.abspath(os.path.join(os.path.abspath(''), '..'))}")

from datetime import datetime
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from energyquantified import EnergyQuantified
from energyquantified.metadata import Area, DataType
from energyquantified.metadata.curve import CurveType
from energyquantified.time import CET
from energyquantified.metadata.area import AT, BA, BE, BG, CH, CZ, DE, DK1, DK2, EE, ES, FI, FR, GB, GR, HR, HU, IE, IT_CALA, IT_CNOR, IT_CSUD, IT_NORD, IT_SARD, IT_SICI, IT_SUD, LT, LV, NIE, NL, NO1, NO2, NO3, NO4, NO5, PL, PT, RO, RS, SE1, SE2, SE3, SE4, SI, SK, TR

from src.carbon_intensity import add_average_ci_to_phs, add_temporal_matching_ci_to_phs, calculate_ci_cons, calculate_ci_teta, trace_flows

# Show every row and every column whenever a frame is displayed.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)
# Default figure size for all seaborn plots in this notebook.
sns.set_theme(rc={'figure.figsize': (15, 7)})

In [None]:
# Create the Energy Quantified client from the API key file in the parent directory.
api_key_path = "../eq_api_key.txt"
eq = EnergyQuantified(api_key_file=api_key_path)
# Last expression of the cell, so the result of the key check is shown as cell output.
eq.is_api_key_valid()

In [3]:
# Areas covered by the analysis, in the order used for all per-area loops.
areas: list[Area] = [
    AT, BA, BE, BG, CH, CZ, DE, DK1, DK2, EE,
    ES, FI, FR, GB, GR, HR, HU, IE,
    IT_CALA, IT_CNOR, IT_CSUD, IT_NORD, IT_SARD, IT_SICI, IT_SUD,
    LT, LV, NIE, NL,
    NO1, NO2, NO3, NO4, NO5,
    PL, PT, RO, RS,
    SE1, SE2, SE3, SE4,
    SI, SK, TR,
]

# Load-side curve categories that are fetched for every area.
load_types: list[str] = [
    "Consumption", "Exchange Day-Ahead Schedule Net Export", "Hydro Pumped-storage Pumping",
]

# Generation categories that are fetched for every area
# (" Production" is appended to form the exact curve category when querying).
prod_types: list[str] = [
    "Bioenergy Power", "Biogas Power", "Biomass Power", "CHP Power",
    "Derived Gas Power", "Geothermal Power", "Hard Coal Power",
    "Hydro Pumped-storage", "Hydro Reservoir", "Hydro Run-of-river",
    "Lignite Power", "Natural Gas Power", "Nuclear",
    "Oil Power", "Oil Shale Power", "Other Power", "Peat Power",
    "Solar Photovoltaic", "Waste Power", "Wind Power",
]

# PHS plants under study as (area, plant name, default carbon intensity).
# The plant name is used in curve queries, column names and CSV file names.
# The third value matches the "Avg. CI in Area" row of the table at the top of
# the notebook (kgCO2eq/MWh) and is passed to calculate_ci_teta as default value.
phs_plants: list[tuple[Area, str, int]] = [
    (LT, "Kruonis", 105),
    (SK, "Cierny-Vah", 162),
]

# Analysis window, timezone-aware in CET.
begin_date = datetime(2020, 1, 1, tzinfo=CET)
end_date = datetime(2024, 1, 1, tzinfo=CET)

In [None]:
# Hourly, timezone-aware index spanning the analysis window (both ends included);
# every fetched series is aligned to it when assigned as a column.
# 'h' replaces the upper-case 'H' alias, which pandas >= 2.2 deprecates.
df_index = pd.date_range(start=begin_date, end=end_date, freq='h', tz=CET)
df = pd.DataFrame(index=df_index)

# Load Data

In [None]:
# Fetch the load-side series (consumption, net export, PHS pumping) for every
# area and store each one as a "<area tag> <name>" column of df.
net_export_category = "Exchange Day-Ahead Schedule Net Export"

for area in areas:
    print(f"Fetching load data for {area.tag}...")
    for load_type in load_types:
        # NL Consumption must be synthetic
        is_nl_consumption = load_type == "Consumption" and area.tag == 'NL'
        data_type = DataType.SYNTHETIC if is_nl_consumption else DataType.ACTUAL
        curves = eq.metadata.curves(
            q=load_type,
            area=area,
            curve_type=CurveType.TIMESERIES,
            data_type=data_type,
            exact_category=load_type,
            has_place=False,
        )
        if len(curves) == 0:
            continue
        if len(curves) > 1:
            print(f"WARN - Found more than one curve for {area.tag} and {load_type}")
        curve = curves[0]
        ts = eq.timeseries.load(curve=curve, begin=begin_date, end=end_date, frequency='PT1H', time_zone='CET')
        # The long exchange category is stored under the shorter "Net Export" name.
        # Building the name once also avoids nested same-quote f-strings, which
        # only parse on Python >= 3.12.
        short_name = "Net Export" if load_type == net_export_category else load_type
        column = f"{area.tag} {short_name}"
        df[column] = ts.to_df(name='val', single_level_header=True)['val']
        # Fill after the assignment so hours missing from the fetched series
        # (NaN after index alignment) become 0.0 as well.
        df[column] = df[column].fillna(0.0)

# The first positional argument of DataFrame.info is `verbose`; spell it out.
df.filter(regex="(Consumption|Net Export|Hydro Pumped-storage Pumping)").info(verbose=True)

In [None]:
# Download the actual production series of every generation type for each area
# and store it as a "<area tag> <production type>" column.
for area in areas:
    print(f"Fetching production data for {area.tag}...")
    for prod_type in prod_types:
        matches = eq.metadata.curves(
            q=prod_type,
            area=area,
            curve_type=CurveType.TIMESERIES,
            data_type=DataType.ACTUAL,
            exact_category=f"{prod_type} Production",
            has_place=False,
        )
        if len(matches) == 0:
            # this area has no curve for the production type
            continue
        if len(matches) > 1:
            print(f"WARN - Found more than one curve for {area.tag} and {prod_type}")
        series = eq.timeseries.load(
            curve=matches[0],
            begin=begin_date,
            end=end_date,
            frequency='PT1H',
            time_zone='CET',
        )
        prod_column = f"{area.tag} {prod_type}"
        df[prod_column] = series.to_df(name='val', single_level_header=True)['val']
        # fill after alignment so hours absent from the series become 0.0 too
        df[prod_column] = df[prod_column].fillna(0.0)

In [None]:
# Fetch the day-ahead exchange schedule from every area to each of its exchange
# neighbours and store it as a "<from>><to> Exchange" column.
for area in areas:
    print(f"Fetching exchange data for {area.tag}...")
    for en in area.exchange_neighbours:
        column = f"{area.tag}>{en.tag} Exchange"
        curves = eq.metadata.curves(
            q=column,
            area=area,
            curve_type=CurveType.TIMESERIES,
            data_type=DataType.ACTUAL,
            exact_category="Exchange Day-Ahead Schedule",
            has_place=False,
        )
        if len(curves) == 0:
            continue
        if len(curves) > 2:
            print(f"WARN - Found more than two curves for exchange between {area.tag} and {en.tag}")
        # Presumably the query can also return the curve of the opposite direction,
        # so keep the first curve whose name starts with this area's tag. Selecting
        # it this way cannot raise IndexError when only one curve is returned.
        curve = next((c for c in curves if c.name.startswith(area.tag)), None)
        if curve is None:
            print(f"WARN - Found no curve starting at {area.tag} for exchange with {en.tag}")
            continue
        ts = eq.timeseries.load(curve=curve, begin=begin_date, end=end_date, frequency='PT1H', time_zone='CET')
        df[column] = ts.to_df(name='val', single_level_header=True)['val']
        # fill after alignment so hours absent from the series become 0.0 too
        df[column] = df[column].fillna(0.0)

In [None]:
# Fetch the plant-level production series of each PHS plant and store it as
# "<area tag> @<plant> Hydro Pumped-storage Production".
for area, plant, _ in phs_plants:
    print(f"Fetching production data for {area.tag} @{plant}...")
    curves = eq.metadata.curves(
        q=f"@{plant} Hydro Pumped-storage Production",
        area=area,
        curve_type=CurveType.TIMESERIES,
        data_type=DataType.ACTUAL,
        exact_category="Hydro Pumped-storage Production",
        has_place=True,
    )
    if len(curves) == 0:
        continue
    if len(curves) > 1:
        # Name the plant here; the loop variable of the area-level production
        # cell is not defined by this cell.
        print(f"WARN - Found more than one curve for {area.tag} and @{plant}")
    curve = curves[0]
    ts = eq.timeseries.load(curve=curve, begin=begin_date, end=end_date, frequency='PT1H', time_zone='CET')
    print(ts)
    column = f"{area.tag} @{plant} Hydro Pumped-storage Production"
    df[column] = ts.to_df(name='val', single_level_header=True)['val']
    # fill after alignment so hours absent from the series become 0.0 too
    df[column] = df[column].fillna(0.0)

df.filter(regex="(Hydro Pumped-storage Production)").info()

In [None]:
# Read each plant's filling level from its CSV file and interpolate gaps over time.
for area, plant, _ in phs_plants:
    print(f"Load filling level data for {area.tag} @{plant}...")
    level_column = f"{area.tag} @{plant} Filling Level"
    csv_path = f"../data/{area.tag.lower()}_{plant.lower()}_filling_level.csv"
    # NOTE(review): the frame is assigned to a single column, so the CSV is
    # presumably expected to hold exactly one value column - confirm.
    df[level_column] = pd.read_csv(csv_path, index_col=0, parse_dates=True)
    df[level_column] = df[level_column].interpolate(method='time')

df.filter(regex="(Filling Level)").info()

In [None]:
# Round all values to two decimals and drop the final row of the frame.
df = df.round(2).iloc[:-1]
df.describe()

# Calculate CI SEA

SEA stands for Self-Emission Approach

In [None]:
# importlib.reload(sys.modules["src.carbon_intensity"])
from src.carbon_intensity import calculate_ci_prod

# Drop the SEA production columns left over from an earlier run of this cell.
sea_prod_pattern = "|".join([
    "Carbon Intensity Production SEA",
    "Carbon Emissions Production SEA",
    "Total Production SEA",
])
df = df.loc[:, ~df.columns.str.contains(sea_prod_pattern)]

for area in areas:
    print(f"Calculating production CI SEA for {area.tag}...")
    intensity, emissions, production = calculate_ci_prod(df, area.tag)
    # NOTE(review): the concat appends the three series under whatever names
    # calculate_ci_prod gave them; the assignments below store them again under
    # SEA-suffixed names - confirm that both copies are needed.
    df = pd.concat([df, intensity, emissions, production], axis=1)
    df[f"{area.tag} Carbon Intensity Production SEA"] = intensity
    df[f"{area.tag} Carbon Emissions Production SEA"] = emissions
    df[f"{area.tag} Total Production SEA"] = production

In [None]:
# Drop the SEA consumption columns left over from an earlier run of this cell.
sea_cons_pattern = "Carbon Intensity Consumption SEA|Carbon Emissions Consumption SEA|Total Load SEA"
df = df.loc[:, ~df.columns.str.contains(sea_cons_pattern)]

print("Calculating consumption CI SEA...")
ci_cons_df: pd.DataFrame = calculate_ci_cons(df, [area.tag for area in areas], " SEA")

# Copy the per-area results into df under SEA-suffixed column names.
for area in areas:
    for metric in ("Carbon Intensity Consumption", "Carbon Emissions Consumption", "Total Load"):
        df[f"{area.tag} {metric} SEA"] = ci_cons_df[f"{area.tag} {metric}"]

# Calculate CI AEAA

AEAA stands for Average Emission Add-on Approach

In [None]:
# Drop the AEAA columns left over from an earlier run of this cell.
aeaa_pattern = "Carbon Intensity Production AEAA|Carbon Emissions Production AEAA"
df = df.loc[:, ~df.columns.str.contains(aeaa_pattern)]

# AEAA results are computed from the SEA columns for each area hosting a PHS plant.
for area, plant, _ in phs_plants:
    print(f"Calculating production CI AEAA for {area.tag}...")
    intensity, emissions = add_average_ci_to_phs(df, area.tag, plant, " SEA")
    df[f"{area.tag} Carbon Intensity Production AEAA"] = intensity
    df[f"{area.tag} Carbon Emissions Production AEAA"] = emissions

df.filter(regex=f"({aeaa_pattern})").info()


# Calculate CI TMEA

TMEA stands for Temporal Matching Emission Approach

In [None]:
# Drop the TMEA columns left over from an earlier run of this cell.
tmea_pattern = "Carbon Intensity Production TMEA|Carbon Emissions Production TMEA"
df = df.loc[:, ~df.columns.str.contains(tmea_pattern)]

# TMEA results are computed from the SEA columns for each area hosting a PHS plant.
for area, plant, _ in phs_plants:
    print(f"Calculating production CI TMEA for {area.tag}...")
    intensity, emissions = add_temporal_matching_ci_to_phs(df, area.tag, plant, " SEA")
    df[f"{area.tag} Carbon Intensity Production TMEA"] = intensity
    df[f"{area.tag} Carbon Emissions Production TMEA"] = emissions


# Calculate & Save TETA

Due to the heavy computation, the data frame is split into years. Each year is calculated and stored separately.

TETA stands for Temporal Emission Tracing Approach

In [None]:
# Tag embedded in the CSV file names (named run_tag so the builtin `type` is not shadowed).
run_tag = "lt_sk"

# Last row of the previous year's result; it is passed into the next year's
# calculation (presumably as its initial state - confirm in calculate_ci_teta).
prev_last_row: pd.Series | None = None
# To resume from a stored year, load its last row instead:
# prev_last_row = pd.read_csv("../data/ci_teta_2023_none.csv").iloc[-1]

# Opt in to pandas' future no-silent-downcasting behaviour only while
# calculating; the context manager restores the previous setting afterwards,
# even when a year fails.
with pd.option_context('future.no_silent_downcasting', True):
    for year in range(begin_date.year, end_date.year + 1):
        year_input = df.loc[f"{year}-01-01":f"{year}-12-31"]
        if year_input.empty:
            # No rows for this year, e.g. the end year once the final timestamp
            # of the index has been dropped; nothing to calculate or store.
            print(f"Skipping {year}: no data in df")
            continue
        print(f"Calculating production CI TETA for {year}...")
        ci_teta_df = calculate_ci_teta(
            year_input,
            [area.tag for area in areas],
            [(area.tag, plant, default_val) for area, plant, default_val in phs_plants],
            prev_last_row,
        )
        ci_teta_df.to_csv(f"../data/ci_teta_{year}_{run_tag}.csv")

        prev_last_row = ci_teta_df.iloc[-1]

# Load TETA

In [None]:
# Drop the TETA columns left over from an earlier run of this cell.
df = df.loc[:, ~df.columns.str.contains("TETA")]

# Tag embedded in the CSV file names (named run_tag so the builtin `type` is not shadowed).
run_tag = "lt_sk"

# Result columns copied per area and per plant; " TETA" is appended in df.
area_metrics = (
    "Carbon Intensity Production",
    "Carbon Emissions Production",
    "Carbon Intensity Consumption",
    "Carbon Emissions Consumption",
    "Total Production",
    "Total Load",
)
plant_metrics = ("Carbon Intensity", "Carbon Emissions")

for year in range(begin_date.year, end_date.year):
    year_df = pd.read_csv(f"../data/ci_teta_{year}_{run_tag}.csv", index_col=0, parse_dates=True)
    # Normalise the stored timestamps to UTC, then express them in CET like df's index.
    year_df.index = pd.to_datetime(year_df.index, utc=True).tz_convert('CET')

    # 0 and 1600 are replaced by NaN in every copied column
    # (presumably placeholder values of the calculation - confirm).
    for area in areas:
        for metric in area_metrics:
            df.loc[year_df.index, f"{area.tag} {metric} TETA"] = year_df[f"{area.tag} {metric}"].replace([0, 1600], np.nan)
    for area, plant, _ in phs_plants:
        for metric in plant_metrics:
            df.loc[year_df.index, f"{area.tag} @{plant} {metric} TETA"] = year_df[f"{area.tag} @{plant} {metric}"].replace([0, 1600], np.nan)

df.filter(regex='(TETA)$').info(max_cols=1000)

# Analyse CI TETA

In [29]:
# Drop the flow columns left over from an earlier run of this cell.
is_flow_column = df.columns.str.contains("Flow")
df = df.loc[:, ~is_flow_column]

# trace_flows is applied to every row; its results are appended as new columns.
flows_df = df.apply(
    lambda hour_row: trace_flows(hour_row, [a.tag for a in areas]),
    axis=1,
)
df = pd.concat([df, flows_df], axis=1)

In [None]:
# Replace 0.0 and 1600.0 in each plant's TETA carbon intensity with NaN.
for area, plant, _ in phs_plants:
    plant_ci_column = f"{area.tag} @{plant} Carbon Intensity TETA"
    df[plant_ci_column] = df[plant_ci_column].replace([0.0, 1600.0], np.nan)

# Restrict df to the analysis window.
df = df.loc[begin_date:end_date]
df.info()

In [31]:
# Number of entries shown in each "top" / "bottom" ranking printed by the analysis cells.
limit: int = 10

In [None]:
def _print_flow_rows(rows):
    """Print one line per (area, SK flow sum, area load sum) tuple."""
    for area, flows_sum, load_sum in rows:
        print(f"SK>{area.tag} Flows: {flows_sum:.2f} MWh, {area.tag} Total Load: {load_sum:.2f} MWh, Flows / Load: {flows_sum / load_sum * 100:.4f}%")


# For every area except SK: summed non-negative flow from SK and summed TETA total load.
flows_load_sums: list[tuple[Area, float, float]] = [
    (
        area,
        df[f"SK>{area.tag} Flow"].clip(lower=0).sum(),
        df[f"{area.tag} Total Load TETA"].sum(),
    )
    for area in areas
    if not area == SK
]

# Ranking by absolute flow amount, largest first.
flows_load_sums.sort(key=lambda entry: entry[1], reverse=True)
print("Top areas by flow amount:")
_print_flow_rows(flows_load_sums[:limit])
print("")
print("Bottom areas by flow amount:")
_print_flow_rows(flows_load_sums[-limit:])
print("")

# Ranking by the share of the area's load covered by flows from SK, largest first.
flows_load_sums.sort(key=lambda entry: entry[1] / entry[2], reverse=True)
print("Top areas by share of flows:")
_print_flow_rows(flows_load_sums[:limit])
print("")
print("Bottom areas by share of flows:")
_print_flow_rows(flows_load_sums[-limit:])

In [None]:
# All SEA / TETA carbon intensity columns, four per area.
ci_columns = []
for area in areas:
    ci_columns.extend(
        f"{area.tag} Carbon Intensity {variant}"
        for variant in ("Production SEA", "Production TETA", "Consumption SEA", "Consumption TETA")
    )

# Round, limit to [0, 1600] and turn both boundary values into NaN.
bounded_ci = df[ci_columns].round(5).clip(lower=0.0, upper=1600.0)
df[ci_columns] = bounded_ci.replace([0.0, 1600.0], np.nan)

# NOTE(review): each Diff column receives one scalar (difference of the two
# column means) repeated on every row, not a row-wise difference - confirm
# that this is intended before relying on describe() below.
ci_diff_columns = []
for area in areas:
    for side in ("Production", "Consumption"):
        mean_teta = df[f"{area.tag} Carbon Intensity {side} TETA"].mean()
        mean_sea = df[f"{area.tag} Carbon Intensity {side} SEA"].mean()
        diff_column = f"{area.tag} Carbon Intensity {side} Diff"
        df[diff_column] = mean_teta - mean_sea
        ci_diff_columns.append(diff_column)

df[ci_diff_columns] = df[ci_diff_columns].round(5)
df[ci_diff_columns].describe()

In [None]:
def _print_ci_ranking(title, rows):
    """Print a heading, one line per consumption-CI entry, and a blank line."""
    print(title)
    for area, ci_cons_sea, ci_cons_teta, ci_diff, ci_diff_rel in rows:
        print(f"{area.tag} CI Consumption: from {ci_cons_sea:.10f} kgCO2eq/Mwh\tto {ci_cons_teta:.10f} kgCO2eq/Mwh\t=> {ci_diff:.10f} kgCO2eq/MWh\t=> {ci_diff_rel*100:.10f}%")
    print()


# Per area: mean consumption CI under SEA and TETA, their absolute and relative difference.
ci_diffs: list[tuple[Area, float, float, float, float]] = []
for area in areas:
    mean_sea = df[f"{area.tag} Carbon Intensity Consumption SEA"].mean()
    mean_teta = df[f"{area.tag} Carbon Intensity Consumption TETA"].mean()
    abs_diff = mean_teta - mean_sea
    ci_diffs.append((area, mean_sea, mean_teta, abs_diff, abs_diff / mean_sea))

# Ranking by absolute difference, largest first.
ci_diffs.sort(key=lambda entry: entry[3], reverse=True)
_print_ci_ranking("Top areas by absolute difference in consumption CI:", ci_diffs[:limit])
_print_ci_ranking("Bottom areas by absolute difference in consumption CI:", ci_diffs[-limit:])

# Ranking by relative difference, largest first.
ci_diffs.sort(key=lambda entry: entry[4], reverse=True)
_print_ci_ranking("Top areas by relative difference in consumption CI:", ci_diffs[:limit])
_print_ci_ranking("Bottom areas by relative difference in consumption CI:", ci_diffs[-limit:])

In [None]:
import math
from scipy import stats

# Significance level of the paired t-test.
alpha = 0.05

# Areas whose consumption CI differs significantly between TETA and SEA.
areas_with_diff = []

# count hours between begin and end date
hours = df.shape[0]
print(f"Hours: {hours}")

for area in areas:
    sea_column = f"{area.tag} Carbon Intensity Consumption SEA"
    teta_column = f"{area.tag} Carbon Intensity Consumption TETA"
    # The paired test needs both values in every row.
    df_clean = df.dropna(subset=[sea_column, teta_column])

    # Paired t-test
    t_stat, p_value = stats.ttest_rel(df_clean[teta_column], df_clean[sea_column])

    if p_value < alpha:
        teta_mean = df_clean[teta_column].mean()
        sea_mean = df_clean[sea_column].mean()
        mean_diff = teta_mean - sea_mean
        # Effect size d_z = t / sqrt(n). The former extra `d` took the standard
        # deviation of a single scalar (always 0), i.e. a division by zero, and
        # was never printed, so it is removed.
        d_z = t_stat / math.sqrt(df_clean.shape[0])
        print(f'"{area.tag}", "{d_z:.4f}", "{mean_diff:.7f}", "{sea_mean:.7f}", "{teta_mean:.7f}",')
        areas_with_diff.append(area)


In [None]:
# Per area: flows received from SK, total load, SK's share of that load, and the
# absolute / relative TETA-vs-SEA differences of production and consumption CI.
entries: list[tuple[Area, float, float, float, float, float, float, float]] = []
for area in areas:
    total_load = df[f"{area.tag} Total Load TETA"].sum()
    if area == SK:
        # SK does not receive flows from itself
        flows_from_sk, share_of_load = 0, 0
    else:
        flows_from_sk = df[f"SK>{area.tag} Flow"].clip(lower=0).sum()
        share_of_load = flows_from_sk / total_load

    prod_sea = df[f"{area.tag} Carbon Intensity Production SEA"].mean()
    prod_diff = df[f"{area.tag} Carbon Intensity Production TETA"].mean() - prod_sea
    cons_sea = df[f"{area.tag} Carbon Intensity Consumption SEA"].mean()
    cons_diff = df[f"{area.tag} Carbon Intensity Consumption TETA"].mean() - cons_sea

    entries.append((
        area, flows_from_sk, total_load, share_of_load,
        prod_diff, prod_diff / prod_sea,
        cons_diff, cons_diff / cons_sea,
    ))

# Sort by SK's share of the area's load (tuple position 3), largest first.
entries.sort(key=lambda entry: entry[3], reverse=True)
for area, sk_flows, total_load, sk_load_share, ci_prod_diff, ci_prod_diff_rel, ci_cons_diff, ci_cons_diff_rel in entries:
    print(f'"{area.tag}", "{(sk_flows / 1000):.10f}", "{(sk_load_share * 100):.10f}%", "{ci_cons_diff:.10f}", "{(ci_cons_diff_rel * 100):.10f}%",')

In [None]:
# Summary statistics of LT's production CI under every model plus the Kruonis plant CI.
lt_summary_columns = [
    f"LT Carbon Intensity Production {model}" for model in ("SEA", "AEAA", "TMEA", "TETA")
]
lt_summary_columns.append('LT @Kruonis Carbon Intensity TETA')
df[lt_summary_columns].describe()

In [None]:
# Mean production CI of LT under each model.
sea_mean, aeaa_mean, tmea_mean, teta_mean = (
    df[f"LT Carbon Intensity Production {model}"].mean()
    for model in ("SEA", "AEAA", "TMEA", "TETA")
)

# Relative deviation of each model's mean from the SEA mean.
aeaa_diff_rel, tmea_diff_rel, teta_diff_rel = (
    (model_mean - sea_mean) / sea_mean
    for model_mean in (aeaa_mean, tmea_mean, teta_mean)
)

for label, model_mean, diff_rel in (
    ("AEAA", aeaa_mean, aeaa_diff_rel),
    ("TMEA", tmea_mean, tmea_diff_rel),
    ("TETA", teta_mean, teta_diff_rel),
):
    print(f"LT {label} mean: {model_mean}, diff: {diff_rel*100:.2f}%")

# Kruonis plant figures; only non-negative production values are summed.
kruonis_ci_mean = df["LT @Kruonis Carbon Intensity TETA"].mean()
total_prod_sum = df["LT Total Production"].sum()
kruonis_prod_sum = df["LT @Kruonis Hydro Pumped-storage Production"].clip(lower=0).sum()

print(f"LT Total Production: {total_prod_sum}")
print(f"LT Kruonis Carbon Intensity: {kruonis_ci_mean}")
print(f"LT Kruonis Production: {kruonis_prod_sum}")
print(f"LT Kruonis Production / Total Production: {kruonis_prod_sum / total_prod_sum * 100:.2f}%")

In [None]:
# Horizontal boxen plot comparing LT's production CI across the four models.
lt_models = ("SEA", "AEAA", "TMEA", "TETA")

plt.figure(figsize=(15, 4))
sns.boxenplot(
    data=df[[f"LT Carbon Intensity Production {model}" for model in lt_models]],
    showfliers=False,
    orient='h',
)
plt.xlabel("Carbon Intensity [kgCO2eq/MWh]")
plt.ylabel("Model")
plt.yticks(
    labels=[f"LT {model}" for model in lt_models],
    ticks=range(0, 4),
)
plt.show()

In [None]:
# Histogram with KDE of LT's production CI per model and of the Kruonis plant CI.
lt_hist_columns = [
    f"LT Carbon Intensity Production {model}" for model in ("SEA", "AEAA", "TMEA", "TETA")
]
lt_hist_columns.append('LT @Kruonis Carbon Intensity TETA')

plt.figure(figsize=(15, 4))
sns.histplot(data=df[lt_hist_columns], kde=True)
plt.xlabel("Carbon Intensity [kgCO2eq/MWh]")
plt.show()

In [None]:
# Compare LT's TETA production CI with the Kruonis plant CI, using only rows
# in which the plant's production value is positive.
kruonis_producing = df["LT @Kruonis Hydro Pumped-storage Production"] > 0
kruonis_plot_columns = [
    'LT Carbon Intensity Production TETA',
    'LT @Kruonis Carbon Intensity TETA',
]

plt.figure(figsize=(15, 3))
sns.boxenplot(
    data=df.loc[kruonis_producing, kruonis_plot_columns],
    showfliers=False,
    orient='h',
)
plt.xlabel("Carbon Intensity [kgCO2eq/MWh]")
plt.ylabel("Model")
plt.yticks(labels=["LT TETA", "LT Kruonis"], ticks=range(0, 2))
plt.show()

In [None]:
# Summary statistics of SK's production CI under every model and of its
# consumption CI under SEA and TETA.
sk_summary_columns = [
    f"SK Carbon Intensity Production {model}" for model in ("SEA", "AEAA", "TMEA", "TETA")
]
sk_summary_columns += [
    'SK Carbon Intensity Consumption SEA',
    'SK Carbon Intensity Consumption TETA',
]
df[sk_summary_columns].describe()

In [None]:
# Mean production CI of SK under each model, and mean consumption CI under SEA / TETA.
sea_mean = df["SK Carbon Intensity Production SEA"].mean()
aeaa_mean = df["SK Carbon Intensity Production AEAA"].mean()
tmea_mean = df["SK Carbon Intensity Production TMEA"].mean()
teta_mean = df["SK Carbon Intensity Production TETA"].mean()
sea_cons_mean = df["SK Carbon Intensity Consumption SEA"].mean()
teta_cons_mean = df["SK Carbon Intensity Consumption TETA"].mean()

# Relative deviation of each mean from the corresponding SEA mean.
aeaa_diff_rel = (aeaa_mean - sea_mean) / sea_mean
tmea_diff_rel = (tmea_mean - sea_mean) / sea_mean
teta_diff_rel = (teta_mean - sea_mean) / sea_mean
teta_cons_diff_rel = (teta_cons_mean - sea_cons_mean) / sea_cons_mean

print(f"SK AEAA mean: {aeaa_mean}, diff: {aeaa_diff_rel*100:.2f}%")
print(f"SK TMEA mean: {tmea_mean}, diff: {tmea_diff_rel*100:.2f}%")
print(f"SK TETA mean: {teta_mean}, diff: {teta_diff_rel*100:.2f}%")
print(f"SK TETA cons mean: {teta_cons_mean}, diff: {teta_cons_diff_rel*100:.2f}%")

# Cierny-Vah plant figures; only non-negative production values are summed.
total_prod_sum = df["SK Total Production"].sum()
total_load_sum = df["SK Total Load TETA"].sum()
cv_prod_sum = df["SK @Cierny-Vah Hydro Pumped-storage Production"].clip(lower=0.0).sum()

# The labels name the Cierny-Vah plant; they previously said "Kruonis",
# which is the Lithuanian plant.
print(f"SK Total Production: {total_prod_sum}")
print(f"SK Total Load: {total_load_sum}")
print(f"SK Cierny-Vah Production: {cv_prod_sum}")
print(f"SK Cierny-Vah Production / Total Production: {cv_prod_sum / total_prod_sum * 100:.2f}%")
print(f"SK Cierny-Vah Production / Total Load: {cv_prod_sum / total_load_sum * 100:.2f}%")

In [None]:
# Horizontal boxen plot of SK's production CI per model and of the Cierny-Vah plant CI.
sk_models = ("SEA", "AEAA", "TMEA", "TETA")
sk_plot_columns = [f"SK Carbon Intensity Production {model}" for model in sk_models]
sk_plot_columns.append('SK @Cierny-Vah Carbon Intensity TETA')

plt.figure(figsize=(15, 5))
sns.boxenplot(data=df[sk_plot_columns], showfliers=False, orient='h')
plt.xlabel("Carbon Intensity [kgCO2eq/MWh]")
plt.xlim(0, 400)
plt.ylabel("Model")
plt.yticks(labels=[*sk_models, "Cierny-Vah"], ticks=range(0, 5))
plt.show()

In [None]:
# SK's TETA production CI next to the SEA consumption CI of selected other areas.
columns = ["SK Carbon Intensity Production TETA"]
columns += [
    f"{tag} Carbon Intensity Consumption SEA"
    for tag in ("SE3", "SE2", "DE", "CZ", "PL", "NO2")
]

plt.figure(figsize=(15, 5))
sns.boxenplot(
    data=df[columns],
    orient='h',
    showfliers=False,
)
plt.xlabel("Carbon Intensity [kgCO2eq/MWh]")
# Tick labels are the column names with "Carbon Intensity" shortened to "CI".
plt.yticks(
    labels=[name.replace("Carbon Intensity", "CI") for name in columns],
    ticks=range(0, len(columns)),
)
plt.show()