# Estimate the Filling Level of a Pumped Hydro Energy Storage

This notebook estimates the filling level of a pumped hydro energy storage (PHES) using time series of pumping and production data. This data is provided by Energy Quantified and fetched via their Python client.

Most PHES plants have insufficient data or are not closed-loop systems. Two plants in Europe have been identified whose data is of good quality: the Kruonis plant in Lithuania and the Čierny Váh plant in Slovakia.

| Plant | LT Kruonis | SK Čierny Váh |
|---|---|---|
| Max. Capacity | 900 MW | 735.16 MW |
| Efficiency | 74 % | 73 % |
| Max. Storage | 10,800 MWh | 4,000 MWh | 

In [1]:
import sys
import os

# required to use src module from parent directory
parent_dir = os.path.abspath(os.path.join(os.path.abspath(''), '..'))
# Guard the append so that re-running this cell does not add duplicate entries.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
    print(f"Appended parent directory to sys.path: {parent_dir}")

from datetime import date, timedelta
from matplotlib import pyplot as plt
import seaborn as sns

from energyquantified import EnergyQuantified 
from energyquantified.time import CET

from src.filling_level import estimate_filling_level

# Default figure size (width, height) for plots that do not set their own.
sns.set_theme(rc={"figure.figsize": (15, 7)})

Appended parent directory to sys.path: /Users/kpelz/Developer/montel/carbon_power_storage


In [None]:
# Set up EQ's Python client; the API key is read from a file next to the repository code.
api_key_path = "../eq_api_key.txt"
eq = EnergyQuantified(api_key_file=api_key_path)

# Display the result of the key check as the cell output.
eq.is_api_key_valid()

This notebook uses the LT Kruonis plant but it can be easily switched to SK Čierny Váh by commenting and uncommenting the following lines.

In [None]:
# Extra history (in days) that is loaded before 2019-01-01.
days = 360

# --- LT Kruonis (active configuration) ---
plant_name = "LT @Kruonis"
plant_efficiency = 0.74  # official efficiency (74 %)
max_storage = 10_800  # official maximum storage in MWh
end_date = date(year=2024, month=7, day=1)
begin_date = date(year=2019, month=1, day=1) - timedelta(days=days)

# --- SK Čierny Váh (uncomment to switch and comment out the LT block above) ---
# plant_name = "SK @Cierny-Vah"
# plant_efficiency = 0.73
# max_storage = 4_000
# begin_date = date(2019, 1, 1) - timedelta(days=days)
# # begin_date = date(2015, 1, 1)
# end_date = date(2024, 7, 1)


## Load Data

In [None]:
# Name of the plant's pumped-storage production curve on Energy Quantified.
plant_curve = f"{plant_name} Hydro Pumped-storage Production MWh/h H Actual"

# Fetch the curve for the configured period, requesting values in MWh.
ts = eq.timeseries.load(
    plant_curve,
    unit="MWh",
    begin=begin_date,
    end=end_date,
)

ts

In [None]:
# Turn the time series into a data frame with one flat "Net Production" column,
# then show its structure (index, dtypes, non-null counts).
df = ts.to_df(single_level_header=True, name="Net Production")
df.info()

In [None]:
# Count missing values per column.
missing_values = df.isnull().sum()
missing_count = missing_values["Net Production"]

# The lookup is done outside the f-string: reusing the enclosing quote character
# inside a replacement field is a SyntaxError on Python versions before 3.12.
print(f"Missing values: {missing_count} of {len(df)} => {missing_count / len(df) * 100:.2f}%")

In [None]:
# Summary statistics (count, mean, std, quantiles) of the loaded data.
df.describe()

In [None]:
# Split the signed net production into its two directions:
# values >= 0 are kept as "Production", values <= 0 are kept as "Pumping".
net_production = df["Net Production"]
df["Production"] = net_production.clip(lower=0)
df["Pumping"] = net_production.clip(upper=0)

In [None]:
# Plot the complete net production series over time.
sns.lineplot(data=df, x=df.index, y="Net Production")

## Estimate Efficiency

Even though the plants have an official efficiency, it's often not the most accurate and actually varies slightly over time. That's why the efficiency must be estimated. 

In [None]:
# Overall efficiency: total energy produced divided by total energy pumped.
# "Pumping" holds values <= 0, so its sum is negated to obtain a positive ratio.
total_production = df["Production"].sum()
total_pumping = -df["Pumping"].sum()
efficiency = total_production / total_pumping

print(f"Overall efficiency: {efficiency:.4f} (it should be ~{plant_efficiency})")

In [None]:
# Window lengths in days for which a rolling efficiency is computed.
window_sizes = [7, 14, 30, 60, 90, 180, 270, 360]

# Missing values are treated as zero energy so that gaps do not blank out a whole window.
production_filled = df["Production"].fillna(0)
pumping_filled = df["Pumping"].fillna(0)

for window_size in window_sizes:
    # Windows are expressed in rows; 24 rows per day assumes hourly data — TODO confirm.
    window_rows = window_size * 24
    production_col = f"Production {window_size}d"
    pumping_col = f"Pumping {window_size}d"

    df[production_col] = production_filled.rolling(window=window_rows).mean()
    df[pumping_col] = pumping_filled.rolling(window=window_rows).mean()

    # Pumping means are <= 0, hence the sign flip in the denominator.
    df[f"Efficiency {window_size}d"] = df[production_col] / -df[pumping_col]

df[[f"Efficiency {window_size}d" for window_size in window_sizes]].describe()

In [None]:
# Compare the efficiency distributions of the longer windows only (90 days and above).
long_windows = window_sizes[4:]
efficiency_columns = [f"Efficiency {size}d" for size in long_windows]
tick_labels = [f"{size} Days" for size in long_windows]

plt.figure(figsize=(15, 5))
sns.boxenplot(data=df[efficiency_columns], orient="h", showfliers=False)
plt.xlim(0.72, 0.85)
plt.xlabel("Efficiency")
plt.ylabel("Window Size")
plt.yticks(ticks=range(len(long_windows)), labels=tick_labels)
plt.show()

In [None]:
# Rolling window, in rows, used for the time-varying efficiency estimate
# (180 days at 24 rows per day; assumes hourly data — TODO confirm).
efficiency_window_size = 180 * 24

# Missing values are treated as zero energy so that a single gap does not
# turn the whole rolling sum into NaN.
prod_sum = df["Production"].fillna(0).rolling(efficiency_window_size).sum()
pump_sum = df["Pumping"].fillna(0).rolling(efficiency_window_size).sum()

# "Pumping" holds values <= 0, so the ratio is negated to become positive.
df["Efficiency"] = prod_sum / pump_sum * -1.0

eff_mean = df["Efficiency"].mean()
eff_std = df["Efficiency"].std()

# Report the window actually used for the estimate, not `days`, which only
# controls how much history is loaded before 2019-01-01.
print(f"Efficiency over {efficiency_window_size // 24} days:")
print(f"Efficiency mean: {eff_mean:.4f}")
print(f"Efficiency std: {eff_std:.4f}")
print(df["Efficiency"].describe())

plt.figure(figsize=(15, 3))
sns.histplot(df["Efficiency"], kde=True)
# show official and actual efficiency
plt.axvline(plant_efficiency, color="r", label="Official Efficiency", linestyle="--", linewidth=2)
plt.axvline(eff_mean, color="purple", label="Actual Efficiency", linestyle="--", linewidth=2)
plt.legend()
plt.show()

In [None]:
# Weekly mean of every column; only "Efficiency" is plotted in this cell.
# The weekly 5 % / 95 % quantile frames are not computed here because nothing
# in this cell uses them.
mean_df = df.resample('W').mean()

plt.figure(figsize=(15, 3))
sns.lineplot(data=mean_df, x=mean_df.index, y="Efficiency", label="Mean Efficiency")
# Reference lines: the official plant efficiency and the overall efficiency
# computed from the total production and pumping sums.
plt.axhline(y=plant_efficiency, color='r', linestyle='--', label="Official Efficiency", linewidth=2)
plt.axhline(y=efficiency, color='purple', linestyle='--', label="Actual Efficiency", linewidth=2)

plt.xlim(date(2019, 1, 1), end_date)
plt.xlabel("Date")
plt.ylabel("Efficiency")
plt.legend()
plt.show()

## Estimate Filling Level

In [None]:
# Skip the first `efficiency_window_size` rows: the rolling efficiency needs a
# full window of history before it yields a value.
rows_with_efficiency = df.iloc[efficiency_window_size:]

df["Filling Level"] = estimate_filling_level(
    rows_with_efficiency,
    "Net Production",
    max_filling_level=max_storage,
    efficiency_name="Efficiency",
)
df["Filling Level"].describe()

In [None]:
# Show the first estimated values. The estimate is computed from row
# `efficiency_window_size` onwards, so the slice starts there instead of at a
# hard-coded offset.
df["Filling Level"].iloc[efficiency_window_size:].head()

In [None]:
# Show the last rows of the filling level estimate.
df["Filling Level"].tail()

In [None]:
# Rows where the estimated filling level is exactly zero.
is_zero = df["Filling Level"].eq(0)
zero_filling_level = df.loc[is_zero]

zero_filling_level.loc[:, ["Filling Level"]]

In [None]:
# Rows where the estimated filling level is below 3,800 MWh;
# the last 50 of them are displayed.
is_low = df["Filling Level"].lt(3800)
low_filling_level = df.loc[is_low]

low_filling_level.loc[:, ["Filling Level"]].tail(50)

In [None]:
# Offset noted as "for SK plant" in the original cell.
# NOTE(review): it is subtracted regardless of `plant_name` — confirm that this
# is intended when another plant is selected.
readjustment_offset = 3577.311499
df["Readjusted Filling Level"] = df["Filling Level"] - readjustment_offset

# Weekly aggregates of all columns: 95 % quantile, 5 % quantile and mean.
max_df = df.resample('W').quantile(0.95)
min_df = df.resample('W').quantile(0.05)
mean_df = df.resample('W').mean()

# All curves share the x axis of the weekly 95 % quantile frame.
weekly_index = max_df.index

# Estimated filling level (solid lines, default colour cycle).
for frame, label in ((max_df, "95th Percentile"), (mean_df, "Mean"), (min_df, "5th Percentile")):
    sns.lineplot(data=frame, x=weekly_index, y="Filling Level", label=label)

# Readjusted filling level (dash-dotted lines, explicit colours).
readjusted_curves = (
    (max_df, "Readjusted 95th Percentile", "b"),
    (mean_df, "Readjusted Mean", "#DD8452"),
    (min_df, "Readjusted 5th Percentile", "g"),
)
for frame, label, color in readjusted_curves:
    sns.lineplot(data=frame, x=weekly_index, y="Readjusted Filling Level", label=label, linestyle="-.", color=color)

# Official storage capacity as a reference line.
plt.axhline(y=max_storage, color='r', linestyle='--', label="Filling Capacity", linewidth=2)
plt.legend()
plt.xlabel("Date")
plt.ylabel("Filling Level [MWh]")
plt.xlim(date(2019, 1, 1), end_date)
plt.ylim(bottom=0)
plt.show()

In [None]:
# Distribution of the estimated filling level, with the official capacity as a reference.
plt.figure(figsize=(15, 3))
sns.histplot(data=df, x="Filling Level", kde=True)
plt.axvline(max_storage, label="Official Capacity", color="r", linewidth=2, linestyle="--")
plt.xlabel("Filling Level [MWh]")
plt.legend()
plt.show()

## Store Data (CSV)

In [None]:
# convert index to UTC
df.index = df.index.tz_convert("UTC")

# Build the file name outside the f-string: reusing double quotes inside a
# double-quoted f-string is a SyntaxError on Python versions before 3.12.
# Example: "LT @Kruonis" -> "lt_kruonis".
file_stem = plant_name.lower().replace(" @", "_")

# Only rows from `efficiency_window_size` onwards are exported; the filling
# level is estimated from that row on.
df[["Filling Level"]].iloc[efficiency_window_size:].to_csv(f"../data/{file_stem}_filling_level.csv")