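# Data loader

Incrementally downloads Swiss (CH) load and load-forecast data from the ENTSO-E API and appends it to the bronze Parquet file, so that re-running the notebook only fetches what is not stored yet.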

In [29]:
from entsoe import EntsoePandasClient
from entsoe.exceptions import NoMatchingDataError
import pandas as pd
from datetime import datetime
from pathlib import Path
from dotenv import load_dotenv
import os
# Load environment variables via python-dotenv before anything reads them;
# the ENTSOE_API_KEY looked up with os.getenv further down is presumably
# supplied this way (e.g. from a local .env file) -- confirm for your setup.
load_dotenv()

# Parquet file holding the already-downloaded data: it is read at the start of
# the notebook and overwritten with the extended frame at the end.
FILEPATH = 'data/bronze/df.parquet'

In [27]:
# Setup client
# The API key comes from the ENTSOE_API_KEY environment variable. Keys are
# obtained through the ENTSO-E website after asking their support for access.
client = EntsoePandasClient(api_key=os.environ.get('ENTSOE_API_KEY'))

In [18]:
# Load already-downloaded data
# Start from the stored Parquet file when it exists; otherwise (first run)
# fall back to an empty frame so the cells below still work.
df = pd.read_parquet(FILEPATH) if Path(FILEPATH).is_file() else pd.DataFrame()
df.head(3)

Unnamed: 0,Forecasted Load,Actual Load
2014-10-24 00:00:00+02:00,,6522.0
2014-10-24 01:00:00+02:00,,6342.0
2014-10-24 02:00:00+02:00,,6269.0


In [19]:
# Figure out the latest-available data
# Default for a cold start (nothing stored yet): a very early timestamp, so the
# query below requests the full history.
start_ts = pd.Timestamp('20140101', tz='Europe/Zurich')
if len(df):
    # Resume one minute after the newest stored row so that row is not fetched
    # again. `df` already holds the stored data, so use it directly instead of
    # reading the Parquet file a second time.
    # NOTE(review): if the API returns forecast-only rows (Actual Load missing),
    # resuming after them means their actual values are never back-filled --
    # confirm whether that matters for downstream consumers.
    start_ts = df.index.max() + pd.Timedelta(1, 'm')
start_ts

Timestamp('2024-09-28 16:01:00+0200', tz='Europe/Zurich')

In [20]:
# Fetch loads and forecasts
# Request everything from `start_ts` up to one day past the current time
# (presumably to also pick up forecasts for the coming day -- confirm).
# pd.Timestamp.now(tz=...) returns the current instant expressed in that zone;
# localising the naive datetime.now() instead would label the host's local
# wall-clock time as Zurich time, which is wrong on any machine running in a
# different timezone (e.g. a UTC server).
end_ts = pd.Timestamp.now(tz='Europe/Zurich') + pd.Timedelta(1, 'd')
fetched_df = pd.DataFrame()  # stays empty when the API has nothing for the window
try:
    fetched_df = client.query_load_and_forecast(
        country_code='CH',
        start=start_ts,
        end=end_ts
    )
except NoMatchingDataError:
    # Not an error for an incremental loader: there is simply nothing new yet.
    print(f"No data available between {start_ts} -> {end_ts}")

fetched_df.head(3)

No data available between 2024-09-28 16:01:00+02:00 -> 2024-09-28 17:19:11.461002+02:00


In [21]:
# Append the newly-fetched data to the current data
# Only concatenate frames that actually hold rows. Either side can be the empty
# placeholder `pd.DataFrame()` (no stored file yet, or nothing new fetched), and
# leaving it out keeps the result's index and dtypes determined solely by real
# data instead of by how pandas treats empty operands.
frames_with_rows = [frame for frame in (df, fetched_df) if len(frame)]
if frames_with_rows:
    df = pd.concat(frames_with_rows, axis=0)
df.head(3)

Unnamed: 0,Forecasted Load,Actual Load
2014-10-24 00:00:00+02:00,,6522.0
2014-10-24 01:00:00+02:00,,6342.0
2014-10-24 02:00:00+02:00,,6269.0


In [22]:
# Sanity check before persisting: every timestamp must appear exactly once.
# Duplicates typically mean the append cell was executed more than once on the
# same kernel, so the message points at the remedy.
assert df.index.is_unique, (
    f"{int(df.index.duplicated().sum())} duplicated timestamp(s) in df.index; "
    "restart the kernel and run all cells once"
)

In [23]:
# Dump to bronze
# Make sure the target directory exists first: on a fresh checkout
# `data/bronze/` may be missing and the write would otherwise fail.
Path(FILEPATH).parent.mkdir(parents=True, exist_ok=True)
df.to_parquet(FILEPATH)