# Data loader

In [1]:
import os
from datetime import datetime
from pathlib import Path

import pandas as pd
from entsoe import EntsoePandasClient
from entsoe.exceptions import NoMatchingDataError

# Location of the bronze-layer parquet file; the path is relative, so it is
# resolved against the notebook's working directory.
FILEPATH = 'data/bronze/df.parquet'

In [2]:
# Setup client
# The API key is read from the ENTSOE_API_KEY environment variable so that the
# secret is never stored in the notebook or in version control. A missing
# variable raises KeyError here, before any request is made.
# Get an API key through the website, after kindly asking the support.
client = EntsoePandasClient(api_key=os.environ["ENTSOE_API_KEY"])

In [3]:
# Load already-downloaded data
# Fall back to an empty frame when the bronze file has not been written yet.
df = pd.read_parquet(FILEPATH) if Path(FILEPATH).is_file() else pd.DataFrame()
df.head(3)

Unnamed: 0,Forecasted Load,Actual Load
2014-10-24 00:00:00+02:00,,6522.0
2014-10-24 01:00:00+02:00,,6342.0
2014-10-24 02:00:00+02:00,,6269.0


In [11]:
# Figure out the latest-available data
# Very early timestamp, used when nothing has been downloaded yet.
start_ts = pd.Timestamp('20140101', tz='Europe/Zurich')
if not df.empty:
    # Resume one minute after the last stored row. The frame already in memory
    # is used instead of reading the parquet file a second time, so the guard
    # and the value come from the same source.
    start_ts = df.index.max() + pd.Timedelta(minutes=1)
start_ts

Timestamp('2019-07-24 21:01:00+0200', tz='Europe/Zurich')

In [12]:
# Fetch loads and forecasts
# Take "now" directly in the Zurich timezone: localising a naive datetime.now()
# would label the host's local wall-clock time as Zurich time, which is wrong
# on any machine running in a different timezone.
# The query window ends one day after the current time.
end_ts = pd.Timestamp.now(tz='Europe/Zurich') + pd.Timedelta(days=1)
fetched_df = pd.DataFrame()
try:
    fetched_df = client.query_load_and_forecast(
        country_code='CH',
        start=start_ts,
        end=end_ts,
    )
except NoMatchingDataError:
    # The client found no matching data for the window; keep the empty frame.
    print(f"No data available between {start_ts} -> {end_ts}")

fetched_df.head(3)

Unnamed: 0,Forecasted Load,Actual Load
2019-07-24 22:00:00+02:00,6389.0,6383.0
2019-07-24 23:00:00+02:00,6212.0,6337.0
2019-07-25 00:00:00+02:00,5300.0,5928.0


In [13]:
# Update the bronze data
# Empty frames are left out: concatenating them is deprecated in recent pandas
# and contributes nothing to the result.
non_empty_frames = [frame for frame in (df, fetched_df) if not frame.empty]
if non_empty_frames:
    df = pd.concat(non_empty_frames, axis=0)
    # Re-running this cell would append the same rows again; for any repeated
    # timestamp keep only the last (most recently appended) row so the cell
    # stays idempotent.
    df = df[~df.index.duplicated(keep='last')]
df.head(3)

Unnamed: 0,Forecasted Load,Actual Load
2014-10-24 00:00:00+02:00,,6522.0
2014-10-24 01:00:00+02:00,,6342.0
2014-10-24 02:00:00+02:00,,6269.0


In [14]:
# Sanity check before persisting: every timestamp may appear only once.
assert not df.index.has_duplicates

In [15]:
# Dump to bronze
# Create the target directory first so that the very first run (when no bronze
# file exists yet) does not fail on a missing folder.
Path(FILEPATH).parent.mkdir(parents=True, exist_ok=True)
df.to_parquet(FILEPATH)