The following libraries are loaded to support API communication, weather data retrieval, GRIB file processing, and general data manipulation for the ERA5, Fingrid, Esett and EnergiDataService workflows.


In [1]:
import os
import time
import requests
import xarray as xr
import pandas as pd
import cdsapi
from pathlib import Path
from datetime import datetime
from dotenv import load_dotenv

## Fingrid – API Data Retrieval

The code below downloads Finnish electricity system data from the **Fingrid Open Data API**.  
Authentication is required, so the script loads an API key from the environment and includes it in every request via the `x-api-key` header.

Because the Fingrid API has request-size limitations, the script retrieves data **month by month**. For each year (2023 and 2024), monthly time windows are generated, and the API is queried repeatedly until all pages of data for that month are fetched. The script automatically handles pagination using the metadata returned by Fingrid.

The following Fingrid datasets are downloaded:

- **Consumption** (dataset 192)  
- **Wind generation** (dataset 181)  
- **Nuclear generation** (dataset 188)  
- **Hydro generation** (dataset 191)

For each dataset and each year, all monthly records are combined, converted to a unified timestamp (`datetime_utc`), cleaned, deduplicated, and sorted before being saved as a yearly CSV file, such as:

- `fi_consumption_2023.csv`  
- `fi_consumption_2024.csv`
- `etc.`

This ensures a complete and continuous time series for both 2023 and 2024, despite the Fingrid API returning data in paginated batches and limiting the size of individual requests.


In [11]:
# Load variables from a local .env file (if one exists) into the process
# environment so the API key can be read with os.getenv below.
load_dotenv()
# Sent as the `x-api-key` header by fetch_fingrid_dataset; None when unset.
FINGRID_API_KEY = os.getenv("FINGRID_API_KEY")

# Root of the dataset endpoints; fetch_fingrid_dataset appends "/{id}/data".
BASE_URL = "https://data.fingrid.fi/api/datasets"

def generate_monthly_ranges_for_year(year):
    """Return the twelve month windows of ``year`` as (start, end) strings.

    Each bound is the first instant of a month formatted as
    ``YYYY-MM-DDTHH:MM:SSZ``; the end of one window is the start of the
    next, and the last window ends on 1 January of ``year + 1``.
    """
    stamp_format = "%Y-%m-%dT%H:%M:%SZ"
    month_starts = pd.date_range(
        start=f"{year}-01-01", end=f"{year+1}-01-01", freq="MS"
    )
    labels = [stamp.strftime(stamp_format) for stamp in month_starts]
    # Pair every month start with the following one.
    return list(zip(labels[:-1], labels[1:]))
def fetch_fingrid_dataset(dataset_id, start, end, page=1, page_size=20000,
                          timeout=30):
    """Fetch a single page of a Fingrid dataset.

    Args:
        dataset_id: Numeric Fingrid dataset id, inserted into the URL path.
        start: Window start, sent as the ``startTime`` query parameter.
        end: Window end, sent as the ``endTime`` query parameter.
        page: Page number to request.
        page_size: Value of the ``pageSize`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the response status is 4xx/5xx.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    url = f"{BASE_URL}/{dataset_id}/data"
    headers = {"x-api-key": FINGRID_API_KEY}
    params = {
        "startTime": start,
        "endTime": end,
        "page": page,
        "pageSize": page_size,
        "format": "json",
    }
    response = requests.get(url, headers=headers, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()


def download_dataset_year(dataset_id, name, year):
    """Download one Fingrid dataset for one year and save it as a CSV file.

    The year is split into month windows; for each window pages are requested
    until an empty page is returned or the page number reaches the
    ``lastPage`` value of the response's ``pagination`` object (treated as 1
    when absent). Every page is reduced to the columns ``datetime_utc``
    (``startTime`` parsed as UTC, stored without timezone) and ``value``.

    Errors are handled on a best-effort basis: a failing request aborts the
    current month only. Each failure is recorded and summarised at the end so
    an incomplete file is not mistaken for a complete one.

    Args:
        dataset_id: Numeric Fingrid dataset id.
        name: Short label used in log messages and in the output file name.
        year: Calendar year to download.

    Side effects:
        Writes ``../data/fi_{name}_{year}.csv`` (creating ``../data`` if
        needed) when at least one page was downloaded.
    """
    print(f"\nDownloading {name} for year {year}")
    all_data = []
    failed_windows = []  # (window start, page) of every aborted month

    for start, end in generate_monthly_ranges_for_year(year):
        print(f"  → {name} | {start} → {end}")
        page = 1

        while True:
            try:
                data_json = fetch_fingrid_dataset(dataset_id, start, end, page)
                data = data_json.get("data", [])

                if not data:
                    break

                df = pd.DataFrame(data)
                df["datetime_utc"] = (
                    pd.to_datetime(df["startTime"], utc=True)
                      .dt.tz_localize(None)
                )
                df = df[["datetime_utc", "value"]]

                all_data.append(df)

                pagination = data_json.get("pagination", {})
                if page >= pagination.get("lastPage", 1):
                    break

                page += 1
                time.sleep(1)  # pause between pages of the same month

            except Exception as e:
                # Deliberate best effort: give up on this month, keep going.
                print(f"Error during {start}, page {page}: {e}")
                failed_windows.append((start, page))
                break

        time.sleep(2)  # pause between months

    if failed_windows:
        print(
            f"WARNING: {name} {year} is incomplete; "
            f"download aborted at (window start, page): {failed_windows}"
        )

    if not all_data:
        print(f"No data downloaded for {name} {year}; nothing saved")
        return

    final_df = (
        pd.concat(all_data)
          .drop_duplicates()
          .sort_values("datetime_utc")
    )

    os.makedirs("../data", exist_ok=True)
    filename = f"../data/fi_{name}_{year}.csv"
    final_df.to_csv(filename, index=False)

    print(f"Saved {filename} with {len(final_df)} rows")

# Fingrid dataset ids keyed by the short label used in output file names.
FINGRID_DATASETS = dict(consumption=192, wind=181, nuclear=188, hydro=191)

if __name__ == "__main__":
    # For every dataset, download 2023 first and then 2024.
    for label, fingrid_id in FINGRID_DATASETS.items():
        for target_year in (2023, 2024):
            download_dataset_year(fingrid_id, label, target_year)


Downloading consumption for year 2023
  â†’ consumption | 2023-01-01T00:00:00Z â†’ 2023-02-01T00:00:00Z
  â†’ consumption | 2023-02-01T00:00:00Z â†’ 2023-03-01T00:00:00Z


KeyboardInterrupt: 

eSett API – Download Electricity Data

In [None]:
# Bidding-zone label -> code sent as the `mba` query parameter of the eSett
# requests in the cells below. The labels also become column names in the
# merged output files.
# NOTE(review): the values look like EIC area codes — confirm against eSett.
MBA_CODES = {
    "SE1": "10Y1001A1001A44P",
    "SE2": "10Y1001A1001A45N",
    "SE3": "10Y1001A1001A46L",
    "SE4": "10Y1001A1001A47J",
    "FI":  "10YFI_1________U",
    "DK1": "10YDK-1--------W",
    "DK2": "10YDK-2--------M",
    "NO1": "10YNO_1________2",
    "NO2": "10YNO_2________T",
    "NO3": "10YNO_3________J",
    "NO4": "10YNO_4________9",
    "NO5": "10Y1001A1001A48H"
}

In [None]:
# Load a local .env file (if one exists) into the process environment.
load_dotenv()

# API root; fetch_esett appends an endpoint path such as "/EXP16/Aggregate".
# NOTE: later cells rebind BASE_URL_ESETT to full endpoint URLs and
# fetch_esett reads the name at call time, so re-run this cell before
# calling fetch_esett after those cells have been executed.
BASE_URL_ESETT = "https://api.opendata.esett.com"
# Directory the merged CSV files are written to.
SAVE_PATH = "../data/esett"

def generate_months(start="2024-01-01", end="2025-01-01"):
    """Return consecutive (month_start, next_month_start) Timestamp pairs.

    The pairs cover every month start that falls between ``start`` and
    ``end`` (both inclusive); fewer than two month starts yields ``[]``.
    """
    month_starts = pd.date_range(start=start, end=end, freq="MS")
    return list(zip(month_starts[:-1], month_starts[1:]))

def fetch_esett(endpoint, mba, start, end):
    """Request one eSett endpoint for one area and time window.

    ``start`` and ``end`` are formatted with their date part only (the time
    of day is always sent as midnight). The URL is the module-level
    ``BASE_URL_ESETT`` with ``endpoint`` appended.

    Returns:
        A DataFrame built from the JSON body, or ``None`` when the server
        answers 204 (no content).

    Raises:
        requests.HTTPError: For 4xx/5xx responses.
    """
    midnight_format = "%Y-%m-%dT00:00:00.000Z"
    query = dict(
        start=start.strftime(midnight_format),
        end=end.strftime(midnight_format),
        mba=mba,
    )
    response = requests.get(BASE_URL_ESETT + endpoint, params=query, timeout=30)

    # 204 means the request succeeded but there is nothing to parse.
    if response.status_code == 204:
        return None

    response.raise_for_status()
    return pd.DataFrame(response.json())

def normalize_df(df, zone, value_col="total"):
    """Reduce a raw eSett frame to ``datetime_utc`` plus one zone column.

    The input frame is left untouched; a new frame is returned.

    Args:
        df: Raw response frame, or ``None``.
        zone: Name given to the value column in the result.
        value_col: Column holding the values; when it is missing the column
            ``"value"`` is used instead.

    Returns:
        A new DataFrame with columns ``["datetime_utc", zone]`` where
        ``datetime_utc`` is parsed as UTC and stored without timezone, or
        ``None`` when ``df`` is ``None``/empty or has neither a
        ``timestampUTC`` nor a ``timestamp`` column.

    Raises:
        KeyError: If neither ``value_col`` nor ``"value"`` is a column.
    """
    if df is None or df.empty:
        return None

    # Prefer the explicit UTC column when both are present.
    ts_col = next(
        (col for col in ("timestampUTC", "timestamp") if col in df.columns),
        None,
    )
    if ts_col is None:
        return None

    if value_col not in df.columns:
        value_col = "value"

    # df[[...]] and rename both return new objects, so inserting the
    # timestamp column below never writes into the caller's frame.
    out = df[[value_col]].rename(columns={value_col: zone})
    out.insert(
        0,
        "datetime_utc",
        pd.to_datetime(df[ts_col], utc=True).dt.tz_localize(None),
    )
    return out

def download_dataset_all_zones(exp_endpoint, output_name, value_field):
    """Download one eSett dataset for every zone into a single wide CSV.

    For each zone in ``MBA_CODES`` the windows from ``generate_months()`` are
    fetched one at a time and normalised to ``datetime_utc`` plus one column
    named after the zone. The per-zone frames are outer-merged on
    ``datetime_utc`` so the result has one column per zone.

    A failing month is reported and skipped (best effort); zones for which
    nothing could be downloaded are left out of the file.

    Args:
        exp_endpoint: Endpoint path passed to ``fetch_esett``.
        output_name: Label used in log messages and the output file name.
        value_field: Name of the value column passed to ``normalize_df``.

    Side effects:
        Creates ``SAVE_PATH`` and writes ``{output_name}_2024.csv`` into it
        when at least one zone returned data.
    """
    os.makedirs(SAVE_PATH, exist_ok=True)
    full_df = None

    for zone, mba in MBA_CODES.items():
        zone_dfs = []
        print(f"🔵 {output_name} | {zone}")
        for (start, end) in generate_months():
            try:
                df = fetch_esett(exp_endpoint, mba, start, end)
                df = normalize_df(df, zone, value_field)
                if df is not None:
                    zone_dfs.append(df)
            except Exception as e:
                # Deliberate best effort: report the month and move on.
                print(f"   ⚠ Error {zone} {start}: {e}")
            time.sleep(1.5)  # pause between requests

        if zone_dfs:
            zone_data = pd.concat(zone_dfs).drop_duplicates().sort_values("datetime_utc")
            if full_df is None:
                full_df = zone_data
            else:
                full_df = full_df.merge(zone_data, on="datetime_utc", how="outer")

    if full_df is None:
        # Make the empty outcome visible instead of returning silently.
        print(f"No data saved for {output_name}")
        return

    full_df.to_csv(f"{SAVE_PATH}/{output_name}_2024.csv", index=False)
    print(f"✅ Saved {output_name}_2024.csv")

# Entry point: runs only when this code is executed directly.
if __name__ == "__main__":
    # Progress banners: the emoji were garbled by a wrong text decoding.
    print("🚀 Downloading Production (EXP16)")
    download_dataset_all_zones("/EXP16/Aggregate", "production", "total")

    print("🚀 Downloading Consumption (EXP15)")
    download_dataset_all_zones("/EXP15/Aggregate", "consumption", "total")


In [None]:
# Full URL of the eSett EXP13 aggregate endpoint requested by fetch_imbalance.
# NOTE: this rebinds the BASE_URL_ESETT name that fetch_esett (earlier cell)
# concatenates an endpoint path onto.
BASE_URL_ESETT= "https://api.opendata.esett.com/EXP13/Aggregate"
# Directory the imbalance CSV is written to.
SAVE_PATH = "../data/esett"



def generate_months(start="2024-01-01", end="2025-01-01"):
    """Build the list of month windows between ``start`` and ``end``.

    Each window is a ``(month_start, following_month_start)`` pair of
    pandas Timestamps; the result is empty when fewer than two month starts
    fall inside the range.
    """
    boundaries = pd.date_range(start=start, end=end, freq="MS")
    return [
        (opening, closing)
        for opening, closing in zip(boundaries[:-1], boundaries[1:])
    ]

def fetch_imbalance(mba_code, start, end):
    """Fetch imbalance records for one area and time window.

    The request goes to the URL held in the module-level ``BASE_URL_ESETT``
    (read at call time). Only the date part of ``start``/``end`` is sent;
    the time of day is always midnight.

    Returns:
        A DataFrame built from the JSON body, or ``None`` for a 204 reply.

    Raises:
        requests.HTTPError: For 4xx/5xx responses.
    """
    def _as_midnight(stamp):
        return stamp.strftime("%Y-%m-%dT00:00:00.000Z")

    response = requests.get(
        BASE_URL_ESETT,
        params={
            "start": _as_midnight(start),
            "end": _as_midnight(end),
            "mba": mba_code,
        },
        timeout=30,
    )
    # 204: the request succeeded but carries no body to parse.
    if response.status_code == 204:
        return None
    response.raise_for_status()
    return pd.DataFrame(response.json())

def normalize(df, zone):
    """Reduce a raw imbalance frame to ``datetime_utc`` plus one zone column.

    The input frame is left untouched; a new frame is returned.

    Args:
        df: Raw response frame, or ``None``.
        zone: Name given to the ``imbalance`` column in the result.

    Returns:
        A new DataFrame with columns ``["datetime_utc", zone]`` where
        ``datetime_utc`` is parsed as UTC and stored without timezone, or
        ``None`` when ``df`` is ``None``/empty, has neither a
        ``timestampUTC`` nor a ``timestamp`` column, or has no
        ``imbalance`` column.
    """
    if df is None or df.empty:
        return None

    # Prefer the explicit UTC column when both are present.
    ts_col = next(
        (col for col in ("timestampUTC", "timestamp") if col in df.columns),
        None,
    )
    if ts_col is None or "imbalance" not in df.columns:
        return None

    # Build the result from new objects so the caller's frame is not mutated.
    out = df[["imbalance"]].rename(columns={"imbalance": zone})
    out.insert(
        0,
        "datetime_utc",
        pd.to_datetime(df[ts_col], utc=True).dt.tz_localize(None),
    )
    return out

def download_all_imbalance():
    """Download imbalance data for every zone into one wide CSV.

    For each zone in ``MBA_CODES`` the windows from ``generate_months()`` are
    fetched one at a time and normalised to ``datetime_utc`` plus one column
    named after the zone; the per-zone frames are outer-merged on
    ``datetime_utc``. A failing month is reported and skipped (best effort).

    Side effects:
        Creates ``SAVE_PATH`` and writes ``imbalance_2024.csv`` into it when
        at least one zone returned data; otherwise prints a notice.
    """
    os.makedirs(SAVE_PATH, exist_ok=True)
    full_df = None

    for zone, mba in MBA_CODES.items():
        print(f"🔵 Fetching Imbalance for {zone}")
        zone_monthly = []

        for start, end in generate_months():
            try:
                df = fetch_imbalance(mba, start, end)
                df = normalize(df, zone)
                if df is not None:
                    zone_monthly.append(df)
            except Exception as e:
                # Deliberate best effort: report the month and move on.
                print(f"⚠ Error for {zone} at {start}: {e}")
            time.sleep(1.2)  # pause between requests

        if zone_monthly:
            zone_df = pd.concat(zone_monthly).drop_duplicates().sort_values("datetime_utc")
            if full_df is None:
                full_df = zone_df
            else:
                full_df = full_df.merge(zone_df, on="datetime_utc", how="outer")

    if full_df is not None:
        file_path = f"{SAVE_PATH}/imbalance_2024.csv"
        full_df.to_csv(file_path, index=False)
        print(f"Saved {file_path} with shape: {full_df.shape}")
    else:
        print("No data saved!")

# Entry point: start the imbalance download only when this code is executed
# directly rather than imported as a module.
if __name__ == "__main__":
    download_all_imbalance()


In [None]:
# eSett API endpoint for single balance prices
# (full URL of the EXP14 aggregate endpoint, requested by fetch_prices).
BASE_URL_ESETT = "https://api.opendata.esett.com/EXP14/Aggregate"
# Directory the balancing-price CSV is written to.
SAVE_PATH = "../data/esett"

def generate_months(start="2024-01-01", end="2025-01-01"):
    """List adjacent month starts between ``start`` and ``end`` as pairs.

    Every element is ``(month_start, next_month_start)`` as pandas
    Timestamps; an empty list is returned when the range holds fewer than
    two month starts.
    """
    firsts_of_month = pd.date_range(start=start, end=end, freq="MS")
    window_starts = firsts_of_month[:-1]
    window_ends = firsts_of_month[1:]
    return list(zip(window_starts, window_ends))

def fetch_prices(mba_code, start, end, timeout=30):
    """Fetch hourly price records for one area and time window.

    The request goes to the URL held in the module-level ``BASE_URL_ESETT``.
    Only the date part of ``start``/``end`` is sent; the time of day is
    always midnight.

    Args:
        mba_code: Area code sent as the ``mba`` query parameter.
        start: Window start (object with ``strftime``).
        end: Window end (object with ``strftime``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        A DataFrame built from the JSON body, or ``None`` for a 204 reply.

    Raises:
        requests.HTTPError: For 4xx/5xx responses.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    params = {
        "start": start.strftime("%Y-%m-%dT00:00:00.000Z"),
        "end": end.strftime("%Y-%m-%dT00:00:00.000Z"),
        "mba": mba_code,
        "resolution": "hour",
    }
    resp = requests.get(BASE_URL_ESETT, params=params, timeout=timeout)
    # 204: the request succeeded but carries no body to parse.
    if resp.status_code == 204:
        return None
    resp.raise_for_status()
    return pd.DataFrame(resp.json())

def normalize_price_df(df, zone):
    """Reduce a raw price frame to ``datetime_utc`` plus up/down price columns.

    The input frame is left untouched; a new frame is returned.

    Args:
        df: Raw response frame, or ``None``.
        zone: Prefix for the output columns ``{zone}_up`` and ``{zone}_down``.

    Returns:
        A new DataFrame with columns
        ``["datetime_utc", f"{zone}_up", f"{zone}_down"]`` where
        ``datetime_utc`` is parsed as UTC and stored without timezone, or
        ``None`` when ``df`` is ``None`` or empty.

    Raises:
        KeyError: If the timestamp column (``timestampUTC``, else
            ``timestamp``) or either price column is missing.
    """
    if df is None or df.empty:
        return None

    ts_col = "timestampUTC" if "timestampUTC" in df.columns else "timestamp"
    timestamps = pd.to_datetime(df[ts_col], utc=True).dt.tz_localize(None)

    # Build the result from new objects so the caller's frame is not mutated.
    out = df[["upRegPrice", "downRegPrice"]].rename(
        columns={
            "upRegPrice": f"{zone}_up",
            "downRegPrice": f"{zone}_down",
        }
    )
    out.insert(0, "datetime_utc", timestamps)
    return out

def download_all_prices():
    """Download balancing prices for every zone into one wide CSV.

    Each zone in ``MBA_CODES`` is fetched window by window (windows come from
    ``generate_months()``), normalised with ``normalize_price_df`` and
    outer-merged with the other zones on ``datetime_utc``. A failing month is
    printed and skipped. The merged table is written to
    ``balancing_price_2024.csv`` inside ``SAVE_PATH`` when any zone returned
    data.
    """
    os.makedirs(SAVE_PATH, exist_ok=True)
    merged = None

    for zone_label, zone_code in MBA_CODES.items():
        monthly_frames = []
        print(f"Fetching EXP14 Prices for {zone_label}")

        for window_start, window_end in generate_months():
            try:
                raw = fetch_prices(zone_code, window_start, window_end)
                frame = normalize_price_df(raw, zone_label)
            except Exception as exc:
                print(f"Error at {window_start} for {zone_label}: {exc}")
            else:
                if frame is not None:
                    monthly_frames.append(frame)
            # Pause after every request, successful or not.
            time.sleep(1)

        if not monthly_frames:
            continue

        zone_table = pd.concat(monthly_frames)
        zone_table = zone_table.drop_duplicates()
        zone_table = zone_table.sort_values("datetime_utc")

        # The first zone with data seeds the table; later zones are joined on.
        merged = (
            zone_table
            if merged is None
            else merged.merge(zone_table, on="datetime_utc", how="outer")
        )

    if merged is None:
        return

    target = f"{SAVE_PATH}/balancing_price_2024.csv"
    merged.to_csv(target, index=False)
    print(f"Saved balancing_price_2024.csv with shape {merged.shape}")

# Entry point: start the balancing-price download only when this code is
# executed directly rather than imported as a module.
if __name__ == "__main__":
    download_all_prices()


## EnergiDataService – Elspot Price Retrieval

The script below downloads hourly Elspot electricity prices from the **EnergiDataService API** (`api.energidataservice.dk`). A list of Nordic bidding zones is defined, and the function `get_elspot_prices()` sends an API request for each zone using the specified date range. No API key is required for this dataset, so all requests are made anonymously.

For each bidding zone, the API response is converted into a pandas DataFrame containing the timestamp (`HourUTC`), bidding zone (`PriceArea`), and hourly spot price in euros (`SpotPriceEUR`). The function `download_all_nordic_prices()` loops through all Nordic zones and saves each result as a separate CSV file.

Although the script queries **all Nordic bidding zones** (DK1, DK2, NO1–NO5, SE1–SE4, FI), the EnergiDataService Elspot dataset only contains price data for a limited set of areas. As a result, **the output files are only generated for**:

- **DK1**  
- **DK2**  
- **NO2**  
- **SE3**  
- **SE4**

For all other bidding zones, the API returns an empty dataset, and no CSV file is saved. This behavior reflects the coverage of the EnergiDataService platform, which does not provide Elspot price data for all Nordic areas.


In [None]:
# Create the EnergiDataService data folder (relative to the current working
# directory) if it doesn't exist; runs as soon as this cell is executed.
os.makedirs("../data/energi", exist_ok=True)

# List of Nordic bidding zones. download_all_nordic_prices passes each entry
# to get_elspot_prices as the price area to query.
NORDIC_ZONES = [
    "DK1", "DK2",
    "NO1", "NO2", "NO3", "NO4", "NO5",
    "SE1", "SE2", "SE3", "SE4",
    "FI"
]

def get_elspot_prices(start="2024-01-01", end="2025-01-01", area="DK1",
                      limit=0, timeout=30):
    """Fetch Elspot prices from Energi Data Service for a given price area.

    Args:
        start: Value of the ``start`` query parameter.
        end: Value of the ``end`` query parameter.
        area: Price area inserted into the ``PriceArea`` filter.
        limit: Value of the ``limit`` query parameter. Per the Energi Data
            Service API guide the service returns only a small default
            number of records when no limit is given and ``0`` means "no
            limit"; ``0`` is therefore the default so a full year is not
            silently truncated. NOTE(review): confirm against the API guide.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A DataFrame with the columns ``HourUTC`` (parsed to datetimes),
        ``PriceArea`` and ``SpotPriceEUR``, or ``None`` when the response
        status is not 200 or the ``records`` list is empty.

    Raises:
        requests.RequestException: On connection problems or timeout.
    """
    url = "https://api.energidataservice.dk/dataset/Elspotprices"
    params = {
        "start": start,
        "end": end,
        "filter": f'{{"PriceArea": ["{area}"]}}',
        "limit": limit,
    }
    response = requests.get(url, params=params, timeout=timeout)

    if response.status_code != 200:
        print(f"Failed to pull data for {area}: {response.text}")
        return None

    data = response.json().get("records", [])
    if not data:
        print(f"No data returned for {area}")
        return None

    df = pd.DataFrame(data)
    df["HourUTC"] = pd.to_datetime(df["HourUTC"])
    df = df[["HourUTC", "PriceArea", "SpotPriceEUR"]]
    return df

def download_all_nordic_prices(start="2024-01-01", end="2025-01-01"):
    """Download Elspot prices for every zone in ``NORDIC_ZONES``.

    Each zone is fetched with ``get_elspot_prices`` and, when data came
    back, saved as ``../data/energi/{zone}_prices.csv`` (zone in lower
    case). Zones without data are reported and skipped.

    Args:
        start: Start of the range passed to ``get_elspot_prices``.
        end: End of the range passed to ``get_elspot_prices``.
    """
    # Write next to the other datasets. The previous target "/data/..." was
    # an absolute path at the filesystem root, not the project data folder.
    out_dir = "../data/energi"
    os.makedirs(out_dir, exist_ok=True)

    for zone in NORDIC_ZONES:
        print(f"⬇ Downloading price data for {zone} ...")
        df = get_elspot_prices(start, end, zone)

        if df is not None and not df.empty:
            filepath = f"{out_dir}/{zone.lower()}_prices.csv"
            df.to_csv(filepath, index=False)
            print(f"Saved: {filepath}")
        else:
            print(f"No data saved for {zone}")

# Entry point: download prices for the 2024-01-01 to 2025-01-01 range for
# every zone, only when this code is executed directly.
if __name__ == "__main__":
    download_all_nordic_prices("2024-01-01", "2025-01-01")


## Weather – CDS API

The code below sends a request to the **CDS API** to download ERA5 reanalysis data for a selected month and year. The request specifies the required meteorological variables (temperature, wind components at 10 m and 100 m, mean sea-level pressure, precipitation, and surface solar radiation) as well as all days and hours in the chosen period, together with a bounding box covering the entire Nordic region.

Because the CDS API has **request size limitations**, the fields **`year`** and **`month`** in the request must be **manually adjusted** each time to retrieve the next month of data. The output filename (e.g., `era5_weather_2023_01.grib`) must also be updated manually to match the selected period. After these adjustments, running the cell will download and save the corresponding GRIB file for that month.

In [None]:
# Period to download. These are the only two values to change between runs;
# the request fields and the output file name are both derived from them so
# they cannot drift apart.
ERA5_YEAR = 2023
ERA5_MONTH = 1

client = cdsapi.Client()

dataset = "reanalysis-era5-single-levels"

request = {
    "product_type": "reanalysis",
    "variable": [
        "2m_temperature",
        "10m_u_component_of_wind",
        "10m_v_component_of_wind",
        "mean_sea_level_pressure",
        "total_precipitation",
        "100m_u_component_of_wind",
        "100m_v_component_of_wind",
        "surface_solar_radiation_downwards"
    ],

    "year": [str(ERA5_YEAR)],
    "month": [f"{ERA5_MONTH:02d}"],
    # All 31 day numbers are sent regardless of the month.
    # NOTE(review): presumably CDS ignores non-existent dates — confirm.
    "day": [f"{d:02d}" for d in range(1, 32)],
    "time": [f"{h:02d}:00" for h in range(24)],

    # Entire Nordic region bounding box (North, West, South, East)
    "area": [72, 5, 54, 32],

    "format": "grib"
}

# The file is written to the current working directory.
# NOTE(review): the extraction step reads from "Weather data/<year>/" —
# confirm the file is moved there afterwards.
client.retrieve(dataset, request).download(
    f"era5_weather_{ERA5_YEAR}_{ERA5_MONTH:02d}.grib"
)


# Weather – Bidding Zone Extraction

After downloading the monthly ERA5 GRIB files, the script below processes each file and extracts weather values for all Nordic bidding zones. Each GRIB file is opened using `xarray` with the `cfgrib` engine, and the weather variables are sampled at a representative latitude–longitude coordinate for each bidding zone (e.g., DK1, DK2, NO1–NO5, SE1–SE4, FI). The script selects the **nearest ERA5 grid point** to each zone’s coordinate and converts the resulting dataset into a pandas DataFrame.

Only the relevant weather variables (temperature, wind components, pressure, precipitation, and solar radiation) are kept. The temperature variable is also converted from Kelvin to Celsius. Finally, the processed data for all zones is saved as a monthly CSV file named `bidding_zone_weather_YYYY_MM.csv`.

This allows each month of ERA5 climate data to be transformed into a clean, zone-level dataset aligned with the rest of the Nordic electricity and balancing datasets.


In [None]:
# Folder holding one sub-folder per year with the monthly GRIB files.
base_dir = Path("Weather data")
years = [2023, 2024]
months = range(1, 13)

# Representative (latitude, longitude) point per bidding zone; the nearest
# grid point of the dataset is sampled for each.
bidding_zone_points = {
    "DK1": (55.6, 9.2),
    "DK2": (55.7, 12.5),
    "NO1": (60.0, 10.0),
    "NO2": (59.0, 6.5),
    "NO3": (64.0, 11.0),
    "NO4": (69.0, 19.0),
    "NO5": (62.0, 5.5),
    "SE1": (66.0, 20.0),
    "SE2": (63.0, 17.0),
    "SE3": (59.5, 16.0),
    "SE4": (57.0, 15.0),
    "FI":  (61.5, 25.0),
}

# Weather columns kept in the output when present in the dataset.
weather_columns = ["t2m", "u10", "v10", "msl", "tp", "ssrd", "u100", "v100"]

for year in years:
    year_dir = base_dir / str(year)

    for month in months:
        grib_name = f"era5_weather_{year}_{month:02d}.grib"
        grib_path = year_dir / grib_name

        if not grib_path.exists():
            print(f"[{year}-{month:02d}] File not found: {grib_path}")
            continue

        print(f"\nProcessing {grib_path}")

        zone_dataframes = []

        # The context manager closes the GRIB file once every zone has been
        # extracted, so file handles are not left open across 24 iterations.
        with xr.open_dataset(
            grib_path,
            engine="cfgrib",
            backend_kwargs={"indexpath": ""}
        ) as ds:
            print("Dataset loaded")

            for zone, (lat, lon) in bidding_zone_points.items():
                print(f" -> {zone}: lat={lat}, lon={lon}")
                point = ds.sel(latitude=lat, longitude=lon, method="nearest")
                # to_dataframe() materialises the values while the file is
                # still open.
                df_zone = point.to_dataframe().reset_index()
                df_zone["zone"] = zone
                zone_dataframes.append(df_zone)

        df_all = pd.concat(zone_dataframes, ignore_index=True)

        cols_to_keep = ["valid_time", "zone"] + [
            col for col in weather_columns if col in df_all.columns
        ]

        # .copy() makes the selection independent, so the assignment below
        # does not trigger pandas' chained-assignment warning.
        df_all = df_all[cols_to_keep].copy()

        if "t2m" in df_all.columns:
            # Kelvin -> degrees Celsius.
            df_all["t2m"] = df_all["t2m"] - 273.15

        csv_name = f"bidding_zone_weather_{year}_{month:02d}.csv"
        csv_path = year_dir / csv_name

        df_all.to_csv(csv_path, index=False)
        print(f"Saved: {csv_path}")
        print(df_all.head())
        # info() prints its report itself and returns None, so it must not
        # be wrapped in print().
        df_all.info()

[2023-01] Файл не знайдено: Weather data/2023/era5_weather_2023_01.grib
[2023-02] Файл не знайдено: Weather data/2023/era5_weather_2023_02.grib
[2023-03] Файл не знайдено: Weather data/2023/era5_weather_2023_03.grib
[2023-04] Файл не знайдено: Weather data/2023/era5_weather_2023_04.grib
[2023-05] Файл не знайдено: Weather data/2023/era5_weather_2023_05.grib
[2023-06] Файл не знайдено: Weather data/2023/era5_weather_2023_06.grib
[2023-07] Файл не знайдено: Weather data/2023/era5_weather_2023_07.grib
[2023-08] Файл не знайдено: Weather data/2023/era5_weather_2023_08.grib
[2023-09] Файл не знайдено: Weather data/2023/era5_weather_2023_09.grib
[2023-10] Файл не знайдено: Weather data/2023/era5_weather_2023_10.grib
[2023-11] Файл не знайдено: Weather data/2023/era5_weather_2023_11.grib
[2023-12] Ð¤Ð°Ð¹Ð» Ð½Ðµ Ð·Ð½Ð°Ð¹Ð´ÐµÐ½Ð¾: Weather data