# Download ENTSO-E data

This notebook is not intended to be shared with students, it is just for us to download the data from ENTSO-E and save it in a CSV file which we share with the students.

In [1]:
import os
import pysftp
import matplotlib.pyplot as plt
import pandas as pd
import os
import numpy as np
import datetime
import glob
from pathlib import Path
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

PICKLE_FILE = "entsoe-demand.pickle"



# if you want, you can modify this too, per default it will create a folder
# in the parant folder of the homework repository:
DOWNLOAD_DIR = 'entsoe-data'

CATEGORIES = [
    'ActualTotalLoad_6.1.A'
]

In [3]:
from getpass import getpass
user = getpass('User for ENTSO-E API:')
pwd = getpass('Password for ENTSO-E API:')

In [4]:
def download_entsoe_data(user, pwd, category, output_dir, server_uri='sftp-transparency.entsoe.eu'):
    """Download a dataset from ENTSO-E's transparency data sftp server.
    
    Contact ENTSO-E to receive login credentials:
    https://transparency.entsoe.eu/usrm/user/createPublicUser
    
    :param user: user name required for connecting with sftp server
    :param pwd: password required for connecting with sftp server
    :param category: ENTSO-E data category to be downloaded
    :param output_dir: directory where downloaded data is saved to, a separate 
        subdirectory is created for each category.
    :param server_uri: URI of ENTSO-E transparency server (default last updated on 2020-05-01)
    
    """
    abspath = os.path.abspath(output_dir)
    
    # check if local_dir exists and create if it doesn't
    if not os.path.exists(abspath):
        os.mkdir(abspath)
        print (f'Successfully created the directory {abspath} and using it for download')
    else:
        print (f'{abspath} exists and will be used for download')  

    print("\nCopy this path for other notebooks, e.g. the next lecture or homework:\n"
          f"DOWNLOAD_DIR = '{abspath}'\n")
        
    cnopts = pysftp.CnOpts()
    cnopts.hostkeys = None
    
    # connect to entsoe server via sFTP
    entsoe_dir = f'/TP_export/{category}'
    with pysftp.Connection(server_uri, username=user, password=pwd, cnopts=cnopts) as sftp:
        sftp.chdir(entsoe_dir)
        files_entsoe = sftp.listdir()
        to_download = list(files_entsoe)
        
        print(f'In total, {len(to_download)} files are going to be downloaded')
        
        # download files not on disk
        
        for file in to_download:
            print(f'Downloading file {file}...')
            
            dest_file = os.path.join(abspath, file)
            
            if not os.path.exists(dest_file):
                temp_file = os.path.join(abspath, f'{file}.partial')
                
                sftp.get(f'{entsoe_dir}/{file}', temp_file)
                
                os.rename(temp_file, dest_file)                   
                print(f'{file} downloaded successfully.')
                
            else:
                 print(f'{file} already present locally, skipping download.')

           
    sftp.close()
    print("All downloads completed")

In [5]:
for category in CATEGORIES:
    download_entsoe_data(user, pwd, category, DOWNLOAD_DIR)

/data/notebook_files/entsoe-data exists and will be used for download

Copy this path for other notebooks, e.g. the next lecture or homework:
DOWNLOAD_DIR = '/data/notebook_files/entsoe-data'

In total, 125 files are going to be downloaded
Downloading file 2014_12_ActualTotalLoad_6.1.A.csv...
2014_12_ActualTotalLoad_6.1.A.csv downloaded successfully.
Downloading file 2015_01_ActualTotalLoad_6.1.A.csv...
2015_01_ActualTotalLoad_6.1.A.csv downloaded successfully.
Downloading file 2015_02_ActualTotalLoad_6.1.A.csv...
2015_02_ActualTotalLoad_6.1.A.csv downloaded successfully.
Downloading file 2015_03_ActualTotalLoad_6.1.A.csv...
2015_03_ActualTotalLoad_6.1.A.csv downloaded successfully.
Downloading file 2015_04_ActualTotalLoad_6.1.A.csv...
2015_04_ActualTotalLoad_6.1.A.csv downloaded successfully.
Downloading file 2015_05_ActualTotalLoad_6.1.A.csv...
2015_05_ActualTotalLoad_6.1.A.csv downloaded successfully.
Downloading file 2015_06_ActualTotalLoad_6.1.A.csv...
2015_06_ActualTotalLoad_6.1.



In [4]:
def read_single_csv_entso_e(file):
    print(file)
    return pd.read_csv(file, sep='\t', parse_dates=["DateTime"])


def load_complete_entso_e_data(directory):
    pattern = Path(directory) / '*.csv'
    files = glob.glob(str(pattern))

    if not files:
        raise ValueError(f"No files found when searching in {pattern}, wrong directory?")
    
    print(f'Concatenating {len(files)} csv files...')

    each_csv_file = [read_single_csv_entso_e(file) for file in files]
    data = pd.concat(each_csv_file, ignore_index=True)

    data = data.sort_values(by=["AreaName", "DateTime"])
    data = data.set_index("DateTime")

    print("Loading done.")

    return data


power_demand = load_complete_entso_e_data(DOWNLOAD_DIR)

Concatenating 125 csv files...
entsoe-data/2025_04_ActualTotalLoad_6.1.A.csv
entsoe-data/2025_03_ActualTotalLoad_6.1.A.csv
entsoe-data/2025_02_ActualTotalLoad_6.1.A.csv
entsoe-data/2025_01_ActualTotalLoad_6.1.A.csv
entsoe-data/2024_12_ActualTotalLoad_6.1.A.csv
entsoe-data/2024_11_ActualTotalLoad_6.1.A.csv
entsoe-data/2024_10_ActualTotalLoad_6.1.A.csv
entsoe-data/2024_09_ActualTotalLoad_6.1.A.csv
entsoe-data/2024_08_ActualTotalLoad_6.1.A.csv
entsoe-data/2024_07_ActualTotalLoad_6.1.A.csv
entsoe-data/2024_06_ActualTotalLoad_6.1.A.csv
entsoe-data/2024_05_ActualTotalLoad_6.1.A.csv
entsoe-data/2024_04_ActualTotalLoad_6.1.A.csv
entsoe-data/2024_03_ActualTotalLoad_6.1.A.csv
entsoe-data/2024_02_ActualTotalLoad_6.1.A.csv
entsoe-data/2024_01_ActualTotalLoad_6.1.A.csv
entsoe-data/2023_12_ActualTotalLoad_6.1.A.csv
entsoe-data/2023_11_ActualTotalLoad_6.1.A.csv
entsoe-data/2023_10_ActualTotalLoad_6.1.A.csv
entsoe-data/2023_09_ActualTotalLoad_6.1.A.csv
entsoe-data/2023_08_ActualTotalLoad_6.1.A.csv
ent

In [5]:
power_demand_shortened = power_demand[power_demand["AreaTypeCode"]=='CTY'][["ResolutionCode", "AreaName","TotalLoadValue"]]
    

In [6]:
power_demand_shortened.to_pickle(PICKLE_FILE)

In [7]:
power_demand = pd.read_pickle(PICKLE_FILE)

In [9]:
power_demand["AreaName"].unique()

In [13]:
power_demand_shortened = power_demand

In [14]:
power_demand_shortened.to_pickle("entsoe-demand-shortened.pickle")