In [2]:
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd
import vwf.data as vwf_data
from calendar import monthrange
def daysDuringMonth(yy, m):
    """
    Return the number of days in month ``m`` for each year in ``yy``.

    Parameters
    ----------
    yy : iterable of int
        Calendar years (a list, tuple, or pandas Series of years all work,
        since the values are simply iterated in order).
    m : int
        Month number, 1 (January) through 12 (December).

    Returns
    -------
    list of int
        One entry per element of ``yy``, in the same order. Only February
        varies between years (28 or 29 days depending on leap years).
    """
    # monthrange() returns (weekday_of_first_day, days_in_month); keep the latter.
    return [monthrange(y, m)[1] for y in yy]

# FRANCE

In [3]:
# Load the French turbine register, keep only turbines whose park status is
# "Autorisé", and relabel the columns used downstream.
fr_columns = {
    "id_aerogenerateur": "ID",
    "puissance_mw": "capacity",
    "diametre_rotor": "diameter",
    "hauteur_mat_nacelle": "height",
    "constructeur": "manufacturer",
    "x_aerogenerateur": "x",
    "y_aerogenerateur": "y",
    "epsg": "epsg",
}
fr_md = gpd.read_file("input/country-data/fr/fr_turb_info.csv")
fr_md = (
    fr_md.loc[fr_md["statut_parc"] == "Autorisé", list(fr_columns)]
    .reset_index(drop=True)
    .rename(columns=fr_columns)
)

# Build point geometries in the register's own CRS, then reproject to
# EPSG:4326 so that longitude/latitude can be read off the geometry.
# NOTE(review): the CRS comes from the first row's "epsg" value only, so this
# assumes every row shares the same EPSG code — confirm against the data.
turbine_attrs = fr_md[["ID", "capacity", "diameter", "height", "manufacturer"]]
native_points = gpd.points_from_xy(fr_md["x"], fr_md["y"], crs=fr_md["epsg"].iloc[0])
points_gdf = gpd.GeoDataFrame(turbine_attrs, geometry=native_points).to_crs(epsg=4326)

# Capacity is converted to float and scaled MW -> kW; the geometry column is
# replaced by plain lon/lat columns.
points_gdf = points_gdf.assign(
    capacity=points_gdf["capacity"].astype(float) * 1e3,  # MW to kW
    lon=points_gdf.geometry.x,
    lat=points_gdf.geometry.y,
).drop(columns="geometry")

# Attach turbine model information via the project helper.
turb_info = vwf_data.add_models(points_gdf)

Total observed turbines/farms before conditions:  10276


In [None]:
# ALTERNATIVE FR GENERATION DATA
# Source (Eurostat):
# https://ec.europa.eu/eurostat/databrowser/view/nrg_cb_pem__custom_19402431/default/table
#
# Builds `obs_gen`: one row per (turbine ID, year) with columns obs_1..obs_12
# holding monthly output divided by (days in month * 24 h * capacity).
# The national monthly total is shared between turbines in proportion to
# their capacity, so every turbine gets the same value for a given month.
year_star = 2015  # start year of training period (inclusive)
year_end = 2018   # end year of training period (inclusive)
country = 'FR'    # value matched against the "geo" column

ns_data = pd.read_csv("input/country-data/northsea_country_generation.csv")
ns_data = ns_data.loc[
    :, [
    "Standard international energy product classification (SIEC)",
    "TIME_PERIOD",
    "OBS_VALUE",
    "geo",
    ]]
ns_data.columns = [
    "carrier",
    "date",
    "output",
    "country",
    ]

ns_data = ns_data.loc[(ns_data['country'] == country) & (ns_data['carrier'] == 'Wind')].reset_index(drop=True)
ns_data['date'] = pd.to_datetime(ns_data['date'])
# convert from gigawatt hours to kilowatt hours
ns_data['output'] = pd.to_numeric(ns_data['output']) * 1e6
ns_data['year'] = ns_data['date'].dt.year.astype(int)
ns_data['month'] = ns_data['date'].dt.month.astype(int)
ns_data = ns_data.drop(columns=['date'])

# National total per (year, month).
# NOTE(review): fillna(0) makes a missing observation count as zero output
# instead of being dropped later — confirm this is intended.
ns_data = ns_data.fillna(0).groupby(['year', 'month'])['output'].sum().reset_index()

# Restrict to the training period BEFORE the cross join below, so the join only
# materialises (training months x turbines) rows instead of every month in the
# file. Row filtering commutes with the join, so the result is unchanged.
ns_data = ns_data.loc[ns_data['year'].between(year_star, year_end)]

# Each turbine's share of the total installed capacity in turb_info.
turb_info["ratio"] = turb_info['capacity'] / turb_info['capacity'].sum()

# Pair every monthly total with every turbine and scale by the turbine's share.
ns_data = ns_data.merge(turb_info[['ID', 'ratio']], how="cross")
ns_data["output"] = ns_data["output"] * ns_data["ratio"]
ns_data = ns_data.dropna(subset=['ID', 'year', 'month']).reset_index(drop=True)

# Long -> wide: one column per month.
obs_gen = ns_data.pivot(index=['ID', 'year'], columns='month', values='output').reset_index()
# Alternative aggregation by turbine type instead of ID:
# obs_gen = ns_data.pivot(index=['type','year'], columns='month', values='output').reset_index()
obs_gen.columns = [f'obs_{i}' if i not in ['ID', 'year'] else f'{i}' for i in obs_gen.columns]
obs_gen = obs_gen.merge(turb_info[['ID', 'capacity']], how='left', on=['ID'])
# Drops any (ID, year) row with a missing month or missing capacity.
obs_gen = obs_gen.dropna().reset_index(drop=True)

# Normalise monthly output by days_in_month * capacity * 24 hours.
for month in range(1, 13):
    col = 'obs_' + str(month)
    days = np.asarray(daysDuringMonth(obs_gen['year'], month))
    obs_gen[col] = obs_gen[col] / ((days * obs_gen['capacity']) * 24)

obs_gen = obs_gen.drop(['capacity'], axis=1)
obs_gen = obs_gen.sort_values(by=['ID', 'year']).reset_index(drop=True)
obs_gen

Unnamed: 0,ID,year,obs_1,obs_2,obs_3,obs_4,obs_5,obs_6,obs_7,obs_8,obs_9,obs_10,obs_11,obs_12
0,0000000044_E1,2015,0.112989,0.103939,0.099125,0.073938,0.078874,0.064387,0.069476,0.055871,0.085045,0.063245,0.130545,0.131214
1,0000000044_E1,2016,0.140924,0.152475,0.121089,0.085957,0.076693,0.054944,0.055716,0.061687,0.050490,0.072020,0.123569,0.072072
2,0000000044_E1,2017,0.099114,0.139221,0.130560,0.073974,0.072721,0.074893,0.084057,0.060763,0.083579,0.102415,0.116556,0.160125
3,0000000044_E1,2018,0.196306,0.149803,0.151215,0.109189,0.077720,0.067985,0.052396,0.066873,0.079519,0.119011,0.137384,0.176818
4,0000000044_E2,2015,0.112989,0.103939,0.099125,0.073938,0.078874,0.064387,0.069476,0.055871,0.085045,0.063245,0.130545,0.131214
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40511,0100283090_E7,2018,0.196306,0.149803,0.151215,0.109189,0.077720,0.067985,0.052396,0.066873,0.079519,0.119011,0.137384,0.176818
40512,0100283090_E8,2015,0.112989,0.103939,0.099125,0.073938,0.078874,0.064387,0.069476,0.055871,0.085045,0.063245,0.130545,0.131214
40513,0100283090_E8,2016,0.140924,0.152475,0.121089,0.085957,0.076693,0.054944,0.055716,0.061687,0.050490,0.072020,0.123569,0.072072
40514,0100283090_E8,2017,0.099114,0.139221,0.130560,0.073974,0.072721,0.074893,0.084057,0.060763,0.083579,0.102415,0.116556,0.160125


In [None]:
# Extent of the turbine coordinates as (lat_min, lat_max, lon_min, lon_max).
(
    turb_info["lat"].min(),
    turb_info["lat"].max(),
    turb_info["lon"].min(),
    turb_info["lon"].max(),
)

(42.50154105996311, 50.930297624283845, -4.757239614418522, 8.7060513884254)

In [None]:
# Builds `obs_gen` from the French national wind generation series: one row per
# (turbine ID, year) with columns obs_1..obs_12 holding monthly output divided
# by (days in month * 24 h * capacity). The national monthly total is shared
# between turbines in proportion to their capacity.
year_star = 2015  # start year of training period (inclusive)
year_end = 2018   # end year of training period (inclusive)

# NOTE(review): this path uses "FR" while the turbine register is read from
# ".../fr/..." — confirm the directory casing on case-sensitive filesystems.
fr_data = pd.read_csv("input/country-data/FR/observations/Wind_power_generation_in_France.csv")
fr_data.columns = ["date", "filter", "output", "nature"]

# Keep only rows whose "filter" text contains 'evolution'.
fr_data = fr_data.loc[fr_data['filter'].str.contains('evolution')].reset_index(drop=True)
# fr_data["type"] = fr_data["filter"].str.split(" ").str[0].str.lower()
# Decimal commas -> decimal points so the values can be parsed as numbers.
fr_data["output"] = fr_data["output"].replace(',', '.', regex=True)
fr_data = fr_data.drop(columns=["nature", "filter"])
fr_data['date'] = pd.to_datetime(fr_data['date'])
# Scale by 1e9. The value is later divided by capacity * hours, so it is treated
# as energy. NOTE(review): presumably TWh -> kWh (the original comment said
# "terawatts to kilowatts") — confirm the source unit.
fr_data['output'] = pd.to_numeric(fr_data['output']) * 1e9
fr_data['year'] = fr_data['date'].dt.year.astype(int)
fr_data['month'] = fr_data['date'].dt.month.astype(int)
fr_data = fr_data.drop(columns=['date'])

# National total per (year, month).
# NOTE(review): fillna(0) makes a missing observation count as zero output
# instead of being dropped later — confirm this is intended.
fr_data = fr_data.fillna(0).groupby(['year', 'month'])['output'].sum().reset_index()
# fr_data['type'] = 'onshore'

# Restrict to the training period BEFORE the cross join below, so the join only
# materialises (training months x turbines) rows instead of every month in the
# file. Row filtering commutes with the join, so the result is unchanged.
fr_data = fr_data.loc[fr_data['year'].between(year_star, year_end)]

# Each turbine's share of the total installed capacity in turb_info.
turb_info["ratio"] = turb_info['capacity'] / turb_info['capacity'].sum()

# Pair every monthly total with every turbine and scale by the turbine's share.
fr_data = fr_data.merge(turb_info[['ID', 'ratio']], how="cross")
fr_data["output"] = fr_data["output"] * fr_data["ratio"]
fr_data = fr_data.dropna(subset=['ID', 'year', 'month']).reset_index(drop=True)

# Long -> wide: one column per month.
obs_gen = fr_data.pivot(index=['ID', 'year'], columns='month', values='output').reset_index()
# Alternative aggregation by turbine type instead of ID:
# obs_gen = fr_data.pivot(index=['type','year'], columns='month', values='output').reset_index()

obs_gen.columns = [f'obs_{i}' if i not in ['ID', 'year'] else f'{i}' for i in obs_gen.columns]
obs_gen = obs_gen.merge(turb_info[['ID', 'capacity']], how='left', on=['ID'])
# Drops any (ID, year) row with a missing month or missing capacity.
obs_gen = obs_gen.dropna().reset_index(drop=True)

# Normalise monthly output by days_in_month * capacity * 24 hours.
for month in range(1, 13):
    col = 'obs_' + str(month)
    days = np.asarray(daysDuringMonth(obs_gen['year'], month))
    obs_gen[col] = obs_gen[col] / ((days * obs_gen['capacity']) * 24)


obs_gen = obs_gen.drop(['capacity'], axis=1)
obs_gen = obs_gen.sort_values(by=['ID', 'year']).reset_index(drop=True)
obs_gen

Unnamed: 0,ID,year,obs_1,obs_2,obs_3,obs_4,obs_5,obs_6,obs_7,obs_8,obs_9,obs_10,obs_11,obs_12
0,0000000044_E1,2015,0.119402,0.109819,0.104127,0.077930,0.082066,0.067322,0.072649,0.058472,0.089848,0.067093,0.136548,0.133591
1,0000000044_E1,2016,0.148005,0.160959,0.127405,0.089613,0.080062,0.057473,0.052848,0.061672,0.053158,0.073204,0.124678,0.079410
2,0000000044_E1,2017,0.106120,0.145694,0.137468,0.078337,0.077585,0.078801,0.088521,0.063991,0.087904,0.107737,0.124232,0.174759
3,0000000044_E1,2018,0.207637,0.159707,0.162313,0.115942,0.083022,0.073531,0.057142,0.072849,0.086501,0.129369,0.150310,0.189606
4,0000000044_E2,2015,0.119402,0.109819,0.104127,0.077930,0.082066,0.067322,0.072649,0.058472,0.089848,0.067093,0.136548,0.133591
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40511,0100283090_E7,2018,0.207637,0.159707,0.162313,0.115942,0.083022,0.073531,0.057142,0.072849,0.086501,0.129369,0.150310,0.189606
40512,0100283090_E8,2015,0.119402,0.109819,0.104127,0.077930,0.082066,0.067322,0.072649,0.058472,0.089848,0.067093,0.136548,0.133591
40513,0100283090_E8,2016,0.148005,0.160959,0.127405,0.089613,0.080062,0.057473,0.052848,0.061672,0.053158,0.073204,0.124678,0.079410
40514,0100283090_E8,2017,0.106120,0.145694,0.137468,0.078337,0.077585,0.078801,0.088521,0.063991,0.087904,0.107737,0.124232,0.174759


# NETHERLANDS

In [2]:
# Source: https://nationaalgeoregister.nl/geonetwork/srv/dut/catalog.search#/metadata/90f5eab6-9cea-4869-a031-2a228fb82fea
# Read the turbine register, reproject to EPSG:4326 and expose lon/lat columns.
nl_points = gpd.read_file("input/country-data/NL/nl_md.json").to_crs(epsg=4326)

# NOTE(review): the new labels below are applied by position, so they rely on
# the column order of the source file after the drop — confirm if the file changes.
nl_md = (
    nl_points
    .assign(lon=nl_points.geometry.x, lat=nl_points.geometry.y)
    .drop(columns=['geometry', 'x', 'y', 'prov_naam', 'gem_naam', 'naam'])
    .assign(
        # Dutch surface labels -> onshore/offshore
        ondergrond=lambda df: df["ondergrond"].replace({"land": "onshore", "zee": "offshore"}),
        # Dutch country names -> two-letter codes
        land=lambda df: df["land"].replace({"België": "BE", "Duitsland": "DE", "Nederland": "NL"}),
    )
    .set_axis(
        ["ID", "diameter", "height", "capacity", "country", "manufacturer", "type", "lon", "lat"],
        axis="columns",
    )
)

# Keep rows labelled NL only; selecting the final column order also drops "country".
nl_md = nl_md.loc[
    nl_md['country'] == 'NL',
    ["ID", "capacity", "diameter", "height", "manufacturer", "lon", "lat", "type"],
].reset_index(drop=True)

# Reduce the manufacturer to its first space-separated token, then trim any
# leading/trailing characters from the set "123-.," off that token.
first_token = nl_md['manufacturer'].str.split(' ').str[0]
nl_md['manufacturer'] = first_token.str.strip('123-.,').astype(str)

# Attach turbine model information via the project helper.
turb_info = vwf_data.add_models(nl_md)
turb_info
# Debug helper: nl_md['manufacturer'].str.strip().unique()

Total observed turbines/farms before conditions:  4187


Unnamed: 0,ID,type,capacity,diameter,height,lon,lat,model,p_density
0,rivm_20250101_windturbines_ashoogte.1000,offshore,8000,167.0,116.0,3.050230,51.625834,Siemens.Gamesa.SG.8.0.167,365.230606
1,rivm_20250101_windturbines_ashoogte.1001,onshore,4300,149.0,135.0,4.695033,51.647248,Nordex.N131.3300,246.607362
2,rivm_20250101_windturbines_ashoogte.1004,offshore,9500,164.0,116.0,3.009498,51.625462,MHI.Vestas.V164.9500,449.723962
3,rivm_20250101_windturbines_ashoogte.1007,onshore,4300,136.0,136.0,3.700501,51.637814,REpower.MM92.2000,296.006166
4,rivm_20250101_windturbines_ashoogte.1008,onshore,4000,127.0,96.0,3.708572,51.638140,Alstom.Eco.110,315.764039
...,...,...,...,...,...,...,...,...,...
4166,rivm_20250101_windturbines_ashoogte.992,onshore,4200,127.0,99.0,3.712879,51.635214,Vestas.V164.7000,331.552241
4167,rivm_20250101_windturbines_ashoogte.993,onshore,4300,149.0,135.0,4.687090,51.645316,Nordex.N131.3300,246.607362
4168,rivm_20250101_windturbines_ashoogte.995,offshore,8000,167.0,116.0,3.090638,51.625454,Siemens.Gamesa.SG.8.0.167,365.230606
4169,rivm_20250101_windturbines_ashoogte.996,onshore,4200,127.0,99.0,3.724388,51.636273,Vestas.V164.7000,331.552241


In [None]:
# Source (Eurostat):
# https://ec.europa.eu/eurostat/databrowser/view/nrg_cb_pem__custom_19402431/default/table
#
# Builds `obs_gen`: one row per (turbine ID, year) with columns obs_1..obs_12
# holding monthly output divided by (days in month * 24 h * capacity).
# The national monthly total is shared between turbines in proportion to
# their capacity, so every turbine gets the same value for a given month.
# Requires the imports cell (pandas as pd, numpy as np) and daysDuringMonth
# to have been run in the current kernel.
year_star = 2015  # start year of training period (inclusive)
year_end = 2018   # end year of training period (inclusive)
country = 'NL'    # value matched against the "geo" column

ns_data = pd.read_csv("input/country-data/northsea_country_generation.csv")
ns_data = ns_data.loc[
    :, [
    "Standard international energy product classification (SIEC)",
    "TIME_PERIOD",
    "OBS_VALUE",
    "geo",
    ]]
ns_data.columns = [
    "carrier",
    "date",
    "output",
    "country",
    ]

ns_data = ns_data.loc[(ns_data['country'] == country) & (ns_data['carrier'] == 'Wind')].reset_index(drop=True)
ns_data['date'] = pd.to_datetime(ns_data['date'])
# convert from gigawatt hours to kilowatt hours
ns_data['output'] = pd.to_numeric(ns_data['output']) * 1e6
ns_data['year'] = ns_data['date'].dt.year.astype(int)
ns_data['month'] = ns_data['date'].dt.month.astype(int)
ns_data = ns_data.drop(columns=['date'])

# National total per (year, month).
# NOTE(review): fillna(0) makes a missing observation count as zero output
# instead of being dropped later — confirm this is intended.
ns_data = ns_data.fillna(0).groupby(['year', 'month'])['output'].sum().reset_index()

# Restrict to the training period BEFORE the cross join below, so the join only
# materialises (training months x turbines) rows instead of every month in the
# file. Row filtering commutes with the join, so the result is unchanged.
ns_data = ns_data.loc[ns_data['year'].between(year_star, year_end)]

# Each turbine's share of the total installed capacity in turb_info.
turb_info["ratio"] = turb_info['capacity'] / turb_info['capacity'].sum()

# Pair every monthly total with every turbine and scale by the turbine's share.
ns_data = ns_data.merge(turb_info[['ID', 'ratio']], how="cross")
ns_data["output"] = ns_data["output"] * ns_data["ratio"]
ns_data = ns_data.dropna(subset=['ID', 'year', 'month']).reset_index(drop=True)

# Long -> wide: one column per month.
obs_gen = ns_data.pivot(index=['ID', 'year'], columns='month', values='output').reset_index()
# Alternative aggregation by turbine type instead of ID:
# obs_gen = ns_data.pivot(index=['type','year'], columns='month', values='output').reset_index()
obs_gen.columns = [f'obs_{i}' if i not in ['ID', 'year'] else f'{i}' for i in obs_gen.columns]
obs_gen = obs_gen.merge(turb_info[['ID', 'capacity']], how='left', on=['ID'])
# Drops any (ID, year) row with a missing month or missing capacity.
obs_gen = obs_gen.dropna().reset_index(drop=True)

# Normalise monthly output by days_in_month * capacity * 24 hours.
for month in range(1, 13):
    col = 'obs_' + str(month)
    days = np.asarray(daysDuringMonth(obs_gen['year'], month))
    obs_gen[col] = obs_gen[col] / ((days * obs_gen['capacity']) * 24)

obs_gen = obs_gen.drop(['capacity'], axis=1)
obs_gen = obs_gen.sort_values(by=['ID', 'year']).reset_index(drop=True)
obs_gen

NameError: name 'pd' is not defined

In [5]:
# Extent of the turbine coordinates as (lat_min, lat_max, lon_min, lon_max).
(
    turb_info["lat"].min(),
    turb_info["lat"].max(),
    turb_info["lon"].min(),
    turb_info["lon"].max(),
)

(50.84785605236023, 54.073136499178204, 2.774671661010408, 7.1778683054236145)

In [91]:
# Hourly time series file; the column names shown in the output carry
# per-country wind generation.
# Source: https://data.open-power-system-data.org/time_series/
# NOTE(review): the original comment pointed at ".../time_series.com", which
# looks like a typo — confirm the exact dataset URL/version.
opsd_csv_path = "input/country-data/time_series_60min_singleindex_filtered.csv"
data = pd.read_csv(opsd_csv_path)
data

Unnamed: 0,utc_timestamp,cet_cest_timestamp,BE_wind_generation_actual,BE_wind_offshore_generation_actual,BE_wind_onshore_generation_actual,FR_wind_onshore_generation_actual,NL_wind_generation_actual,NL_wind_offshore_generation_actual,NL_wind_onshore_generation_actual,NO_wind_onshore_generation_actual,NO_1_wind_onshore_generation_actual,NO_2_wind_onshore_generation_actual,NO_3_wind_onshore_generation_actual,NO_4_wind_onshore_generation_actual,NO_5_wind_onshore_generation_actual
0,2014-12-31T23:00:00Z,2015-01-01T00:00:00+0100,,,,,,,,,,,,,
1,2015-01-01T00:00:00Z,2015-01-01T01:00:00+0100,,,,,1451.0,145.0,1306.0,,,,,,
2,2015-01-01T01:00:00Z,2015-01-01T02:00:00+0100,734.81,518.66,216.15,1464.0,1447.0,145.0,1302.0,479.40,,158.60,233.53,68.67,18.6
3,2015-01-01T02:00:00Z,2015-01-01T03:00:00+0100,766.64,529.46,237.18,1543.0,1479.0,148.0,1331.0,422.74,,149.20,200.27,54.67,18.6
4,2015-01-01T03:00:00Z,2015-01-01T04:00:00+0100,733.13,406.94,326.19,1579.0,1340.0,134.0,1206.0,408.35,,143.50,192.22,54.03,18.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50396,2020-09-30T19:00:00Z,2020-09-30T21:00:00+0200,1889.72,1497.40,392.32,3632.0,870.0,589.0,281.0,1533.20,94.20,964.93,376.02,98.05,
50397,2020-09-30T20:00:00Z,2020-09-30T22:00:00+0200,2154.67,1688.06,466.61,3965.0,978.0,650.0,328.0,1645.93,86.20,1034.32,425.09,100.32,
50398,2020-09-30T21:00:00Z,2020-09-30T23:00:00+0200,2187.48,1715.76,471.72,4201.0,988.0,705.0,284.0,1635.35,80.69,1008.48,436.27,109.91,
50399,2020-09-30T22:00:00Z,2020-10-01T00:00:00+0200,2225.62,1739.17,486.45,4428.0,912.0,662.0,250.0,1698.59,84.76,1003.72,505.29,104.82,
