In [1]:
import matplotlib.pyplot as plt
import xarray as xr
import numpy as np
import pandas as pd
from tqdm import tqdm

The objective of this notebook is to apply the virtual wind farm model we obtained, using the power curve that gave the best skill for Ireland, to the ERA5 data from 1940 to 2023 in order to obtain a wind capacity factor (CF) time series. We then store this series for further analysis.

### Necessary steps:

Step 1: Load the ERA5 data and convert it into wind speed

Step 2: Load the power curve data (multiple turbines and smoothings)

Step 3: Create a function that, given a power curve, returns the ERA5 wind speeds transformed into capacity factors (CF)

Step 4: Load the farm data

Step 5: Allocate the farm data onto a grid with the same shape as the ERA5 one

Step 6: Calculate the wind CF from ERA5 and store it

### Step 1: Load the ERA5 data and convert it into wind speed

2018/19-2023

In [None]:
# The 2019-2023 file carries an extra 'expver' dimension, which is collapsed here with a
# NaN-ignoring mean (dataset attributes are kept).
# NOTE(review): presumably 'expver' separates final ERA5 from preliminary ERA5T data - confirm.
path_uv100_2019_2023 = '../data/ERA5/ERA5_100m_u_v_hourly_2019_2023.nc'
ds_uv100_2019_2023 = xr.open_dataset(path_uv100_2019_2023).reduce(
    np.nanmean,
    dim='expver',
    keep_attrs=True,
)

1988-2018/19

In [None]:
# The 1988-2008 data is split over several files, each named after its first and last
# year; the two lists below are paired positionally to build the file names.
years_start_uv100 = [1988, 1991, 1997, 2003]
years_end_uv100 = [1990, 1996, 2002, 2008]
paths_uv100_1988_2008 = [
    '../data/ERA5/ERA5_100m_u_v_hourly_{}_{}.nc'.format(year_start, year_end)
    for year_start, year_end in zip(years_start_uv100, years_end_uv100)
]
ds_uv100_1988_2008 = xr.open_mfdataset(paths_uv100_1988_2008)

# The 2009-2018 file names its horizontal coordinates 'lat'/'lon'; rename them to
# 'latitude'/'longitude', the names used by the rest of the notebook.
ds_uv100_2009_2018 = xr.open_dataset('../data/ERA5/ERA5_100m_u_v_hourly_2009_2018.nc')
ds_uv100_2009_2018 = ds_uv100_2009_2018.rename({'lat':'latitude', 'lon':'longitude'})

1940-1987

In [None]:
# One file per (year, month) for 1940-1987, listed in chronological order.
filepath_base = '../data/ERA5/ERA5_all_variables_{}_{:02d}.nc'
list_files_era5_old = [
    filepath_base.format(year, month)
    for year in range(1940, 1988)
    for month in range(1, 13)
]

In [5]:
# Open all monthly 1940-1987 files as a single multi-file dataset.
ds_1940_1987 = xr.open_mfdataset(paths=list_files_era5_old)

In [6]:
# Keep only the two wind components needed to compute the wind speed.
wind_variables = ['u100', 'v100']
ds_1940_1987 = ds_1940_1987[wind_variables]

Join all the times and variables together into a single dataset

In [7]:
# The periods are listed in chronological order and joined along the time dimension.
datasets_by_period = [
    ds_1940_1987,
    ds_uv100_1988_2008,
    ds_uv100_2009_2018,
    ds_uv100_2019_2023,
]
ds = xr.concat(datasets_by_period, dim='time')

In [8]:
# Wind speed magnitude from the two wind components: sqrt(u100^2 + v100^2).
# Despite its name, ds_era5 is the resulting wind speed array, not the full dataset.
u100_squared = ds['u100']**2.
v100_squared = ds['v100']**2.
ds_era5 = np.sqrt(u100_squared + v100_squared)

In [9]:
# Display the wind speed array to inspect its shape, chunking and data type.
ds_era5

Unnamed: 0,Array,Chunk
Bytes,1.44 GiB,128.00 MiB
Shape,"(736344, 21, 25)","(63913, 21, 25)"
Dask graph,583 chunks in 2334 graph layers,583 chunks in 2334 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.44 GiB 128.00 MiB Shape (736344, 21, 25) (63913, 21, 25) Dask graph 583 chunks in 2334 graph layers Data type float32 numpy.ndarray",25  21  736344,

Unnamed: 0,Array,Chunk
Bytes,1.44 GiB,128.00 MiB
Shape,"(736344, 21, 25)","(63913, 21, 25)"
Dask graph,583 chunks in 2334 graph layers,583 chunks in 2334 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


### Step 2: Load the power curve data (multiple turbines and smoothings)

In [None]:
# The file name encodes the smoother setting as '0.{}0 w'; the digit 2 selects the
# '0.20 w smoother' file.
path_power_curve = '../data/renewables_ninja/Wind Turbine Power Curves ~ 5 (0.01ms with 0.{}0 w smoother).csv'
smoothing_digit = 2
df_power_curve_smooth2 = pd.read_csv(path_power_curve.format(smoothing_digit))

### Step 3: Create a function that, given a power curve, returns the ERA5 wind speeds transformed into capacity factors (CF)

We transform the wind speeds into capacity factors. We take advantage of the way the power curve is tabulated: it is sorted by wind speed, with one entry every 0.01 m/s. A wind speed can therefore be converted directly into a row index (the wind speed expressed in hundredths of a m/s), which is then used to look up the power curve, itself expressed in terms of capacity factor.

In [11]:
# The power curve is indexed by wind speed in 0.01 m/s steps, so the row index is the
# wind speed expressed in hundredths of a m/s.
# Scale first and round to the nearest integer afterwards: rounding to two decimals and
# then multiplying by 100 can give a float just below the intended integer (e.g.
# 28.999999...), which astype(int) would truncate to the row below.
# NOTE(review): NaN wind speeds have no defined integer value after astype(int);
# confirm the input contains none.
indexes_sel_power_curve = (ds_era5 * 100).round().astype(int)
indexes_sel_power_curve = indexes_sel_power_curve.clip(0,4000) # too high or low wind speeds are constrained to the upper and lower limits of the power curve

In [12]:
def get_cf_wind(power_curve, indexes=None):
    """Look up the capacity factor of every grid point and time step in a power curve.

    Parameters
    ----------
    power_curve : pandas.Series
        Capacity factors of one turbine, one value per power-curve row. Anything
        exposing ``to_numpy()`` works.
    indexes : xarray.DataArray of int, optional
        Row index into ``power_curve`` for every element. When omitted, the
        notebook-level ``indexes_sel_power_curve`` is used, which keeps existing
        ``get_cf_wind(power_curve)`` calls working unchanged.

    Returns
    -------
    xarray.DataArray
        Capacity factors (float) with the dimensions and coordinates of ``indexes``.
        The result stays lazy when ``indexes`` is backed by dask.
    """
    if indexes is None:
        # Fall back to the indexes computed earlier in the notebook.
        indexes = indexes_sel_power_curve

    power_curve_array = power_curve.to_numpy()
    cf_wind = xr.apply_ufunc(
        lambda x: power_curve_array[x],  # Integer-array lookup into the power curve
        indexes,                         # Input DataArray of row indexes
        dask="parallelized",             # Enable Dask for large arrays
        output_dtypes=[float],           # Specify output data type
    )
    # Re-wrap so the result carries exactly the coordinates and dimensions of the
    # index array (and no inherited name).
    cf_wind = xr.DataArray(
        cf_wind,
        coords=indexes.coords,
        dims=indexes.dims,
    )
    return cf_wind

### Step 4: Load the farm data

Since at this point we use a single turbine model for all sites, the turbines are grouped into farms, as it makes no sense to differentiate between the turbine models at the same farm.

In [13]:
# Load the farm table, keep the columns needed for the capacity grid and collapse the
# rows that share the same 'Unit' into one row per farm by averaging them.
# NOTE(review): averaging 'InstalledCapacity' is only right if every row of a farm
# already holds the farm total; if rows hold per-turbine-model capacities a sum would
# be needed - confirm against the data.
path_farms = '/Users/aina/Library/Application Support/Cryptomator/mnt/eirgrid_data/farms_turbines_models.csv'
farm_columns = ['Unit', 'InstalledCapacity', 'Latitude', 'Longitude']
df_farms = pd.read_csv(path_farms)[farm_columns].groupby('Unit').mean()

### Step 5: Allocate the farm data onto a grid with the same shape as the ERA5 one

In [14]:
# Data-less (latitude, longitude) grid built on the same coordinate values as the
# wind speed array.
grid_coords = {
    'latitude': ds_era5.coords['latitude'].values,
    'longitude': ds_era5.coords['longitude'].values,
}
da_capacity = xr.DataArray(coords=grid_coords, dims=('latitude', 'longitude'))

Iterate over farms and fill up the DataArray with capacity information

In [15]:
# Start from an all-zero grid so that re-running the cell does not accumulate twice.
da_capacity[:,:] = 0.

lats = da_capacity['latitude']
lons = da_capacity['longitude']
for name_farm, row in df_farms.iterrows():
    lat_farm = row['Latitude']
    lon_farm = row['Longitude']
    cap_farm = row['InstalledCapacity']
    # Squared distance (in degrees) from the farm to every grid cell; lats and lons
    # broadcast against each other into a (latitude, longitude) array.
    distance_sq = (lats - lat_farm)**2 + (lons - lon_farm)**2
    # The first cell attaining the minimum distance receives the farm's capacity.
    lat_indices, lon_indices = np.where(distance_sq == distance_sq.min())
    da_capacity[lat_indices[0], lon_indices[0]] += cap_farm

### Step 6: Calculate the wind CF from ERA5 and store it

In [20]:
# Capacity factor on every grid cell for the selected turbine, followed by a spatial
# mean weighted by the capacity allocated to each cell; the result is one value per
# time step.
turbine_model = 'NEG.Micon.NM60.1000'
power_curve = df_power_curve_smooth2[turbine_model]
cf_wind = get_cf_wind(power_curve)
spatial_dims = ['latitude', 'longitude']
cf_turbine = cf_wind.weighted(da_capacity).mean(dim=spatial_dims)

In [24]:
# Write the capacity-weighted capacity factor series to a netCDF file for later analysis.
path_cf_output = '../data/wind_onshore_cf_1940_2023.nc'
cf_turbine.to_netcdf(path_cf_output)