In [None]:
# --- Snakemake inputs / outputs -------------------------------------------
sharedinputpath = snakemake.params["sharedinputpath"]
desired_regions = snakemake.params.aggregated_regions
weatherdata = snakemake.input.weatherdata
cf_file = snakemake.output.cf_file

# --- Switches: replace ERA5-derived fields with PECD/CMIP data? -----------
cmip_windspeed = snakemake.params.windspeed
cmip_solar = snakemake.params.solar

# --- Wildcards -------------------------------------------------------------
# cmip_year is used verbatim in the file-name patterns below and later as a
# time-slice label; era5_year is the year the PECD time axis is relabelled to.
cmip_year = snakemake.wildcards.cmip_year
cmip_model = snakemake.wildcards.cmip_model
cmip_scenario = snakemake.wildcards.cmip_scenario
era5_year = int(snakemake.wildcards.year)
pv_type_pecd = snakemake.params.pv_type_pecd

# --- Root directories of the PECD data -------------------------------------
cmip_windspeed_path = snakemake.params.windspeed_path
cmip_solar_cf_path = snakemake.params.solar_cf_path

# Glob patterns (trailing "*") for the PECD files of the requested year.
# ERA5 files live in an "ERA5" folder and use an "H_ERA5_ECMW_T639" prefix;
# CMIP files live in a folder named after the scenario and use a
# "P_CMI6_<model>" prefix. The rest of the pattern is shared.
if cmip_model == "ERA5":
    pecd_subdir = "ERA5"
    pecd_file_prefix = "H_ERA5_ECMW_T639"
else:
    pecd_subdir = cmip_scenario
    pecd_file_prefix = f"P_CMI6_{cmip_model}"

cmip_windspeed_file_path = (
    f"{cmip_windspeed_path}/{pecd_subdir}/100m_wind_speed/"
    f"{pecd_file_prefix}_WS-_0100m_Pecd_025d_S{cmip_year}*"
)
cmip_solar_cf_file_path = (
    f"{cmip_solar_cf_path}/{pecd_subdir}/"
    f"solar_photovoltaic_generation_capacity_factor/{pv_type_pecd}/"
    f"{pecd_file_prefix}_SPV_0000m_Pecd_025d_S{cmip_year}*"
)



In [None]:
# Turbine model per wind site type, as configured in the Snakemake params.
# A missing key yields None (dict-style .get).
turbine_by_site = snakemake.params.windturbines
onshore_turbine = turbine_by_site.get('onshore')
offshore_bottom_turbine = turbine_by_site.get('offshore_bottom')
offshore_floating_turbine = turbine_by_site.get('offshore_float')

# PV settings handed to cutout.pv further down.
panel, orientation = "CSi", "latitude_optimal"

In [None]:
# Shape files whose combined bounding box defines the cutout extent.
geodata_files = dict(
    onshore=snakemake.input.euroshape,
    offshore_bottom=snakemake.input.eurooffshoreshape,
)

In [None]:
import logging

import atlite

logging.basicConfig(level=logging.INFO)

import geopandas as gpd
import pandas as pd
import xarray as xr
import numpy as np

In [None]:
def maybe_swap_spatial_dims(ds, namex="x", namey="y"):
    """
    Return ``ds`` with both spatial axes running in ascending order.

    An axis is reversed only when its first index label is larger than its
    last one (the atlite convention is ascending coordinates).

    Parameters
    ----------
    ds : object exposing ``indexes`` and ``isel`` (e.g. an xarray Dataset)
    namex, namey : str
        Names of the horizontal and vertical spatial dimensions.

    Returns
    -------
    ``ds`` itself when no axis needs flipping, otherwise ``ds.isel(...)``
    with the offending axes reversed.
    """
    first_x, last_x = ds.indexes[namex][[0, -1]]
    first_y, last_y = ds.indexes[namey][[0, -1]]

    reverse = slice(None, None, -1)
    flips = {}
    if first_x > last_x:
        flips[namex] = reverse
    if last_y < first_y:
        flips[namey] = reverse

    if not flips:
        return ds
    return ds.isel(**flips)

In [None]:
# Development aid: shows the source of atlite.Cutout.convert_and_aggregate for
# reference. It does not define or change any variable used later on.
%psource atlite.Cutout.convert_and_aggregate

In [None]:
# Read every shape file and collect the per-geometry bounding boxes
# (columns minx, miny, maxx, maxy) into one table.
shape_frames = []
for shape_path in geodata_files.values():
    print(shape_path)
    shape_frames.append(gpd.read_file(shape_path))

boundaries = pd.concat(shape_frames).bounds

In [None]:
# Collapse all per-geometry bounds into one overall bounding box. Every row is
# mapped to the same group label, so the result has a single row "bountry"
# (the label is read again when the cutout is built).
bbox_reducers = {"minx": "min", "miny": "min", "maxx": "max", "maxy": "max"}
boundaries = boundaries.groupby(lambda _row_label: "bountry").agg(bbox_reducers)

# Optional margin of 2 coordinate units around the box (currently disabled):
# boundaries["minx"] = boundaries["minx"] - 2
# boundaries["miny"] = boundaries["miny"] - 2
# boundaries["maxx"] = boundaries["maxx"] + 2
# boundaries["maxy"] = boundaries["maxy"] + 2

In [None]:
# Open the weather dataset given by the Snakemake input. chunks="auto" asks
# xarray for dask-backed arrays with automatically chosen chunk sizes, so the
# data is not read into memory here.
ds = xr.open_dataset(weatherdata, chunks="auto")

In [None]:
# Corners of the overall bounding box computed from the shape files.
west = boundaries.loc["bountry", "minx"]
east = boundaries.loc["bountry", "maxx"]
south = boundaries.loc["bountry", "miny"]
north = boundaries.loc["bountry", "maxy"]

# Restrict the weather data to that box and wrap it in an atlite cutout.
# NOTE(review): the cutout path is a fixed relative file name; confirm that
# concurrently running jobs cannot collide on it.
weather_in_bbox = ds.sel(x=slice(west, east), y=slice(south, north))
cutout = atlite.Cutout(
    path="../3_intermediate_data/intermediatecutout.nc",
    data=weather_in_bbox,
)

cutout.prepare()

In [None]:
# 100 m wind speed from PECD

In [None]:

if cmip_windspeed:
    # Replace the cutout's "wnd100m" variable with the PECD 100 m wind speed
    # ("ws100") of the requested model/scenario/year.
    pan = xr.open_mfdataset(cmip_windspeed_file_path)
    pan = maybe_swap_spatial_dims(pan, namex="longitude", namey="latitude")

    # lat and lon for the new cutout (smaller than the original) - hardcoded
    lat_min = 32.0
    lat_max = 75.0
    lon_min = -13.0
    lon_max = 45.0

    cutout = cutout.sel(y=slice(lat_min, lat_max), x=slice(lon_min, lon_max))

    # Bring the pan-European data set into the cutout's naming scheme and
    # keep only the requested year.
    wnd100m = (
        pan
        .rename({"longitude": "x", "latitude": "y"})
        # .drop_vars("height") # v4.1
        .sel(time=slice(cmip_year, cmip_year))
        .rename({"ws100": "wnd100m"})
        ["wnd100m"]
    )
    wnd100m = wnd100m.assign_coords(
        lon=wnd100m.coords["x"], lat=wnd100m.coords["y"]
    )

    # Drop 29 February so that the year can be replaced below even when the
    # target year is not a leap year.
    is_leap_day = (wnd100m.time.dt.month == 2) & (wnd100m.time.dt.day == 29)
    wnd100m = wnd100m.sel(time=~is_leap_day)

    # Relabel the time axis with the weather year of the cutout (era5_year).
    new_time_index = pd.DatetimeIndex(
        [pd.Timestamp(t).replace(year=era5_year) for t in wnd100m.time.values]
    )
    wnd100m["time"] = new_time_index

    # Attributes of the variable being replaced are carried over.
    attrs = cutout.data["wnd100m"].attrs

    # Pick, for every cutout time step / grid cell, the nearest PECD value.
    wnd100m = wnd100m.sel(
        time=cutout.data.time,
        x=cutout.data.x,
        y=cutout.data.y,
        method="nearest",
    )

    # A nearest-neighbour selection keeps the labels of the *source* data.
    # Assigning into cutout.data aligns on labels, so any label that differs
    # from the cutout's (e.g. the duplicated neighbours that stand in for
    # 29 February when era5_year is a leap year, or slightly different grid
    # coordinates) would make the assignment fail or produce NaN. Relabel the
    # selected values with the cutout's own coordinates so they are stored
    # position by position.
    wnd100m = wnd100m.assign_coords(
        time=("time", cutout.data.time.values),
        x=("x", cutout.data.x.values),
        y=("y", cutout.data.y.values),
    )

    # Create the replacement variable (values are loaded into memory here).
    new_variable = xr.DataArray(
        data=wnd100m.values,
        dims=wnd100m.dims,
        coords=wnd100m.coords,
        attrs=attrs,
    )

    # Overwrite the wind speed stored in the cutout.
    cutout.data['wnd100m'] = new_variable

In [None]:
# Wind bias-correction - 100m wind speed

In [None]:
if snakemake.params.bias_correction:
    # 100 m wind speed currently stored in the cutout; its attributes are
    # re-attached to the corrected field at the end.
    era5_wind = cutout.data["wnd100m"]
    wind_attrs = era5_wind.attrs

    # Bias-correction ratios, renamed to the cutout's coordinate names.
    ratio_file = snakemake.input.biaswinddata
    ratio_fine = (
        xr.open_dataset(ratio_file, chunks="auto")
        # .sel(height=100, drop=True)
        # .drop_vars("spatial_ref")
        .rename({"longitude": "x", "latitude": "y"})
    )

    # The bias-correction ratios (from GWA2) have a finer resolution than the
    # ERA5 data (ratios: 0.025, ERA5: 0.25), and the coordinates of the two
    # data sets do not coincide. The ratios are therefore first averaged over
    # windows matching one ERA5 cell and then interpolated onto the ERA5 grid
    # before the wind speed is scaled.
    # TODO: In the future, ERA5 wind speed could be downscaled to the GWA2
    # resolution, certain grid cells excluded for a more sophisticated
    # filter, and the result upscaled back to the original ERA5 resolution.

    # Grid steps (hard-coded). They could alternatively be derived from the
    # coordinate spacing, e.g.
    #   np.round(ratio_fine.coords['x'].diff('x').values[0], 4)
    #   np.round(era5_wind.coords['x'].diff('x').values[0], 4)
    gwa2_step = {"x": 0.025, "y": 0.025}
    era5_step = {"x": 0.25, "y": 0.25}

    # Number of fine cells averaged into one coarse cell along each axis.
    coarsen_windows = {
        axis: int(era5_step[axis] / gwa2_step[axis]) for axis in ("x", "y")
    }

    # Average to the ERA5 resolution, then interpolate to the ERA5 cell centres.
    ratio_on_era5_grid = (
        ratio_fine.coarsen(coarsen_windows, boundary="pad")
        .mean()
        .interp_like(era5_wind)
    )

    # Scale the wind speed and keep the original attributes.
    corrected_wind = era5_wind * ratio_on_era5_grid["ratio_gwa2_era5_mean_WS"]
    cutout.data["wnd100m"] = corrected_wind.assign_attrs(wind_attrs)

In [None]:
# Solar CF

In [None]:
# Development aid: shows the source of atlite.Cutout.convert_and_aggregate for
# reference. It does not define or change any variable used later on.
%psource atlite.Cutout.convert_and_aggregate

In [None]:
# Per-grid-cell PV capacity-factor time series, stored as float32.
pv_settings = dict(panel=panel, orientation=orientation)
solar_cf_raw = cutout.pv(capacity_factor_timeseries=True, **pv_settings)
cf_solar = solar_cf_raw.astype(np.float32)

In [None]:
# Onshore wind CF

In [None]:
# Onshore wind capacity-factor time series (float32), scaled by the
# configured onshore factor `windon_af`.
onshore_cf_raw = cutout.wind(
    turbine=onshore_turbine,
    smooth=snakemake.params.wind_smooth,
    capacity_factor_timeseries=True,
)
cf_windon = onshore_cf_raw.astype(np.float32) * snakemake.params.windon_af

In [None]:
# Offshore wind CF

In [None]:
# Bottom-fixed offshore wind capacity-factor time series (float32), scaled by
# the configured factor `windoff_bottom_af`.
offshore_bottom_cf_raw = cutout.wind(
    turbine=offshore_bottom_turbine,
    smooth=snakemake.params.wind_smooth,
    capacity_factor_timeseries=True,
)
cf_windoff_bottom = (
    offshore_bottom_cf_raw.astype(np.float32) * snakemake.params.windoff_bottom_af
)

In [None]:
# Stack the three technologies along a new "Tech" dimension; the labels are
# in the same order as the arrays.
tech_labels = pd.Index(["Solar", "Windonshore", "Windoffshore"], name="Tech")
cf = xr.concat([cf_solar, cf_windon, cf_windoff_bottom], dim=tech_labels)

In [None]:
# Solar capacity factors PECD

In [None]:
# solar capacity factors from PECD

In [None]:

if cmip_solar:
    # Replace the "Solar" slice of `cf` with the PECD solar capacity factors
    # ("spv_cf") of the requested model/scenario/year.

    # open dataset
    pan = xr.open_mfdataset(cmip_solar_cf_file_path)
    pan = maybe_swap_spatial_dims(pan, namex="longitude", namey="latitude")

    # Bring the data into the naming scheme of `cf` and keep the requested year.
    solar = (
        pan
        .rename({"longitude": "x", "latitude": "y"})
        .rename({"spv_cf": "capacity factor"})
        .sel(time=slice(cmip_year, cmip_year))
    )
    solar = (
        solar
        .assign_coords(lon=solar.coords["x"], lat=solar.coords["y"])
        ["capacity factor"]
    )

    # lat and lon for the new cutout (smaller than the original) - hardcoded values
    lat_min = 35.0
    lat_max = 71.0
    lon_min = -11.5
    lon_max = 31.5

    # Crop all technologies to that window; cells outside it are dropped.
    cf_pan = cf.where(
        (cf.lat >= lat_min) & (cf.lat <= lat_max) &
        (cf.lon >= lon_min) & (cf.lon <= lon_max),
        drop=True
    )

    # Drop 29 February so that the year can be replaced below even when the
    # target year is not a leap year.
    is_leap_day = (solar.time.dt.month == 2) & (solar.time.dt.day == 29)
    solar = solar.sel(time=~is_leap_day)

    # Relabel the time axis with the weather year of the cutout (era5_year).
    new_time_index = pd.DatetimeIndex(
        [pd.Timestamp(t).replace(year=era5_year) for t in solar.time.values]
    )
    solar["time"] = new_time_index

    # Attributes of the slice being replaced are carried over.
    attrs = cf.sel(Tech="Solar").attrs

    # Pick, for every time step / grid cell of cf_pan, the nearest PECD value.
    solar = solar.sel(
        time=cf_pan.time,
        x=cf_pan.x,
        y=cf_pan.y,
        method="nearest"
    )

    # A nearest-neighbour selection keeps the labels of the *source* data.
    # The .loc assignment below requires the dimension coordinates of the
    # value to agree with those of the target, so any differing label (e.g.
    # the duplicated neighbours that stand in for 29 February when era5_year
    # is a leap year, or slightly different grid coordinates) would make it
    # fail. Relabel the selected values with cf_pan's own coordinates.
    solar = solar.assign_coords(
        time=("time", cf_pan.time.values),
        x=("x", cf_pan.x.values),
        y=("y", cf_pan.y.values),
    )

    # Create the replacement array (values are loaded into memory here).
    new_variable = xr.DataArray(
        data=solar.values,
        dims=solar.dims,
        coords=solar.coords,
        attrs=attrs,
        name="capacity factor",
    )

    # Dimension order used by the original notebook for the assignment.
    new_variable_reordered = new_variable.transpose("y", "time", "x")

    # Overwrite the solar slice and continue with the cropped array.
    cf_pan.loc[dict(Tech="Solar")] = new_variable_reordered

    cf = cf_pan

In [None]:
# Store the capacity factors as scaled 16-bit integers (steps of 0.001) with
# light zlib compression; -99 marks missing values on disk.
capacity_factor_encoding = {
    "dtype": "int16",
    "scale_factor": 0.001,
    "_FillValue": -99,
    "zlib": True,
    "complevel": 1,
}

cf.to_netcdf(cf_file, encoding={"capacity factor": capacity_factor_encoding})