# 07: Solar radiation and wind data aggregation
*Extract daily solar radiation and wind data for subsequent use in developing estimates of WBGT in the sun in `08_shade_sun_adjustment.ipynb`.*

In [None]:
import logging

import dask
import numpy as np
import pandas as pd
import xarray as xr
from dask.distributed import Client
from metsim.datetime import date_range
from metsim.disaggregate import shortwave
from metsim.physics import solar_geom
from tqdm.notebook import tqdm
from utils import (
    gcm_list,
    gcms_with_nonstandard_calendars_list,
    load_nasanex,
    prep_sparse,
    remove_360_longitudes,
    spatial_aggregation,
)

Set up cluster to handle multiprocessing using a Dask client.

In [None]:
# Start a Dask client with 64 workers of one thread each; `silence_logs` is
# set to the ERROR logging level.
client = Client(
    n_workers=64,
    threads_per_worker=1,
    silence_logs=logging.ERROR,
)
# Leave the client as the cell's last expression so the notebook displays it.
client

In [None]:
# (scenario name, array of years) pairs. The bounds below are inclusive, so
# "historical" covers 1985-2014 and "ssp245" covers 2015-2060.
scenario_years = [
    (name, np.arange(first_year, last_year + 1))
    for name, first_year, last_year in (
        ("historical", 1985, 2014),
        ("ssp245", 2015, 2060),
    )
]

As in `05_aggregate.ipynb`, aggregate the solar radiation and wind information into population-weighted region averages. Standardize calendars as was done in `06_bias_correction.ipynb`.

In [None]:
# Load a single year from one GCM; it is only used as the template dataset
# handed to `prep_sparse` below.
sample_ds = load_nasanex(
    "historical", "ACCESS-CM2", ["rsds", "sfcWind"], np.arange(1990, 1991)
)
population = xr.open_zarr(
    "s3://carbonplan-climate-impacts/extreme-heat/v1.0/inputs/GHS_POP_E2030_GLOBE_R2023A_4326_30ss_V1_0_resampled_to_CP.zarr"
)
# Rename the spatial dims to match the climate data. `drop_vars` is the
# non-deprecated way to remove a variable (plain `drop` warns and forwards to it).
population = population.rename({"x": "lon", "y": "lat"}).drop_vars("spatial_ref")
# Calculate sparse weights once to use for all of the data files.
sparse_weights, population = prep_sparse(
    sample_ds, population, return_population=True, variables_to_drop=["rsds", "sfcWind"]
)
for gcm in gcm_list:
    # loop through the gcms and extract the data for each region
    extracted_list = []
    for scenario, years in scenario_years:
        wind_solrad = load_nasanex(scenario, gcm, ["rsds", "sfcWind"], years)
        wind_solrad = remove_360_longitudes(wind_solrad)

        # ensure population and the climate data share identical lon/lat
        # coordinates (values and order) and the same 2D grid shape, since the
        # precomputed weights are reused for every file
        assert (population["lon"].values == wind_solrad["lon"].values).all()
        assert (population["lat"].values == wind_solrad["lat"].values).all()
        assert (
            population["population"].values.shape
            == wind_solrad["rsds"].isel(time=0).values.shape
        )
        # aggregate the wind_solrad to regional estimates
        extracted_wind_solrad = spatial_aggregation(
            wind_solrad, sparse_weights, region_name="processing_id"
        )
        # release the gridded data before loading the next scenario
        del wind_solrad
        extracted_list.append(extracted_wind_solrad)
    # stitch the scenarios into one continuous time series per GCM
    extracted_wind_solrad_ds = xr.concat(extracted_list, dim="time")
    if gcm in gcms_with_nonstandard_calendars_list:
        # convert to standard calendar by filling with nans
        extracted_wind_solrad_ds = extracted_wind_solrad_ds.convert_calendar(
            "gregorian", dim="time", align_on="year", missing=np.nan, use_cftime=None
        )
        # gap fill by linearly interpolating
        extracted_wind_solrad_ds = extracted_wind_solrad_ds.interpolate_na(
            dim="time", method="linear"
        )
    output = f"s3://carbonplan-scratch/extreme-heat/wind_solrad-regions/{gcm}-wind-solrad-regions.zarr"
    # build the write lazily (compute=False), optimize the task graph, then
    # execute it, allowing failed tasks to be retried
    t = extracted_wind_solrad_ds.to_zarr(
        output, consolidated=True, mode="w", compute=False
    )
    t = dask.optimize(t)[0]
    t.compute(retries=2)

Calculate representative elevation and latitude for each region, which will be used below by `metsim` for solar geometry calculations.

In [None]:
elev = xr.open_zarr(
    "s3://carbonplan-climate-impacts/extreme-heat/v1.0/inputs/elevation.zarr"
)
# collapse to a single chunk along lat/lon and load into memory
elev = elev.chunk({"lat": -1, "lon": -1}).compute()

# `population` here is the object returned by the earlier `prep_sparse` call.
# NOTE(review): the data variable used below is named "elevation", and
# `sample_ds` holds rsds/sfcWind - confirm that "elev" is the intended value
# for `variables_to_drop`.
sparse_weights = prep_sparse(
    sample_ds, population, return_population=False, variables_to_drop="elev"
)
elev = remove_360_longitudes(elev)
# attach a placeholder time dimension
elev = elev.expand_dims(dim="time").assign_coords(
    {"time": pd.date_range("2000-01-01", "2000-01-01")}
)
# the elevation grid must line up exactly with the population grid
assert (population["lon"].values == elev["lon"].values).all()
assert (population["lat"].values == elev["lat"].values).all()
assert (
    population["population"].values.shape == elev["elevation"].isel(time=0).values.shape
)
# `drop_vars` replaces the deprecated `drop` for removing the placeholder
# time coordinate
elev_regions = spatial_aggregation(
    elev, sparse_weights, region_name="processing_id"
).drop_vars("time")

In [None]:
# Build a gridded "latitude" field on the elevation grid (`elev` and
# `sparse_weights` come from the elevation cell): every column repeats the
# latitude coordinate, so cell [i, j] holds lat[i].
lat_ds = xr.Dataset(
    {
        "latitude": xr.DataArray(
            data=np.tile(elev.lat.values, (len(elev.lon.values), 1)).transpose(),
            # name the dimensions explicitly instead of relying on them being
            # inferred from the key order of the `coords` dict
            dims=("lat", "lon"),
            coords={"lat": elev.lat.values, "lon": elev.lon.values},
        )
    }
)
# attach a placeholder time dimension
lat_ds = lat_ds.expand_dims(dim="time").assign_coords(
    {"time": pd.date_range("2000-01-01", "2000-01-01")}
)
# the latitude grid must line up exactly with the population grid
assert (population["lon"].values == lat_ds["lon"].values).all()
assert (population["lat"].values == lat_ds["lat"].values).all()
assert (
    population["population"].values.shape
    == lat_ds["latitude"].isel(time=0).values.shape
)
# `drop_vars` replaces the deprecated `drop` for removing the placeholder
# time coordinate
lat_regions = spatial_aggregation(
    lat_ds, sparse_weights, region_name="processing_id"
).drop_vars("time")

Take advantage of utilities in the `metsim` package, developed largely by Andrew Bennett at the University of Arizona. The `solar_geom`, `shortwave`, and `date_range` functions are slightly different from their implementations in the `metsim` package for this use case which focuses solely on solar radiation.

For each region, use elevation and latitude information calculated above to derive radiation parameters like day length and subdaily maximum solar radiation. This calculation only needs to be done once because, while it varies in time throughout the year for every location, it will be the same for every projection.

In [None]:
# Run `solar_geom` once per region using the region's aggregated elevation and
# latitude, collecting its first two outputs.
region_ids = elev_regions.processing_id.values
tiny_rad_fract_list = []
daylength_list = []
for region_id in tqdm(region_ids):
    # index 0 selects the single remaining entry after picking the region
    region_elevation = elev_regions.sel(processing_id=region_id)["elevation"].values[0]
    region_latitude = lat_regions.sel(processing_id=region_id)["latitude"].values[0]
    # NOTE(review): -6.5 is solar_geom's third positional argument, presumably
    # a lapse rate - confirm its meaning and units against the utility.
    geometry = solar_geom(region_elevation, region_latitude, -6.5)
    tiny_rad_fract_list.append(
        xr.DataArray(data=geometry[0], dims=("dayofyear", "tiny_timestep"))
    )
    daylength_list.append(xr.DataArray(data=geometry[1], dims="dayofyear"))

# Stack the per-region arrays along a new `processing_id` dimension, label it
# with the region ids, and chunk by region only.
radiation_parameters = (
    xr.Dataset(
        {
            "tiny_rad_fract": xr.concat(tiny_rad_fract_list, dim="processing_id"),
            "daylength": xr.concat(daylength_list, dim="processing_id"),
        }
    )
    .assign_coords({"processing_id": region_ids})
    .chunk({"dayofyear": -1, "tiny_timestep": -1, "processing_id": 4000})
)

# Write the parameters out, then reopen them from the store so later cells
# read from zarr.
radiation_parameters.to_zarr(
    "s3://carbonplan-scratch/extreme-heat/wind_solrad-regions/radiation_parameters.zarr",
    mode="w",
)
radiation_parameters = xr.open_zarr(
    "s3://carbonplan-scratch/extreme-heat/wind_solrad-regions/radiation_parameters.zarr"
)

Set up dataframe templates for the daily and sub-daily time axes, then define the wrapper function that calculates maximum daily solar radiation from the daily mean solar radiation.

In [None]:
# Disaggregation settings: a 60-minute time step on a gregorian calendar.
params = dict(
    time_step=60,
    method="other",
    utc_offset=False,
    calendar="gregorian",
)

# Daily template: one row per day from 1985-01-01 through 2060-12-31.
df_daily_template = pd.DataFrame(index=pd.date_range("1985-01-01", "2060-12-31"))
daily_index = df_daily_template.index

# Final sub-daily timestamp: one time step before the midnight that follows
# the last day.
time_step_delta = pd.Timedelta("{} minutes".format(params["time_step"]))
stop = daily_index[-1] + pd.Timedelta("1 days") - time_step_delta

# Sub-daily template spanning the same period at the configured time step.
dates_disagg = date_range(
    daily_index[0],
    stop,
    freq="{}T".format(params["time_step"]),
    calendar=params["calendar"],
)
df_disagg_template = pd.DataFrame(index=dates_disagg)

# Zero-based day of year for every day in the daily template.
yday = daily_index.dayofyear - 1

In [None]:
def shortwave_wrapper(rsds, daylengths, tiny_rad_fract):
    """
    Wrapper function for `shortwave` which supports vectorized computation
    via `xr.apply_ufunc`.

    Passes one daily series to `shortwave`, labels the result with hourly
    timestamps, and returns the maximum of each day.

    Parameters
    ----------
    rsds : array-like
        Daily values, one per day from 1985-01-01 through 2060-12-31.
    daylengths : array-like
        Day lengths, indexed here by zero-based day of year.
    tiny_rad_fract : array-like
        Passed through unchanged to `shortwave`.

    Returns
    -------
    array
        The `.data` of the daily-maximum resample of the hourly output, one
        value per day of the 1985-2060 period.

    Raises
    ------
    ValueError
        If `rsds` does not have one value per day of the 1985-2060 period.
    """

    params = {
        "time_step": 60,
        "method": "other",
        "utc_offset": False,
        "calendar": "gregorian",
        "SW_RAD_DT": 3600,
    }
    dayofyear = pd.date_range("1985-01-01", "2060-12-31").dayofyear.values
    if len(rsds) != len(dayofyear):
        raise ValueError(
            f"rsds has {len(rsds)} daily values but the 1985-2060 period "
            f"has {len(dayofyear)} days"
        )
    # Derive the zero-based day-of-year index locally so the function does not
    # depend on a notebook global defined in another cell.
    yday = dayofyear - 1
    shortwave_out = shortwave(rsds, daylengths[yday], dayofyear, tiny_rad_fract, params)
    da = xr.DataArray(shortwave_out, dims=["hourlytime"])
    da = da.assign_coords(
        {
            "hourlytime": pd.date_range(
                "1985-01-01 00:00:00", "2060-12-31 23:00:00", freq="H"
            )
        }
    )
    # reduce the hourly series to one maximum per calendar day
    output = da.resample({"hourlytime": "D"}).max().data
    return output

Calculate maximum solar radiation given daily mean solar radiation and radiation parameters (as calculated above). This approach accounts for the cooling effect of clouds but does not capture subdaily variations in cloud cover.

In [None]:
# Re-chunk the radiation parameters to 25 regions per chunk, keeping the
# day-of-year and sub-daily axes whole; the regional data below uses the same
# region chunk size.
radiation_parameters = radiation_parameters.chunk(
    {"processing_id": 25, "tiny_timestep": -1, "dayofyear": -1}
)
for gcm in gcm_list:
    in_store = f"s3://carbonplan-scratch/extreme-heat/wind_solrad-regions/{gcm}-wind-solrad-regions.zarr"
    out_store = f"s3://carbonplan-scratch/extreme-heat/wind_solrad-regions/{gcm}-rsds-max-regions.zarr"

    # Persist the regional data on the cluster, then re-chunk so each chunk
    # holds the full time series for 25 regions.
    wind_solrad_ds = (
        xr.open_zarr(in_store).persist().chunk({"processing_id": 25, "time": -1})
    )
    daily_mean_rsds = wind_solrad_ds["rsds"]

    # Apply the wrapper once per region: `time`, `dayofyear` and
    # `tiny_timestep` are core dims, and `vectorize=True` loops over the rest.
    max_solrad = xr.apply_ufunc(
        shortwave_wrapper,
        daily_mean_rsds,
        radiation_parameters["daylength"],
        radiation_parameters["tiny_rad_fract"],
        input_core_dims=[["time"], ["dayofyear"], ["dayofyear", "tiny_timestep"]],
        output_core_dims=[["time"]],
        vectorize=True,
        dask="parallelized",
        output_dtypes=[daily_mean_rsds.dtype],
    )

    max_solrad.to_zarr(out_store, mode="w", consolidated=True)