In [23]:
import os
from glob import glob

import dask
import numpy as np
import pandas as pd
import xarray as xr
import xesmf as xe
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

from utils import city_list

## Preliminaries

In [7]:
################
#### Paths #####
################
# Update these for reproduction: either edit the defaults below or export the
# matching environment variable before starting the kernel.
# NOTE: the defaults end in "/" and later cells join with another "/", so the
# resulting paths contain "//"; this is harmless on POSIX filesystems.

# Root directory for all data produced by this project
project_data_path = os.environ.get(
    "CONUS_COMPARISON_DATA_PATH",
    "/storage/group/pches/default/users/dcl5300/conus_comparison_lafferty-etal-2024/",
)
# Root directory of the project code (used for logs and helper arrays)
project_code_path = os.environ.get(
    "CONUS_COMPARISON_CODE_PATH",
    "/storage/home/dcl5300/work/current_projects/conus_comparison_lafferty-etal-2024/",
)
# location of NEX-GDDP models
nex_path = os.environ.get(
    "NEX_GDDP_CMIP6_PATH",
    "/storage/group/pches/default/public/NEX-GDDP-CMIP6/models/",
)

In [8]:
##############
### Models ###
##############

# Maps each usable model to the file-name fragment that sits between the
# scenario and the year (e.g. "_r1i1p1f2_gn_"), needed to rebuild file names.
model_info = {}
# Sorted so the processing order (and the printed logs) is reproducible
for model in sorted(os.listdir(nex_path)):
    # A model is kept only if it provides the 2015 ssp126 tasmax file; entries
    # without it are skipped explicitly rather than via a catch-all except,
    # so unrelated errors are no longer hidden.
    matches = glob(f"{nex_path}/{model}/ssp126/tasmax/*_2015.nc")
    if not matches:
        continue

    # File name: tasmax_day_<model>_ssp126<info>2015.nc -> keep only <info>
    info = (
        os.path.basename(matches[0])
        .replace(f"tasmax_day_{model}_ssp126", "")
        .replace("2015.nc", "")
    )
    model_info[model] = info

print(f"# models: {len(model_info)}")

# models: 29


In [4]:
############
### Dask ###
############
# SLURMCluster and Client are imported in the notebook's top import cell.

# Each SLURM job provides one single-core worker with 8 GiB of memory
cluster = SLURMCluster(
    account="pches",  # alternative allocation: account="open"
    cores=1,
    memory="8GiB",
    # NOTE(review): each job requests a 30-minute walltime, but some cells
    # below report longer wall times -- confirm the jobs live long enough.
    walltime="00:30:00",
)

cluster.scale(jobs=20)  # ask for jobs

client = Client(cluster)

# Display the client summary (dashboard link, workers, memory)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.6.0.155:34171,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


# Calculate metrics

In [None]:
###############################
# Metric calculation function #
###############################
def calculate_metric(model, ssp, year, var, metric, model_info, nex_path, out_path):
    """
    Compute one annual metric from a single year of daily NEX-GDDP-CMIP6 data
    and store it as a netCDF file.

    Parameters
    ----------
    model, ssp, year, var :
        Identify the input file
        ``{nex_path}/{model}/{ssp}/{var}/{var}_day_{model}_{ssp}{model_info}{year}[_v1.1].nc``
        (the ``_v1.1`` file is preferred when it exists).
    metric : {"avg", "max", "sum"}
        Annual aggregation applied to the daily values.
    model_info : str
        File-name fragment between the scenario and the year
        (e.g. "_r1i1p1f2_gn_").
    nex_path : str
        Root directory of the NEX-GDDP-CMIP6 models.
    out_path : str
        Directory receiving ``{metric}_{var}_day_{model}_{ssp}{model_info}{year}.nc``.

    Returns
    -------
    None
        The result is written to disk. Nothing happens when the output file
        already exists or when no input file is found. Errors raised while
        processing are not propagated: they are appended to
        ``{project_code_path}/code/logs/{model}_{ssp}_{var}_NEX.txt``
        (``project_code_path`` is read from the notebook namespace) so that one
        bad file does not abort a whole batch.
    """
    ## First check if done
    out_str = f"{var}_day_{model}_{ssp}{model_info}{year}.nc"
    out_file = f"{out_path}/{metric}_{out_str}"
    if os.path.isfile(out_file):
        return None

    ## Locate the input file (use v1.1 if available)
    base_path = f"{nex_path}/{model}/{ssp}/{var}/{var}_day_{model}_{ssp}{model_info}{year}"
    file_path = next(
        (p for p in (f"{base_path}_v1.1.nc", f"{base_path}.nc") if os.path.isfile(p)),
        None,
    )
    if file_path is None:
        return None

    try:
        # Fail with a clear message instead of an unbound-variable error later
        if metric not in ("avg", "max", "sum"):
            raise ValueError(f"Unknown metric '{metric}' (expected 'avg', 'max' or 'sum')")

        # The context manager closes the input file even if processing fails
        with xr.open_dataset(file_path) as ds:
            ## Convert units
            # Temperature: K -> C (same offset for tas, tasmax and tasmin)
            if var in ("tas", "tasmax", "tasmin") and ds[var].attrs["units"] == "K":
                ds[var] = ds[var] - 273.15

            # Precip: kg m-2 s-1 -> mm day-1
            if var == "pr" and ds.pr.attrs["units"] == "kg m-2 s-1":
                ds["pr"] = ds["pr"] * 86400
                ds.pr.attrs["units"] = "mm/day"

            ## Calculate metric
            annual = ds.resample(time="1Y")
            if metric == "avg":
                ds_out = annual.mean()
            elif metric == "max":
                ds_out = annual.max()
            else:
                # sum() skips NaNs and would return 0 for cells that are
                # missing all year; min_count=1 keeps those cells NaN so later
                # steps still treat them as missing.
                ds_out = annual.sum(min_count=1)

            ## Store
            # Write under a temporary name and rename afterwards, so an
            # interrupted write never leaves a truncated file that the
            # "already done" check above would accept.
            tmp_file = f"{out_file}.tmp"
            ds_out.to_netcdf(tmp_file)
        os.replace(tmp_file, out_file)

    # Log if error
    except Exception as e:
        except_path = f"{project_code_path}/code/logs"
        os.makedirs(except_path, exist_ok=True)
        # Append, tagged with metric and year, so errors from other years or
        # metrics of the same model/ssp/var do not overwrite each other
        with open(f"{except_path}/{model}_{ssp}_{var}_NEX.txt", "a") as f:
            f.write(f"{metric} {year}: {e}\n")

In [6]:
%%time
#########################
## Average Temperature ##
#########################
var = "tas"
metric = "avg"

out_path = f"{project_data_path}/metrics/NEX-GDDP-CMIP6/"

# Build one lazy task per (model, future scenario, year); the "historical"
# directory is excluded. Nothing runs until dask.compute is called below.
models = list(model_info.keys())
delayed = [
    dask.delayed(calculate_metric)(
        model=model,
        ssp=ssp,
        year=year,
        var=var,
        metric=metric,
        model_info=model_info[model],
        nex_path=nex_path,
        out_path=out_path,
    )
    for model in models
    for ssp in os.listdir(f"{nex_path}/{model}")
    if ssp != "historical"
    for year in range(2015, 2101)
]

# Compute
print(f"# computations: {len(delayed)} \n")
_ = dask.compute(*delayed)

# computations: 9546 

CPU times: user 13.4 s, sys: 569 ms, total: 14 s
Wall time: 29.4 s


In [44]:
%%time
#########################
## Maximum Temperature ##
#########################
var = "tasmax"
metric = "max"

out_path = f"{project_data_path}/metrics/NEX-GDDP-CMIP6/"

# Build one lazy task per (model, future scenario, year); the "historical"
# directory is excluded. Nothing runs until dask.compute is called below.
models = list(model_info.keys())
delayed = [
    dask.delayed(calculate_metric)(
        model=model,
        ssp=ssp,
        year=year,
        var=var,
        metric=metric,
        model_info=model_info[model],
        nex_path=nex_path,
        out_path=out_path,
    )
    for model in models
    for ssp in os.listdir(f"{nex_path}/{model}")
    if ssp != "historical"
    for year in range(2015, 2101)
]

# Compute
print(f"# computations: {len(delayed)} \n")
_ = dask.compute(*delayed)

# computations: 9546 

CPU times: user 2min 16s, sys: 10.9 s, total: 2min 27s
Wall time: 43min


In [7]:
%%time
#########################
## Total Precipitation ##
#########################
var = "pr"
metric = "sum"

out_path = f"{project_data_path}/metrics/NEX-GDDP-CMIP6/"

# Build one lazy task per (model, future scenario, year); the "historical"
# directory is excluded. Nothing runs until dask.compute is called below.
models = list(model_info.keys())
delayed = [
    dask.delayed(calculate_metric)(
        model=model,
        ssp=ssp,
        year=year,
        var=var,
        metric=metric,
        model_info=model_info[model],
        nex_path=nex_path,
        out_path=out_path,
    )
    for model in models
    for ssp in os.listdir(f"{nex_path}/{model}")
    if ssp != "historical"
    for year in range(2015, 2101)
]

# Compute
print(f"# computations: {len(delayed)} \n")
_ = dask.compute(*delayed)

# computations: 9546 

CPU times: user 12min 45s, sys: 45.9 s, total: 13min 31s
Wall time: 1h 10min 12s


In [6]:
%%time
#########################
## Max. Precipitation ##
#########################
var = "pr"
metric = "max"

out_path = f"{project_data_path}/metrics/NEX-GDDP-CMIP6/"

# Build one lazy task per (model, future scenario, year); the "historical"
# directory is excluded. Nothing runs until dask.compute is called below.
models = list(model_info.keys())
delayed = [
    dask.delayed(calculate_metric)(
        model=model,
        ssp=ssp,
        year=year,
        var=var,
        metric=metric,
        model_info=model_info[model],
        nex_path=nex_path,
        out_path=out_path,
    )
    for model in models
    for ssp in os.listdir(f"{nex_path}/{model}")
    if ssp != "historical"
    for year in range(2015, 2101)
]

# Compute
print(f"# computations: {len(delayed)} \n")
_ = dask.compute(*delayed)

# computations: 9546 

CPU times: user 1min 15s, sys: 3.2 s, total: 1min 19s
Wall time: 4min 28s


# Regrid

### Conservative

In [6]:
# Target of the regridding is the LOCA grid: 474 latitudes x 944 longitudes
loca_lat_grid = np.linspace(23.90625, 53.46875, 474)
loca_lon_grid = np.linspace(234.53125, 293.46875, 944)

lat_coord = (
    ["lat"],
    loca_lat_grid,
    {"standard_name": "latitude", "units": "degrees_north"},
)
lon_coord = (
    ["lon"],
    loca_lon_grid,
    {"standard_name": "longitude", "units": "degrees_east"},
)
ds_out = xr.Dataset({"lat": lat_coord, "lon": lon_coord})

# Add mask from LOCA output
# assumes LOCA2_NaNs.npy is a boolean (lat, lon) array flagging NaN cells, so
# its negation marks the cells to keep -- TODO confirm
loca_nans = np.load(f'{project_code_path}/code/utils/LOCA2_NaNs.npy')
loca_mask = xr.DataArray(~loca_nans, dims=['lat','lon'])
ds_out["mask"] = loca_mask

# One NEX metric file supplies the source grid used to construct the regridder
ds_in = xr.open_dataset(f"{project_data_path}/metrics/NEX-GDDP-CMIP6/avg_tas_day_UKESM1-0-LL_ssp585_r1i1p1f2_gn_2100.nc")

# Conservative
regridder = xe.Regridder(ds_in, ds_out, "conservative")

In [7]:
# Rename to include metric
def _preprocess(ds):
    metric_var = ds.encoding['source'].split('NEX-GDDP-CMIP6/')[1].split('_day_')[0]
    var = metric_var.split('_')[1]
    return ds.rename({var: metric_var})

In [8]:
%%time

# Out path
out_path = f"{project_data_path}/metrics_regridded/NEX-GDDP-CMIP6/conservative"

# Loop through all
models = list(model_info.keys())
for model in models:
    # Loop through SSPs
    ssps = os.listdir(f"{nex_path}/{model}")
    for ssp in ssps:
        if ssp == "historical":
            continue

        # Skip combinations that were already regridded
        out_file = f"{out_path}/{model}_{ssp}.nc"
        if os.path.isfile(out_file):
            continue

        # Read lazy; the context manager closes all yearly metric files of
        # this model/SSP once the result is written, instead of leaving the
        # handles open for the rest of the loop
        with xr.open_mfdataset(
            f"{project_data_path}/metrics/NEX-GDDP-CMIP6/*_{model}_{ssp}_*.nc",
            preprocess=_preprocess,
        ) as ds_nex_in:
            # Regrid lazy
            # NOTE: use high NaN threshold to try to not introduce NaNs
            # not already present in the LOCA2 grid
            ds_nex_out = regridder(ds_nex_in, skipna=True, na_thres=0.99)

            # Store (triggers the actual computation while files are open)
            ds_nex_out.to_netcdf(out_file)
        print(f"{model} {ssp}")

This may cause some slowdown.
Consider scattering data ahead of time and using futures.


ACCESS-CM2 ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


ACCESS-CM2 ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


ACCESS-CM2 ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


ACCESS-CM2 ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


ACCESS-ESM1-5 ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


ACCESS-ESM1-5 ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


ACCESS-ESM1-5 ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


ACCESS-ESM1-5 ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


BCC-CSM2-MR ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


BCC-CSM2-MR ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


BCC-CSM2-MR ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


BCC-CSM2-MR ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


CMCC-ESM2 ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


CMCC-ESM2 ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


CMCC-ESM2 ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


CMCC-ESM2 ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


CNRM-CM6-1 ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


CNRM-CM6-1 ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


CNRM-CM6-1 ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


CNRM-CM6-1 ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


CNRM-ESM2-1 ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


CNRM-ESM2-1 ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


CNRM-ESM2-1 ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


CNRM-ESM2-1 ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


CanESM5 ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


CanESM5 ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


CanESM5 ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


CanESM5 ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


EC-Earth3 ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


EC-Earth3 ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


EC-Earth3 ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


EC-Earth3 ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


EC-Earth3-Veg-LR ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


EC-Earth3-Veg-LR ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


EC-Earth3-Veg-LR ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


EC-Earth3-Veg-LR ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


FGOALS-g3 ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


FGOALS-g3 ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


FGOALS-g3 ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


FGOALS-g3 ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


GFDL-ESM4 ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


GFDL-ESM4 ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


GFDL-ESM4 ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


GFDL-ESM4 ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


GISS-E2-1-G ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


GISS-E2-1-G ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


GISS-E2-1-G ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


GISS-E2-1-G ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


HadGEM3-GC31-LL ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


HadGEM3-GC31-LL ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


HadGEM3-GC31-LL ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


HadGEM3-GC31-MM ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


HadGEM3-GC31-MM ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


INM-CM4-8 ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


INM-CM4-8 ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


INM-CM4-8 ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


INM-CM4-8 ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


INM-CM5-0 ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


INM-CM5-0 ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


INM-CM5-0 ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


INM-CM5-0 ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


IPSL-CM6A-LR ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


IPSL-CM6A-LR ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


IPSL-CM6A-LR ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


IPSL-CM6A-LR ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


KACE-1-0-G ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


KACE-1-0-G ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


KACE-1-0-G ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


KACE-1-0-G ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


KIOST-ESM ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


KIOST-ESM ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


KIOST-ESM ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MIROC-ES2L ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MIROC-ES2L ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MIROC-ES2L ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MIROC-ES2L ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MIROC6 ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MIROC6 ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MIROC6 ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MIROC6 ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MPI-ESM1-2-HR ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MPI-ESM1-2-HR ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MPI-ESM1-2-HR ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MPI-ESM1-2-HR ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MPI-ESM1-2-LR ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MPI-ESM1-2-LR ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MPI-ESM1-2-LR ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MPI-ESM1-2-LR ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MRI-ESM2-0 ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MRI-ESM2-0 ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MRI-ESM2-0 ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


MRI-ESM2-0 ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


NESM3 ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


NESM3 ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


NESM3 ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


NorESM2-LM ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


NorESM2-LM ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


NorESM2-LM ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


NorESM2-LM ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


NorESM2-MM ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


NorESM2-MM ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


NorESM2-MM ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


NorESM2-MM ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


TaiESM1 ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


TaiESM1 ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


TaiESM1 ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


TaiESM1 ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


UKESM1-0-LL ssp126


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


UKESM1-0-LL ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


UKESM1-0-LL ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


UKESM1-0-LL ssp585
CPU times: user 22min 2s, sys: 2min 22s, total: 24min 25s
Wall time: 41min 55s


# Summaries

## Indices

In [None]:
# Simple preprocessing function to add model and year coordinates
def _preprocess(ds):
    # Add model and SSP as coordinates
    model = ds.encoding['source'].split('/')[-1].split('_')[0]
    ds = ds.assign_coords(model = model)

    # Time -> year
    ds['time'] = ds['time'].dt.year

    return ds

In [48]:
# Calculates summary indices for NEX-GDDP-CMIP6 ensemble for given SSP
def get_summary_indices(ssp, years, out_path, out_str):
    """
    Compute ensemble summary indices for one SSP and store them as netCDF.

    All regridded model files matching ``*_{ssp}.nc`` are pooled over the
    ``model`` and ``time`` dimensions within the window ``years``
    (``[first_year, last_year]``, both included).

    Current summary indices calculated (stacked along ``indice``):
    mean ('mean'), 99% quantile range, i.e. q0.995 - q0.005 ('99range'),
    and 99th quantile ('q99').

    The result is written to ``{out_path}/{out_str}.nc``; nothing is done
    (and None is returned) if that file already exists. Relies on the
    notebook-level ``project_data_path`` and on the ``_preprocess`` hook that
    adds the ``model`` coordinate and converts time to years.
    """
    # Check if done
    out_file = f"{out_path}/{out_str}.nc"
    if os.path.isfile(out_file):
        return None

    # Read all (lazy); the context manager closes every file once stored
    with xr.open_mfdataset(
        f"{project_data_path}/metrics_regridded/NEX-GDDP-CMIP6/conservative/*_{ssp}.nc",
        chunks='auto',
        preprocess=_preprocess,
        combine='nested',
        concat_dim=['model'],
    ) as ds:
        # Time slice
        ds_sel = ds.sel(time=slice(years[0], years[1]))

        ## Summary indices
        # Mean
        ds_mean = ds_sel.mean(dim=['model', 'time']).assign_coords(indice = 'mean')

        # Quantiles: each pooled dimension must sit in a single dask chunk,
        # so rechunk once (both dims) and reuse it for all three quantiles
        ds_pooled = ds_sel.chunk({'model': -1, 'time': -1})
        ds_qlow = ds_pooled.quantile(0.005, dim=['model', 'time'])
        ds_qhigh = ds_pooled.quantile(0.995, dim=['model', 'time'])
        ds_qrange = (ds_qhigh - ds_qlow).assign_coords(indice = '99range')

        ds_q99 = ds_pooled.quantile(0.99, dim=['model', 'time']).assign_coords(indice = 'q99')

        # Store (triggers the actual computation while files are open)
        ds_out = xr.concat([ds_mean, ds_qrange, ds_q99], dim='indice')
        ds_out.to_netcdf(out_file)

In [50]:
%%time
# Pooling windows (first and last year) and scenarios to summarise
periods = [[2020,2040], [2050,2070], [2080,2100]]
scenarios = ['ssp245', 'ssp370', 'ssp585']
summary_dir = f"{project_data_path}/summary_indices"

for years in periods:
    for ssp in scenarios:
        get_summary_indices(
            ssp=ssp,
            years=years,
            out_path=summary_dir,
            out_str=f"NEX-GDDP-CMIP6_{ssp}_{str(years[0])}-{str(years[1])}",
        )

CPU times: user 1min 26s, sys: 5.71 s, total: 1min 32s
Wall time: 4min 14s


## Raw data

In [21]:
# Extracts the annual metrics of the NEX-GDDP-CMIP6 ensemble at one location for given SSP
def get_raw_data(ssp, years, lat, lon, out_path, out_str):
    """
    Collect the regridded annual metrics of every model at the grid cell
    nearest to (``lat``, ``lon``) for one SSP and store them as a CSV.

    Parameters
    ----------
    ssp : str
        Scenario to extract; only files named ``{model}_{ssp}.nc`` are read.
    years : sequence of two ints
        First and last year (both included) of the window that is kept.
    lat, lon : float
        Location; a negative ``lon`` is shifted by 360 before the lookup.
    out_path, out_str : str
        The table is written to ``{out_path}/{out_str}.csv``.

    Returns
    -------
    None
        Nothing is done if the CSV already exists. The CSV has one row per
        model and year, with the metric columns plus ``ssp`` and ``model``.

    Raises
    ------
    ValueError
        If no model provides a regridded file for ``ssp``.
    """
    # Check if done
    out_file = f"{out_path}/{out_str}.csv"
    if os.path.isfile(out_file):
        return None

    in_path = f"{project_data_path}/metrics_regridded/NEX-GDDP-CMIP6/conservative"
    metrics = ['avg_tas', 'sum_pr', 'max_tasmax', 'max_pr']

    def read_and_process(model, ssp, years, lat, lon):
        # Read; the context manager closes the file once the frame is built
        with xr.open_dataset(f"{in_path}/{model}_{ssp}.nc") as ds_tmp:
            ds_tmp['time'] = ds_tmp["time"].dt.year

            # Time slice
            ds_sel = ds_tmp.sel(time=slice(years[0], years[1]))

            # Location selection
            if lon < 0:
                lon = 360 + lon
            ds_sel = ds_sel.sel(lat=lat, lon=lon, method='nearest')

            # Construct dataframe (drops years where every metric is missing)
            df_tmp = (
                ds_sel.to_dataframe()
                .dropna(subset=metrics, how='all')
                .drop(columns=["lat", "lon"])
                .reset_index()
            )
        df_tmp["ssp"] = ssp
        df_tmp["model"] = model

        # Return
        return df_tmp

    # One task per model that provides the requested SSP. The `ssp` argument
    # was previously shadowed by a loop over every scenario found on disk, so
    # each output file contained all SSPs regardless of its name.
    df_delayed = [
        dask.delayed(read_and_process)(model, ssp, years, lat, lon)
        for model in model_info.keys()
        if os.path.isfile(f"{in_path}/{model}_{ssp}.nc")
    ]
    if not df_delayed:
        raise ValueError(f"No regridded files found for {ssp} in {in_path}")

    # Compute and store
    df_out = dask.compute(*df_delayed)
    pd.concat(df_out).to_csv(out_file, index=False)

In [27]:
%%time
# Time windows (first and last year) and scenarios extracted for every city
periods = [[2020,2040], [2050,2070], [2080,2100]]
scenarios = ['ssp245', 'ssp370', 'ssp585']
raw_dir = f"{project_data_path}/summary_raw"

for city, (lat, lon) in city_list.items():
    for years in periods:
        for ssp in scenarios:
            get_raw_data(
                ssp=ssp,
                years=years,
                lat=lat,
                lon=lon,
                out_path=raw_dir,
                out_str=f"{city}_NEX-GDDP-CMIP6_{ssp}_{str(years[0])}-{str(years[1])}",
            )

CPU times: user 20.1 s, sys: 969 ms, total: 21.1 s
Wall time: 1min 5s
