In [None]:
import os 
from glob import glob

import numpy as np
import jax
import jax.numpy as jnp

import xarray as xr
import xesmf as xe
import pandas as pd
import dask
import zarr

from utils.param_names import param_names
from utils.initial_params import constants
from utils.subsets import subsets
from utils.global_paths import project_data_path, project_code_path, loca_path
from src.read_inputs import read_projection_inputs
from src.prediction import make_prediction_vmap
from src.data_processing import _subset_states

In [2]:
############
### Dask ###
############
from dask_jobqueue import SLURMCluster

cluster = SLURMCluster(
    account="pches",
    # account="open",
    cores=1,
    memory="60GiB",
    walltime="02:00:00"
)
cluster.scale(jobs=30)  # ask for jobs

from dask.distributed import Client
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.6.0.159:46733,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


# LOCA2

In [3]:
##############
### Models ###
##############

models = os.listdir(f"{loca_path}/")
models.remove('training_data')
models.remove('scripts')

loca_all = {}

# Loop through models
for model in models:
    loca_all[model] = {}
    # Loop through members
    members = os.listdir(f"{loca_path}/{model}/0p0625deg/")
    for member in members:
        # Append SSPs
        ssps = os.listdir(f"{loca_path}/{model}/0p0625deg/{member}/")
        loca_all[model][member] = ssps

# Matches website (https://loca.ucsd.edu/loca-version-2-for-north-america-ca-jan-2023/) as of Jan 2023
print(f"# models: {len(models)}")
print(f"# model/expts: {np.sum([len(np.unique([item for row in [loca_all[model][member] for member in loca_all[model].keys()] for item in row])) for model in models])}")
print(f"# model/expts/ens: {np.sum([len(loca_all[model][ssp]) for model in models for ssp in loca_all[model]])}")
print(f"# model/expts/ens (not including historical): {np.sum([len([ssp for ssp in loca_all[model][member] if ssp != 'historical']) for model in models for member in loca_all[model]])}")

# models: 27
# model/expts: 99
# model/expts/ens: 329
# model/expts/ens (not including historical): 221


## Regridding

In [8]:
###################
# Regrid function
###################
def regrid_subset(model, member, ssp, subset_name, list_of_states):
    # Read inputs
    tasmin_in = xr.open_mfdataset(f"{loca_path}/{model}/0p0625deg/{member}/{ssp}/tasmin/*.nc", chunks="auto")
    tasmin_in["tasmin"] = tasmin_in["tasmin"] - 273.15
    tasmin_in["tasmin"].attrs["units"] = "degC"
    
    tasmax_in = xr.open_mfdataset(f"{loca_path}/{model}/0p0625deg/{member}/{ssp}/tasmax/*.nc", chunks="auto")
    tasmax_in["tasmax"] = tasmax_in["tasmax"] - 273.15
    tasmax_in["tasmax"].attrs["units"] = "degC"

    tas_in = (tasmin_in["tasmin"] + tasmax_in["tasmax"]) / 2.0
    tas_in.attrs["units"] = "degC"
    
    pr_in = xr.open_mfdataset(f"{loca_path}/{model}/0p0625deg/{member}/{ssp}/pr/*.nc", chunks="auto")
    pr_in["pr"] = pr_in['pr'] * 86400
    pr_in["pr"].attrs["units"] = "mm/day"

    # Merge
    ds_in = xr.merge([xr.Dataset({"tas": tas_in}), pr_in])

    # Construct out grid
    nldas_lats = np.load(f"{project_code_path}/code/utils/grids/{subset_name}_lat.npy")
    nldas_lons = np.load(f"{project_code_path}/code/utils/grids/{subset_name}_lon.npy")

    dr_out = xr.Dataset({
        "lat": (["lat"], nldas_lats,
                {"standard_name": "latitude", "units": "degrees_north"},),
        "lon": (["lon"], nldas_lons,
                {"standard_name": "longitude", "units": "degrees_east"},),
    })

    # Regrid conservatively
    regridder = xe.Regridder(ds_in, dr_out, "conservative")
    ds_out = regridder(ds_in, skipna=True, na_thres=0.99) # This threshold is somewhat subjective

    # Subset to states
    ds_out = _subset_states(ds_out, list_of_states)
    
    # Store 
    ds_out = ds_out.chunk({'time': 200 , 'lat':-1, 'lon':-1})
    compressor = zarr.Blosc(cname="zstd", clevel=3)
    encoding = {vname: {"compressor": compressor} for vname in ds_out.data_vars}
    ds_out.to_zarr(f"{project_data_path}/projections/{subset_name}/forcing/LOCA2/{model}_{member}_{ssp}.zarr",
                   encoding=encoding, mode='w-', consolidated=True)

In [9]:
## File path function
def make_loca_file_path(loca_path, model, member, ssp, var):
    """
    Returns list of file paths for a given downscaled LOCA output.
    """
    out_path = f"{loca_path}/{model}/0p0625deg/{member}/{ssp}/{var}"

    if os.path.isdir(out_path):
        files = os.listdir(out_path)
        files = [file for file in files if file[-7:] != 'ORIG.nc'] # Skip ORIGs (had to fix tasmin naming errors)
        return files
    else:
        return []

### eCONUS

In [10]:
subset_name = "eCONUS"
list_of_states = subsets[subset_name]

In [8]:
%%time
# Loop through models
for model in models:
    # Loop through members
    for member in loca_all[model].keys():
        # Loop through SSPs
        for ssp in loca_all[model][member]:
            if ssp == "historical":
                continue
            # Some vars are missing for some outputs: skip
            file_paths = make_loca_file_path(loca_path, model, member, ssp, "tasmin")
            if len(file_paths) == 0:
                print(f"Missing: {model} {ssp} {member}")

            # Check if done
            if not os.path.exists(f"{project_data_path}/projections/{subset_name}/forcing/LOCA2/{model}_{member}_{ssp}.zarr"):
                # Re-grid and subset
                try:
                    regrid_subset(model=model,
                                  member=member,
                                  ssp=ssp,
                                  subset_name=subset_name,
                                  list_of_states=list_of_states)
                except:
                    print(f"{model}_{member}_{ssp}")

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
This may cause some slowdown.
Consider scattering data ahead of time and using futures.


INM-CM5-0_r3i1p1f1_ssp370


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
This may cause some slowdown.
Consider scattering data ahead of time and using futures.


INM-CM5-0_r4i1p1f1_ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
This may cau

IPSL-CM6A-LR_r4i1p1f1_ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
This may cause some slowdown.
Consider scattering data ahead of time and using futures.


IPSL-CM6A-LR_r5i1p1f1_ssp245


This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
This may cause some slowdown.
Consider scattering data ahead of time and using futures.


IPSL-CM6A-LR_r7i1p1f1_ssp370


This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Co

Missing: MPI-ESM1-2-LR ssp585 r10i1p1f1
MPI-ESM1-2-LR_r10i1p1f1_ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
    >>> with dask.config.set(**{

MPI-ESM1-2-LR_r4i1p1f1_ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.


Missing: MPI-ESM1-2-LR ssp585 r5i1p1f1
MPI-ESM1-2-LR_r5i1p1f1_ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


Missing: MPI-ESM1-2-LR ssp585 r6i1p1f1
MPI-ESM1-2-LR_r6i1p1f1_ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.


Missing: MPI-ESM1-2-LR ssp585 r7i1p1f1
MPI-ESM1-2-LR_r7i1p1f1_ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.


Missing: MPI-ESM1-2-LR ssp585 r8i1p1f1
MPI-ESM1-2-LR_r8i1p1f1_ssp585


This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Co

CPU times: user 42min 7s, sys: 1min 40s, total: 43min 47s
Wall time: 2h 36min 3s




## Run projections

### Functions

In [4]:
def get_training_res(subset_name, obs_name, loss_metric, metrics_include, best_metric):
    """
    Reads the training results
    """
    # Loop through files
    files = glob(f'{project_data_path}/WBM/calibration/{subset_name}/{obs_name}/training_res/*.txt')

    df_out = []
    for file in files:
        # Read
        df = pd.read_csv(file, sep = ' ').dropna(how='any')
        df['epoch'] = df['epoch'].astype(np.float32)

        # Add identifiers
        _, param_id, val_id, _ = file.split('/')[-1].split('_')
        df['param_id'] = param_id
        df['val_id'] = val_id
        
        # Take best
        df_best = df.query('epoch > 10').sort_values(by=loss_metric).iloc[:1]
        if len(df_best) > 0:
            df_out.append(df_best)

    # Join
    df_out = pd.concat(df_out).reset_index().drop(columns='index')

    # Filter best result for each metric
    if best_metric:
        metric_min_inds = df_out.groupby('metric')[loss_metric].idxmin()
        df_out = df_out.loc[metric_min_inds].reset_index().drop(columns='index')

    # Subset metrics 
    if metrics_include != 'all':
        df_out = df_out[df_out['metric'].isin(metrics_include)]

    # Return
    return df_out

In [5]:
def run_projections(subset_name, obs_name, projection_id, loss_metric, loss_metrics_include, best_loss):
    # Get soil parameters
    df_res = get_training_res(subset_name, obs_name, loss_metric, loss_metrics_include, best_loss)

    # Check if all done
    if len(glob(f"{project_data_path}/projections/{subset_name}/out/{projection_id}_{obs_name}_*")) < len(df_res):
        # Check forcing exists
        if os.path.exists(f"{project_data_path}/projections/{subset_name}/forcing/{projection_id}.zarr"):
            # Read all
            x_forcing_nt, x_forcing_nyrs, x_maps, valid_inds = read_projection_inputs(subset_name, obs_name, projection_id, True)

            # Need for out grid
            lats = np.load(f"{project_code_path}/code/utils/grids/{subset_name}_lat.npy")
            lons = np.load(f"{project_code_path}/code/utils/grids/{subset_name}_lon.npy")

            # Loop through soil parameters
            for iparam in range(len(df_res)):
                # Check if already done
                metric_id = df_res.iloc[iparam]['metric']
                param_id = df_res.iloc[iparam]['param_id']
                val_id = df_res.iloc[iparam]['val_id']
                sim_id = f"{obs_name}_{metric_id}_{param_id}_{val_id}"

                save_name = f"{project_data_path}/projections/{subset_name}/out/{projection_id}_{sim_id}"
            
                if not os.path.exists(f"{save_name}.npz"):
                    # Run it
                    theta = jnp.array([df_res.iloc[iparam][param] for param in param_names])
                    out = make_prediction_vmap(theta, constants, x_forcing_nt, x_forcing_nyrs, x_maps)
                    np.savez(save_name, out=out, valid_inds=valid_inds)

In [6]:
# Calculate soil moisture metrics from daily simulations
def calculate_soilMoist_metrics(subset_name, ensemble_name, file_name, metric, threshold):
    """
    Metric can be [mean, 5dmin, 5dmax] calculated as a rolling 10-year anomaly or as a change
    from the "historical" (i.e. calibration period) simulation. Metric takes from of (e.g.)
    "5dmin-change". 
    """
    # Read simulation
    sim_id = file_name.replace(".npz", "")
    npz = np.load(f"{project_data_path}/projections/{subset_name}/out/{ensemble_name}/{sim_id}.npz")

    ##### Check if done
    if len(metric.split('-')) > 1:
        metric_to_calc = metric.split('-')[0]
        anom_type = metric.split('-')[1]
    else:
        metric_to_calc = metric
        anom_type = ""

    if metric_to_calc.split('_')[0] == "days":
        metric_name = f"{metric_to_calc}_{str(abs(int(threshold)))}"
    else:
        metric_name = metric_to_calc

    metric_name_final = metric_name + (bool(anom_type) * "-") + anom_type
    if os.path.exists(f"{project_data_path}/projections/{subset_name}/metrics/{metric_name_final}/{sim_id}.nc"):
        return None

    # Construct xarray
    lats = np.load(f"{project_code_path}/code/utils/grids/{subset_name}_lat.npy")
    lons = np.load(f"{project_code_path}/code/utils/grids/{subset_name}_lon.npy")
    nt = npz['out'].shape[1]
    
    out_full = np.full((len(npz['valid_inds']), nt), np.nan)
    out_full[npz['valid_inds']] = npz['out']
    
    ds_sim = xr.Dataset(data_vars=dict(soilMoist=(["time", "lat", "lon"], 
                                                  np.transpose(out_full.reshape(len(lons), len(lats), nt), (2,1,0)))),
                        coords=dict(lon=lons, lat=lats,
                                    time=xr.cftime_range(start='2023-01-01', periods=nt, calendar='365_day')))

    #### Calculate metric
    def calculate_metric(ds, metric_to_calc, threshold=None):
        # Mean
        if metric_to_calc == "mean":
            ds = ds.resample(time='1Y').mean()
        # 5-day minima
        elif metric_to_calc == "5dmin":
            ds = ds.resample(time='5D').mean().resample(time="1Y").min()
        # 5-day maxima
        elif metric_to_calc == "5dmax":
            ds = ds.resample(time='5D').mean().resample(time="1Y").max()
        # Days above/below thresh
        elif metric_to_calc.split('_')[0] == "days":
            if metric == "days_above":
                ds = (ds >= threshold).resample(time='1Y').sum()
            elif metric == "days_below":
                ds = (ds <= threshold).resample(time='1Y').sum()
        return ds
            
    # Anomalies or not
    if anom_type == "anom":
        ds_out = ds_sim['soilMoist'] - ds_sim['soilMoist'].rolling(time=(365*10), center=True).mean()
        ds_out = calculate_metric(ds_out, metric_to_calc)
    elif anom_type == "change":
        obs_name = sim_id.split('_')[3]
        soil_id = '_'.join(sim_id.split('_')[4:])
        ds_obs = xr.open_dataset(f"{project_data_path}/WBM/calibration/{subset_name}/{obs_name}/hindcasts/best_only/{soil_id}.nc")
        ds_obs = calculate_metric(ds_obs, metric_to_calc).mean(dim="time")
        ds_sim = calculate_metric(ds_sim, metric_to_calc)
        ds_out = ds_sim['soilMoist'] - ds_obs['soilMoist']
    else:
        ds_out = ds_sim['soilMoist']
        ds_out = calculate_metric(ds_out, metric_to_calc)

    ds_out = xr.Dataset({metric_name_final: ds_out})
        
    # Memory management
    del ds_sim, npz
    
    # Get info
    model, member, ssp, obs_name, loss_metric, param_id, val_id = sim_id.split('_')
    projection_id = f"{ensemble_name}/{model}_{member}_{ssp}"
    soil_id = f"{obs_name}_{loss_metric}_{param_id}_{val_id}"
    
    ds_out = ds_out.assign_coords(model=model)
    ds_out = ds_out.assign_coords(member=member)
    ds_out = ds_out.assign_coords(ssp=ssp)
    ds_out = ds_out.assign_coords(obs_name=obs_name)
    ds_out = ds_out.assign_coords(loss_metric=loss_metric)
    ds_out = ds_out.assign_coords(param_id=param_id)
    ds_out = ds_out.assign_coords(val_id=val_id)

    ds_out = ds_out.assign_coords(projection_id=projection_id)
    ds_out = ds_out.assign_coords(soil_id=soil_id)
    
    # Store
    ds_out.to_netcdf(f"{project_data_path}/projections/{subset_name}/metrics/{metric_name_final}/{sim_id}.nc")

### Simulations

In [10]:
# Settings
subset_name = 'eCONUS'
loss_metric = 'pred_loss'

loss_metrics_include = ['kge', 'nse', 'taylor',
                   'rmse', 'ubrmse',
                   'mae', 'ubmae', 
                   'mse', 'ubmse',
                   'outer20rmse', 'outer20ubrmse',
                   'outer50rmse', 'outer50ubrmse']

best_loss = True

n_member_min = 1
ssps = ['ssp370', 'ssp245']
ensemble_name = "LOCA2"

In [11]:
%%time
# Parallelize with dask delayed
delayed = []

# Loop through obs
for obs_name in ['SMAP', 'VIC', 'NOAH', 'MOSAIC']:
    # Loop through models
    for model in models:
        # Loop through members
        if len(loca_all[model].keys()) >= n_member_min:
            for member in list(loca_all[model].keys())[:1]:
                # Loop through SSPs
                for ssp in loca_all[model][member]:
                    if ssp in ssps:
                        # Run it
                        projection_id = f"LOCA2/{model}_{member}_{ssp}"
                        delayed.append(dask.delayed(run_projections)(subset_name,
                                                                     obs_name,
                                                                     projection_id,
                                                                     loss_metric,
                                                                     loss_metrics_include,
                                                                     best_loss))
    
# Compute
print(len(delayed))
# _ = dask.compute(*delayed)

180
CPU times: user 10.3 ms, sys: 1.6 ms, total: 11.9 ms
Wall time: 13.2 ms


### Calculate metrics

In [7]:
# Settings
subset_name = 'eCONUS'
ensemble_name = "LOCA2"

In [17]:
%%time

soil_metric = "mean-change"
threshold = None

# Parallelize with dask delayed
delayed = []

# Get simulations
file_names = os.listdir(f"{project_data_path}/projections/{subset_name}/out/{ensemble_name}/")

# Run it
for file_name in file_names:
    # Only take first member
    model, member = file_name.split('_')[:2]
    if member == list(loca_all[model].keys())[0]:
        delayed.append(dask.delayed(calculate_soilMoist_metrics)(subset_name=subset_name,
                                                                 ensemble_name=ensemble_name,
                                                                 file_name=file_name,
                                                                 metric=soil_metric,
                                                                 threshold=threshold))
    
# Compute
print(len(delayed))
_ = dask.compute(*delayed)

2340
CPU times: user 4.77 s, sys: 146 ms, total: 4.92 s
Wall time: 5.12 s


In [15]:
%%time

soil_metric = "5dmin-change"
threshold = None

# Parallelize with dask delayed
delayed = []

# Get simulations
file_names = os.listdir(f"{project_data_path}/projections/{subset_name}/out/{ensemble_name}/")

# Run it
for file_name in file_names:
    # Only take first member
    model, member = file_name.split('_')[:2]
    if member == list(loca_all[model].keys())[0]:
        delayed.append(dask.delayed(calculate_soilMoist_metrics)(subset_name=subset_name,
                                                                 ensemble_name=ensemble_name,
                                                                 file_name=file_name,
                                                                 metric=soil_metric,
                                                                 threshold=threshold))
    
# Compute
print(len(delayed))
_ = dask.compute(*delayed)

2340
CPU times: user 6.29 s, sys: 256 ms, total: 6.55 s
Wall time: 15.5 s


In [16]:
%%time

soil_metric = "5dmax-change"
threshold = None

# Parallelize with dask delayed
delayed = []

# Get simulations
file_names = os.listdir(f"{project_data_path}/projections/{subset_name}/out/{ensemble_name}/")

# Run it
for file_name in file_names:
    # Only take first member
    model, member = file_name.split('_')[:2]
    if member == list(loca_all[model].keys())[0]:
        delayed.append(dask.delayed(calculate_soilMoist_metrics)(subset_name=subset_name,
                                                                 ensemble_name=ensemble_name,
                                                                 file_name=file_name,
                                                                 metric=soil_metric,
                                                                 threshold=threshold))
    
# Compute
print(len(delayed))
_ = dask.compute(*delayed)

2340
CPU times: user 4.34 s, sys: 69.2 ms, total: 4.41 s
Wall time: 4.51 s


In [18]:
%%time

soil_metric = "mean-anom"
threshold = None

# Parallelize with dask delayed
delayed = []

# Get simulations
file_names = os.listdir(f"{project_data_path}/projections/{subset_name}/out/{ensemble_name}/")

# Run it
for file_name in file_names:
    # Only take first member
    model, member = file_name.split('_')[:2]
    if member == list(loca_all[model].keys())[0]:
        delayed.append(dask.delayed(calculate_soilMoist_metrics)(subset_name=subset_name,
                                                                 ensemble_name=ensemble_name,
                                                                 file_name=file_name,
                                                                 metric=soil_metric,
                                                                 threshold=threshold))
    
# Compute
print(len(delayed))
_ = dask.compute(*delayed)

2340
CPU times: user 4.84 s, sys: 123 ms, total: 4.96 s
Wall time: 5.14 s


In [19]:
%%time

soil_metric = "5dmin-anom"
threshold = None

# Parallelize with dask delayed
delayed = []

# Get simulations
file_names = os.listdir(f"{project_data_path}/projections/{subset_name}/out/{ensemble_name}/")

# Run it
for file_name in file_names:
    # Only take first member
    model, member = file_name.split('_')[:2]
    if member == list(loca_all[model].keys())[0]:
        delayed.append(dask.delayed(calculate_soilMoist_metrics)(subset_name=subset_name,
                                                                 ensemble_name=ensemble_name,
                                                                 file_name=file_name,
                                                                 metric=soil_metric,
                                                                 threshold=threshold))
    
# Compute
print(len(delayed))
_ = dask.compute(*delayed)

2340
CPU times: user 5.04 s, sys: 145 ms, total: 5.19 s
Wall time: 5.33 s


In [20]:
%%time

soil_metric = "5dmax-anom"
threshold = None

# Parallelize with dask delayed
delayed = []

# Get simulations
file_names = os.listdir(f"{project_data_path}/projections/{subset_name}/out/{ensemble_name}/")

# Run it
for file_name in file_names:
    # Only take first member
    model, member = file_name.split('_')[:2]
    if member == list(loca_all[model].keys())[0]:
        delayed.append(dask.delayed(calculate_soilMoist_metrics)(subset_name=subset_name,
                                                                 ensemble_name=ensemble_name,
                                                                 file_name=file_name,
                                                                 metric=soil_metric,
                                                                 threshold=threshold))
    
# Compute
print(len(delayed))
_ = dask.compute(*delayed)

2340
CPU times: user 4.63 s, sys: 117 ms, total: 4.75 s
Wall time: 4.93 s


In [8]:
%%time

soil_metric = "mean"
threshold = None

# Parallelize with dask delayed
delayed = []

# Get simulations
file_names = os.listdir(f"{project_data_path}/projections/{subset_name}/out/{ensemble_name}/")

# Run it
for file_name in file_names:
    # Only take first member
    model, member = file_name.split('_')[:2]
    if member == list(loca_all[model].keys())[0]:
        delayed.append(dask.delayed(calculate_soilMoist_metrics)(subset_name=subset_name,
                                                                 ensemble_name=ensemble_name,
                                                                 file_name=file_name,
                                                                 metric=soil_metric,
                                                                 threshold=threshold))
    
# Compute
print(len(delayed))
_ = dask.compute(*delayed)

2340
CPU times: user 8min 14s, sys: 33.9 s, total: 8min 48s
Wall time: 1h 47s


In [8]:
%%time

soil_metric = "5dmin"
threshold = None

# Parallelize with dask delayed
delayed = []

# Get simulations
file_names = os.listdir(f"{project_data_path}/projections/{subset_name}/out/{ensemble_name}/")

# Run it
for file_name in file_names:
    # Only take first member
    model, member = file_name.split('_')[:2]
    if member == list(loca_all[model].keys())[0]:
        delayed.append(dask.delayed(calculate_soilMoist_metrics)(subset_name=subset_name,
                                                                 ensemble_name=ensemble_name,
                                                                 file_name=file_name,
                                                                 metric=soil_metric,
                                                                 threshold=threshold))
    
# Compute
print(len(delayed))
_ = dask.compute(*delayed)

2340
CPU times: user 11min 18s, sys: 45.1 s, total: 12min 3s
Wall time: 1h 30min 37s


In [21]:
%%time

soil_metric = "5dmax"
threshold = None

# Parallelize with dask delayed
delayed = []

# Get simulations
file_names = os.listdir(f"{project_data_path}/projections/{subset_name}/out/{ensemble_name}/")

# Run it
for file_name in file_names:
    # Only take first member
    model, member = file_name.split('_')[:2]
    if member == list(loca_all[model].keys())[0]:
        delayed.append(dask.delayed(calculate_soilMoist_metrics)(subset_name=subset_name,
                                                                 ensemble_name=ensemble_name,
                                                                 file_name=file_name,
                                                                 metric=soil_metric,
                                                                 threshold=threshold))
    
# Compute
print(len(delayed))
_ = dask.compute(*delayed)

2340
CPU times: user 12min 29s, sys: 45.9 s, total: 13min 15s
Wall time: 1h 22min 3s
