In [1]:
import os

# Switch the working directory to the parent of the current one so that the
# relative paths used further down (e.g. `_data/...`) and the `libs` imports
# resolve from there.
# NOTE: this is not idempotent - every re-run of this cell without a kernel
# restart moves up one more directory level.
os.chdir(os.path.split(os.getcwd())[0])

In [2]:
from dask.diagnostics import ProgressBar
from pathlib import Path
import libs.utils
import libs.vars
import numpy as np
import xarray

# Set xarray's global `keep_attrs` option so that attributes are retained
# through operations (per xarray's documented option - verify against the
# installed version). The trailing `;` suppresses the cell's output.
xarray.set_options(keep_attrs=True);

In [3]:
# ----- SPECIFIC SETTINGS -----
# Table/component identifier embedded in every filename.
component = 'Amon'

# (experiment, time range) pairs for which a filename template is generated.
experiment_periods = (
    ('historical', '198001-201412'),
    ('ssp585', '201501-210012'),
)

# `component`, experiment and period are filled in here; the doubled braces
# leave `{variable_id}`, `{source_id}`, `{variant_label}` and `{grid_label}`
# as placeholders to be substituted later with `str.format`.
filename_template = (
    '{{variable_id}}_{component}_{{source_id}}_{experiment}'
    '_{{variant_label}}_{{grid_label}}_{period}_processed.nc'
)

files = [
    filename_template.format(component=component, experiment=experiment, period=period)
    for experiment, period in experiment_periods
]

In [4]:
def create_prra(ensemble, files):
    """
    Create `prra` NetCDF files computed as `pr - prsn` for each ensemble member.

    For every item in `ensemble` and every filename template in `files`, the
    corresponding `pr` and `prsn` files are opened, `prsn` is subtracted from
    `pr`, the variable is renamed to `prra`, and the result is written to the
    member's `prra` directory and then compressed in place. The three resolved
    paths are printed for each combination. Targets that already exist on
    disk are skipped without opening the input files.

    Parameters
    ----------
    ensemble : iterable of dict
        Each item must provide the keys 'source_id' and 'variant_label'.
    files : iterable of str
        Filename templates; they are formatted with `variable_id`,
        `source_id`, `variant_label` and `grid_label`.

    Returns
    -------
    None
        Results are written to disk under `_data/cmip6/<source_id>/prra/`.
    """
    for item in ensemble:
        source_id = item['source_id']
        variant_label = item['variant_label']
        # 'gr' is used for EC-Earth3, 'gn' for every other source_id.
        grid_label = 'gr' if source_id == 'EC-Earth3' else 'gn'

        format_vars = {
            'source_id': source_id,
            'variant_label': variant_label,
            'grid_label': grid_label
        }

        # Only depends on the ensemble member, so build it once per member.
        # `{{variable_id}}` survives the f-string as a `.format` placeholder.
        base_path = f'_data/cmip6/{source_id}/{{variable_id}}/'

        for filename in files:
            path_template = base_path + filename
            filepath_pr = path_template.format(variable_id='pr', **format_vars)
            filepath_prsn = path_template.format(variable_id='prsn', **format_vars)
            filepath_prra = path_template.format(variable_id='prra', **format_vars)

            print(filepath_pr)
            print(filepath_prsn)
            print(filepath_prra)

            # Check the target *before* opening the inputs: skipped targets
            # then cost nothing and no dataset handles are left open.
            if Path(filepath_prra).exists():
                print('   -> Exists. skipping')
                continue

            Path(filepath_prra).parent.mkdir(parents=True, exist_ok=True)

            # Context managers guarantee both datasets are closed, including
            # when the write fails part-way through.
            with xarray.open_mfdataset(paths=filepath_pr, combine='by_coords', use_cftime=True) as data_pr:
                with xarray.open_mfdataset(paths=filepath_prsn, combine='by_coords', use_cftime=True) as data_prsn:
                    # Calculate rainfall (pr - prsn)
                    data_pr['pr'] -= data_prsn['prsn']
                    data_prra = data_pr.rename({ 'pr': 'prra' })

                    # Build the write lazily so its progress can be reported
                    # by the dask ProgressBar while it computes.
                    write = data_prra.to_netcdf(
                        filepath_prra,
                        compute=False,
                        engine='netcdf4',
                        unlimited_dims=['time']
                    )
                    with ProgressBar():
                        write.compute()

            print('   -> Saved to disk')

            # Finally, compress as to_netcdf() seems to produce large file sizes
            libs.utils.compress_nc_file(filepath_prra, filepath_prra)
            print('   -> Compressed')

In [5]:
# Fetch the ensemble definition from the project's `libs.vars` module
# (presumably a list of dicts with 'source_id' and 'variant_label', as that
# is what `create_prra` reads from each item - confirm against libs/vars).
ensemble = libs.vars.ensemble()
# Generate the `prra` file for every ensemble member and filename template;
# targets that already exist on disk are reported and skipped.
create_prra(ensemble, files)

_data/cmip6/UKESM1-0-LL/pr/pr_Amon_UKESM1-0-LL_historical_r2i1p1f2_gn_198001-201412_processed.nc
_data/cmip6/UKESM1-0-LL/prsn/prsn_Amon_UKESM1-0-LL_historical_r2i1p1f2_gn_198001-201412_processed.nc
_data/cmip6/UKESM1-0-LL/prra/prra_Amon_UKESM1-0-LL_historical_r2i1p1f2_gn_198001-201412_processed.nc
   -> Exists. skipping
_data/cmip6/UKESM1-0-LL/pr/pr_Amon_UKESM1-0-LL_ssp585_r2i1p1f2_gn_201501-210012_processed.nc
_data/cmip6/UKESM1-0-LL/prsn/prsn_Amon_UKESM1-0-LL_ssp585_r2i1p1f2_gn_201501-210012_processed.nc
_data/cmip6/UKESM1-0-LL/prra/prra_Amon_UKESM1-0-LL_ssp585_r2i1p1f2_gn_201501-210012_processed.nc
   -> Exists. skipping
_data/cmip6/NorESM2-LM/pr/pr_Amon_NorESM2-LM_historical_r1i1p1f1_gn_198001-201412_processed.nc
_data/cmip6/NorESM2-LM/prsn/prsn_Amon_NorESM2-LM_historical_r1i1p1f1_gn_198001-201412_processed.nc
_data/cmip6/NorESM2-LM/prra/prra_Amon_NorESM2-LM_historical_r1i1p1f1_gn_198001-201412_processed.nc
   -> Exists. skipping
_data/cmip6/NorESM2-LM/pr/pr_Amon_NorESM2-LM_ssp585_