## Get Max Variables
This notebook takes raw daily data and outputs variables conditioned on the maximum `t2m` in each year.

The `t2m_av_use` variable sets whether to condition on the daily mean or the daily maximum `t2m`. Zhang 2023 used the daily maximum.

In [1]:
import os
import xarray as xr
import numpy as np
from numba import jit
from tqdm.notebook import tqdm
print(os.getcwd())

/home/users/jamd1/Isca


In [2]:
# Root directory of the ERA5 daily data used in this notebook
dir_base = '/gws/nopw/j04/global_ex/jamd1/era5/'

# Daily-mean fields, one directory per variable
dir_t500, dir_z500, dir_sp = (
    os.path.join(dir_base, 'daily_mean', name) for name in ('t500', 'z500', 'sp')
)

# 2m temperature directories, keyed by the type of daily statistic ('mean' or 'max')
dir_t2m = {
    'mean': os.path.join(dir_base, 'daily_mean', 't2m'),
    'max': os.path.join(dir_base, 'daily_max', 't2m'),
}

In [3]:
# Which daily t2m statistic to condition on: 'mean' or 'max' (the keys of `dir_t2m`).
# Output variables are taken on the day of each year when this statistic is largest.
t2m_av_use = 'mean'

In [4]:
@jit
def _at_t2m_max_ufunc(t2m,y):
    """Return the element of `y` at the position where `t2m` is largest.

    Both arguments are indexed along the same axis; `xr_at_t2m_max` passes
    them in with `time` as the core dimension.
    """
    ind_max = np.argmax(t2m)    # position of the maximum of t2m
    return y[ind_max]

def xr_at_t2m_max(t2m,y):
    """Find the level of a variable on the annual hottest day for each location.

    Applies `_at_t2m_max_ufunc` through `xr.apply_ufunc`, with `time` as the
    core dimension of both inputs, so the `time` dimension is reduced away.

    Args:
        t2m: Temperature used to locate the hottest day; must have a `time` dimension.
        y: Variable to sample on that day; its `dtype` is used as the output dtype.

    Returns:
        Whatever `xr.apply_ufunc` returns for these inputs: the value of `y`
        at the `time` index where `t2m` is largest.
    """
    ufunc_kwargs = dict(
        input_core_dims=[['time'], ['time']],   # the reduction runs over time for both inputs
        dask='parallelized',
        vectorize=True,
        output_dtypes=[y.dtype],
    )
    return xr.apply_ufunc(_at_t2m_max_ufunc, t2m, y, **ufunc_kwargs)

In [5]:
def load_ds(path, rename_valid_time=True):
    """Open the file(s) at `path` with `xr.open_mfdataset`, optionally renaming `valid_time`.

    The t500 dataset has time as `valid_time` rather than `time`; renaming it
    lets all variables be handled with a common `time` dimension.

    Args:
        path: Path passed straight to `xr.open_mfdataset`.
        rename_valid_time: If True and the dataset has a `valid_time`
            dimension, rename that dimension to `time`.

    Returns:
        The opened dataset, with `valid_time` renamed to `time` when applicable.
    """
    ds = xr.open_mfdataset(path)
    # Nothing to do if renaming is switched off or the dataset already uses `time`
    if not rename_valid_time or 'valid_time' not in ds.dims:
        return ds
    return ds.rename({'valid_time': 'time'})

In [7]:
# For each year, load in data, compute day with max t2m, and output t500 on this day as well as the index of this day
var_input_dir = {'t2m': dir_t2m[t2m_av_use], 't500': dir_t500}
var_id = {'t2m': 't2m', 't500': 't'}    # name of var within input directory

var_out = {key: [] for key in ['t2m', 't500', 'day_ind']}
for year in tqdm(range(1979, 2022)):
    var_in_use = {key: load_ds(f"{var_input_dir[key]}/{year}.nc").chunk(dict(time=-1))[var_id[key]] for key in var_input_dir}
    var_in_use['day_ind'] = np.arange(var_in_use['t2m'].time.size)
    for key in var_out:
        var_out_use = xr_at_t2m_max(var_in_use['t2m'], var_in_use[key]).compute()
        var_out_use = var_out_use.expand_dims(year=[year])
        var_out[key].append(var_out_use)

  0%|          | 0/43 [00:00<?, ?it/s]

In [8]:
def set_attrs(var, long_name, units, description):
    """Set the main attributes of the given variable.

    Args:
        var: Object with a dict-like `attrs`; it is modified in place.
        long_name: Value stored under `attrs['long_name']`.
        units: Value stored under `attrs['units']`.
        description: Value stored under `attrs['description']`.

    Returns:
        The same `var` object, to allow use in an assignment.
    """
    var.attrs.update(long_name=long_name, units=units, description=description)
    return var

In [9]:
# Save data for all years in a single file for each variable
# Save data for all years in a single file for each variable
complevel = 4           # zlib compression level applied to every saved variable
var_out_file_name = {'t2m': 't2m_max', 't500': 't500_of_t2m_max', 'day_ind': 'day_ind_of_t2m_max'}
dir_out = f'/home/users/jamd1/Isca/jobs/era5/zhang_2023/processed/daily_{t2m_av_use}'
# Make sure the output directory exists (e.g. first run with a new `t2m_av_use`), otherwise saving fails
os.makedirs(dir_out, exist_ok=True)
# Build paths from the variables named in this cell, not from the keys of `var_in_use`,
# which is only a leftover of the last iteration of the loading loop
var_out_path = {key: f'{dir_out}/{var_out_file_name[key]}.nc' for key in var_out_file_name}
# NOTE: long_name and units of t2m are copied from the raw t2m data of the last year loaded in the loop
var_out_attrs = {'t2m': {'long_name': var_in_use['t2m'].long_name, 'units': var_in_use['t2m'].units,
                         'description': f'Max of daily {t2m_av_use} 2m temperature in a given year.'},
                 't500': {'long_name': '500hPa temperature', 'units': 'K',
                          'description': f'500 hPa temperature on day with maximum daily {t2m_av_use} 2m temperature in a given year.'},
                 'day_ind': {'long_name': 'Day index', 'units': 'Day',
                              'description': f'Day (0 is Jan 1st) with maximum daily {t2m_av_use} 2m temperature in a given year.'}
                 }
for key in var_out_path:
    # Combine the per-year results into a single array with a `year` dimension
    var_use = xr.concat(var_out[key], dim='year')
    if key == 'day_ind':
        # time ind has max value of 365 so can use int16
        var_use = var_use.astype('int16')
    var_use = set_attrs(var_use, var_out_attrs[key]['long_name'], var_out_attrs[key]['units'], var_out_attrs[key]['description'])
    var_use = xr.Dataset({key: var_use})    # make sure save as dataset rather than data array
    encoding = {var: {'zlib': True, 'complevel': complevel} for var in var_use.data_vars}
    var_use.to_netcdf(var_out_path[key], encoding=encoding)