This notebook produces mean states (and the corresponding standard deviations) for ARISE and its reference scenario (SSP2-4.5) within defined time windows, and writes these spatial fields to .nc files under Output_data/

Made v0 on Wednesday 10th January 2024

Alistair Duffey



In [1]:
import os
import glob
import pandas as pd
import numpy as np
import xarray as xr
from xmip.preprocessing import rename_cmip6
import matplotlib
import matplotlib.pyplot as plt
from nc_processing import calc_spatial_mean
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

In [2]:
### options
# Everything a reader might want to tune lives in this cell.

# Model
model = 'UKESM1-0-LL'

# SAI reference baseline (to be taken from joined historical-into-ssp245 runs).
# Years are INCLUSIVE. Gives warming of 1.49 in UKESM1, closest 20-year period to 1.5
baseline_start = '2013'
baseline_end = '2032'

# SAI assessment period. Years are INCLUSIVE
SAI_assessment_period_start = '2050'
SAI_assessment_period_end = '2069'

# ARISE ensemble members. We also only use these same members for the baseline
ens_mems = ['r{}i1p1f2'.format(realisation) for realisation in (1, 2, 3, 4, 8)]

# Variables to produce outputs for, mapped to their CMIP table.
# Must be 2d (i.e. not on levels). Insertion order is the processing order.
# NOTE: this name shadows the builtin vars(); it is kept because other cells refer to it.
# NOTE(review): the saved cell outputs report 14 iterations while 19 variables are
# listed here, so the stored outputs may predate this list - confirm.
vars = {}
vars.update(dict.fromkeys(
    ['tas',
     'ts',  # surface temperature for SST analysis, use atmospheric variable to avoid ocean grid pain
     'pr', 'prmax', 'tasmax', 'tasmin', 'psl', 'clwvi',
     'sfcWind', 'sfcWindmax', 'rsds', 'rsdscs', 'prsn'],
    'Amon'))
vars.update(dict.fromkeys(
    ['snc',   # Snow Area Percentage [%]
     'snw'],  # Surface Snow Amount [kg m-2]
    'LImon'))
vars['evspsbl'] = 'Amon'
vars['siconca'] = 'SImon'
vars.update(dict.fromkeys(['npp', 'gpp'], 'Lmon'))

# variables in maxvars are grouped by seasonal max rather than mean,
# variables in minvars by seasonal min rather than mean
maxvars = ['tasmax', 'prmax', 'sfcWindmax']
minvars = ['tasmin']

# seasons
seasons = 'DJF MAM JJA SON'.split()

In [3]:
# Joined historical + SSP2-4.5 dataset for one variable, stacked over ensemble members
def get_ssp245_ds(variable, table='Amon'):
    """Open historical and ssp245 UKESM1-0-LL runs for `variable` and join them in time.

    For each member in the module-level `ens_mems` list, the historical run is
    concatenated in front of the ssp245 run along `time`, the result is cut to
    1990-2150, and the 'height' / 'type' variables are dropped if present.
    No averaging is done here.

    Parameters
    ----------
    variable : str
        CMIP6 variable name used in the archive path (e.g. 'tas').
    table : str, default 'Amon'
        CMIP6 table name used in the archive path.

    Returns
    -------
    xarray.Dataset
        Per-member datasets concatenated along a new 'Ensemble_member'
        dimension, in the order of `ens_mems`.

    Raises
    ------
    FileNotFoundError
        If no historical directory matches for a member.
    """
    member_datasets = []
    for es in ens_mems:
        path = '/badc/cmip6/data/CMIP6/ScenarioMIP/MOHC/UKESM1-0-LL/ssp245/{e}/{t}/{v}/*/latest/'.format(e=es, t=table, v=variable)
        ds = rename_cmip6(xr.open_mfdataset(path+'*.nc'))

        hist_pattern = '/badc/cmip6/data/CMIP6/*/*/UKESM1-0-LL/historical/{e}/{t}/{v}/*/latest/'.format(
            t=table, v=variable, e=es)
        # sorted so that the choice is deterministic if more than one directory matches
        hist_dirs = sorted(glob.glob(hist_pattern))
        if not hist_dirs:
            raise FileNotFoundError(
                'No historical data directory matches {p}'.format(p=hist_pattern))
        ds_hist = rename_cmip6(xr.open_mfdataset(hist_dirs[0]+'*.nc'))

        ds = xr.concat([ds_hist, ds], dim='time')
        ds = ds.sel(time=slice('1990', '2150'))
        # drop_vars replaces the deprecated Dataset.drop; errors='ignore' keeps the
        # original "only if present" behaviour
        ds = ds.drop_vars(['height', 'type'], errors='ignore')
        member_datasets.append(ds)

    DS = xr.concat(member_datasets, dim='Ensemble_member')
    return DS

## similar, for PiControl
def get_pi(model, variable='tas', table='Amon'):
    """Open the first three piControl files of `model` for one variable.

    Only three files are read because the full-length control run is not needed.
    Files are taken in sorted name order so the selection is reproducible
    (os.listdir order is arbitrary). CMIP6 file names end in a date range, so
    name order is presumably chronological - TODO confirm.

    Parameters
    ----------
    model : str
        Model name used in the archive path (e.g. 'UKESM1-0-LL').
    variable : str, default 'tas'
        CMIP6 variable name used in the archive path.
    table : str, default 'Amon'
        CMIP6 table name used in the archive path.

    Returns
    -------
    xarray.Dataset
        The opened files combined by xr.open_mfdataset, with 'height' and 'type'
        dropped if present. There is no ensemble dimension.

    Raises
    ------
    FileNotFoundError
        If no piControl directory matches, or the directory holds no .nc files.
    """
    pattern = '/badc/cmip6/data/CMIP6/*/*/{m}/piControl/r1i*/{t}/{v}/*/latest/'.format(m=model, t=table, v=variable)
    dir_pi = sorted(glob.glob(pattern))
    if not dir_pi:
        raise FileNotFoundError('No piControl directory matches {p}'.format(p=pattern))

    # don't need the full length run: keep the first three files in name order
    paths_pi = sorted(glob.glob(os.path.join(dir_pi[0], '*.nc')))[0:3]
    if not paths_pi:
        raise FileNotFoundError('No .nc files found in {d}'.format(d=dir_pi[0]))

    ds = rename_cmip6(xr.open_mfdataset(paths_pi))
    # drop_vars replaces the deprecated Dataset.drop; missing names are ignored
    ds = ds.drop_vars(['height', 'type'], errors='ignore')
    return ds

## for ARISE
def get_ARISE_UKESM(variable='tas', table='Amon'):
    """Open every UKESM1-0-LL arise-sai-1p5 member for one variable.

    Each directory matching the archive pattern is opened as one member; the
    'height' and 'type' variables are dropped if present.

    Parameters
    ----------
    variable : str, default 'tas'
        Variable name used in the archive path.
    table : str, default 'Amon'
        Table name used in the archive path.

    Returns
    -------
    xarray.Dataset
        Member datasets concatenated along a new 'Ensemble_member' dimension,
        in sorted path order.

    Raises
    ------
    FileNotFoundError
        If no directory matches the archive pattern.
    """
    pattern = '/badc/deposited2022/arise/data/ARISE/MOHC/UKESM1-0-LL/arise-sai-1p5/*/{t}/{v}/*/*/'.format(
        t=table, v=variable)
    # glob order is arbitrary; sort so member order is the same on every run
    paths = sorted(glob.glob(pattern))
    if not paths:
        raise FileNotFoundError('No ARISE data directory matches {p}'.format(p=pattern))

    member_datasets = []
    for path in paths:
        ds = rename_cmip6(xr.open_mfdataset(path+'*.nc'))
        # drop_vars replaces the deprecated Dataset.drop; missing names are ignored
        ds = ds.drop_vars(['height', 'type'], errors='ignore')
        member_datasets.append(ds)

    DS = xr.concat(member_datasets, dim='Ensemble_member')
    return DS

def get_seasonal(ds, var):
    """Resample `ds` along time into quarters starting in December ("QS-DEC").

    The reduction applied within each quarter depends on `var`:
    max if it is listed in `maxvars`, min if it is listed in `minvars`,
    otherwise mean.
    """
    quarterly = ds.resample(time="QS-DEC")
    if var in maxvars:
        return quarterly.max()
    if var in minvars:
        return quarterly.min()
    return quarterly.mean()

In [4]:
print('Getting baseline')
ds_list, ds_list_seasonal = [], []
baseline_window = slice(baseline_start, baseline_end)

# NOTE(review): quarters are selected by their label. If QS-DEC labels each quarter by
# its start date, the DJF starting in December of baseline_end is kept and the DJF
# starting in the December before baseline_start is not - confirm this is intended.
for var, table in tqdm(vars.items()):
    ds_full = get_ssp245_ds(variable=var, table=table)
    # resample on the full record first, then cut both versions to the baseline window
    ds_seasonal = get_seasonal(ds_full, var).sel(time=baseline_window)
    ds = ds_full.sel(time=baseline_window)
    for windowed in (ds, ds_seasonal):
        windowed.attrs['t_bnds'] = [baseline_start, baseline_end]
    ds_list.append(ds)
    ds_list_seasonal.append(ds_seasonal)

# one dataset holding every variable, for the monthly and the seasonal data
out_baseline = xr.merge(ds_list)
out_baseline_seasonal = xr.merge(ds_list_seasonal)

Getting baseline


100%|██████████| 14/14 [27:24<00:00, 117.47s/it] 


In [5]:
print('Getting ssp245')
ds_list, ds_list_seasonal = [], []
assessment_window = slice(SAI_assessment_period_start, SAI_assessment_period_end)

# NOTE(review): quarters are selected by their label, so the DJF quarters kept depend
# on how QS-DEC labels them relative to the window edges - confirm this is intended.
for var, table in tqdm(vars.items()):
    ds_full = get_ssp245_ds(variable=var, table=table)
    # resample on the full record first, then cut both versions to the assessment window
    ds_seasonal = get_seasonal(ds_full, var).sel(time=assessment_window)
    ds = ds_full.sel(time=assessment_window)
    for windowed in (ds, ds_seasonal):
        windowed.attrs['t_bnds'] = [SAI_assessment_period_start, SAI_assessment_period_end]
    ds_list.append(ds)
    ds_list_seasonal.append(ds_seasonal)

# one dataset holding every variable, for the monthly and the seasonal data
out_ssp245 = xr.merge(ds_list)
out_ssp245_seasonal = xr.merge(ds_list_seasonal)

Getting ssp245


100%|██████████| 14/14 [01:04<00:00,  4.59s/it]


In [6]:
print('Getting PI')
ds_list, ds_list_seasonal = [], []

n_years_pi = 100  # keep only 100 years
monthly_steps = slice(0, n_years_pi * 12)
# the seasonal selection starts at index 1, i.e. the first quarter is skipped
# (presumably because it is an incomplete DJF - TODO confirm)
seasonal_steps = slice(1, (n_years_pi * 4) + 1)

for var, table in tqdm(vars.items()):
    ds_full = get_pi(model, variable=var, table=table)
    ds_seasonal = get_seasonal(ds_full, var).isel(time=seasonal_steps)
    ds = ds_full.isel(time=monthly_steps)
    ds_list.append(ds)
    ds_list_seasonal.append(ds_seasonal)

# one dataset holding every variable, for the monthly and the seasonal data
out_PI = xr.merge(ds_list)
out_PI_seasonal = xr.merge(ds_list_seasonal)

Getting PI


100%|██████████| 14/14 [02:06<00:00,  9.04s/it]


In [7]:
print('Getting ARISE')
ds_list, ds_list_seasonal = [], []
assessment_window = slice(SAI_assessment_period_start, SAI_assessment_period_end)

# NOTE(review): quarters are selected by their label, so a final DJF starting in
# December of the last assessment year may be built from fewer than three months if
# the run stops at the end of that year - confirm.
for var, table in tqdm(vars.items()):
    ds_full = get_ARISE_UKESM(variable=var, table=table)
    # resample on the full record first, then cut both versions to the assessment window
    ds_seasonal = get_seasonal(ds_full, var).sel(time=assessment_window)
    ds = ds_full.sel(time=assessment_window)
    for windowed in (ds, ds_seasonal):
        windowed.attrs['t_bnds'] = [SAI_assessment_period_start, SAI_assessment_period_end]
    ds_list.append(ds)
    ds_list_seasonal.append(ds_seasonal)

# one dataset holding every variable, for the monthly and the seasonal data
out_ARISE = xr.merge(ds_list)
out_ARISE_seasonal = xr.merge(ds_list_seasonal)

Getting ARISE


100%|██████████| 14/14 [01:52<00:00,  8.01s/it]


In [8]:
def process_and_save(ds, ds_seasonal, label, seasons=seasons):
    """Write mean and standard-deviation fields to netCDF files under Output_data/<label>/.

    Inputs
    ds: time-resolved spatial dataset (not seasonally resampled), optionally with
        an 'Ensemble_member' dimension. Used for the '<label>_all_*' files.
    ds_seasonal: the quarterly-resampled counterpart of ds, optionally with an
        'Ensemble_member' dimension. Used for the per-season files.
    label: name of the output sub-directory and prefix of every file name
        (e.g. 'SSP245_baseline', 'SSP245', 'preindustrial', 'ARISE').
    seasons: season codes to write, matched against `time.dt.season`.

    The statistics are taken over the combined time + Ensemble_member dimensions
    when the dataset has an 'Ensemble_member' dimension, and over time alone
    otherwise. For every season this writes '<label>_<season>_std.nc' and
    '<label>_<season>_mean.nc'; for the full record it writes '<label>_all_std.nc'
    and '<label>_all_mean.nc'. The '_all_' statistics are taken over every time
    step of ds as given, whatever the variable.
    """
    path = 'Output_data/{l}/'.format(l=label)
    # makedirs also creates the parent 'Output_data' and tolerates an existing directory
    os.makedirs(path, exist_ok=True)

    def _reduction_dims(dataset):
        # Explicit dimension check instead of a bare `except:` fallback, so that
        # genuine compute / write errors are no longer swallowed and retried.
        dims = ['time']
        if 'Ensemble_member' in dataset.dims:
            dims.append('Ensemble_member')
        return dims

    seasonal_dims = _reduction_dims(ds_seasonal)
    for season in seasons:
        ds_season = ds_seasonal.where(ds_seasonal.time.dt.season == season, drop=True)
        ds_season.std(dim=seasonal_dims).to_netcdf(path + '{l}_{s}_std.nc'.format(l=label, s=season))
        ds_season.mean(dim=seasonal_dims).to_netcdf(path + '{l}_{s}_mean.nc'.format(l=label, s=season))

    # repeat for the full (non-seasonal) record:
    all_dims = _reduction_dims(ds)
    ds.std(dim=all_dims).to_netcdf(path + '{l}_all_std.nc'.format(l=label))
    ds.mean(dim=all_dims).to_netcdf(path + '{l}_all_mean.nc'.format(l=label))

In [9]:
# output label -> [monthly dataset, seasonal dataset]; the label names the output folder
dict_to_run = dict(
    SSP245_baseline=[out_baseline, out_baseline_seasonal],
    SSP245=[out_ssp245, out_ssp245_seasonal],
    preindustrial=[out_PI, out_PI_seasonal],
    ARISE=[out_ARISE, out_ARISE_seasonal],
)

In [10]:
# Write the outputs for every scenario (takes maybe 20-30 mins with 14 variables)
for key, (ds_monthly, ds_quarterly) in dict_to_run.items():
    print(key)
    process_and_save(ds=ds_monthly, ds_seasonal=ds_quarterly, label=key, seasons=seasons)

SSP245_baseline
SSP245
preindustrial
ARISE
