The following code processes monthly faf-water SST data, using ocean potential temperature fields (thetao) or sea surface temperature (tos) ESGF and the intake-esm datastore, producing the following files used in the analysis notebooks: SST_response_maps_fafwater.nc, and fafwater_CRFs_unscaled.nc

For the response maps, all data is regridded to a common 1x1 grid. Large memory allocations are required to process the piControl data (100 GB used here). 

Directory paths must be changed to a user's local environment if reproducing 

In [1]:
import os
import numpy as np
import xarray as xr
import pandas as pd
import xesmf as xe
import gsw
from glob import glob
import netCDF4
import gc

from xmip.preprocessing import rename_cmip6
from xmip.preprocessing import broadcast_lonlat
from xmip.preprocessing import promote_empty_dims
from xmip.preprocessing import combined_preprocessing
from xmip.utils import google_cmip_col
from xmip.postprocessing import interpolate_grid_label
from xmip.postprocessing import pick_first_member
from xmip.postprocessing import concat_members

import dask
import dask.array as da
import dask_jobqueue
from dask.distributed import Client, config
from dask.diagnostics import ProgressBar


%matplotlib inline 
import warnings
warnings.filterwarnings("ignore")

In [2]:
########################################################################
# extract contributing model names for a given directory/experiment

def extract_model_names(directory,experiment):
    model_names = []
    file_pattern = os.path.join(directory, '*_*_{}.nc'.format(experiment))

    for file_path in glob(file_pattern):
        filename = os.path.basename(file_path)
        model_name = filename.split('_')[1]  # Extract the model name from the filename
        model_names.append(model_name)
        
    # Remove duplicates by converting the list to a set and then back to a list
    model_names = list(set(model_names))

    return model_names

########################################################################

# get faf-water contributing model names

directory_of_interest = '/oak/stanford/groups/earlew/zkaufman/Archive_KaufmanGRL2025/fafwater'
model_names_fafwater = extract_model_names(directory_of_interest,'faf-water')
print('faf-water contributing models:')
print(model_names_fafwater)


faf-water contributing models:
['ACCESS-CM2', 'HadGEM3-GC31-LL', 'CAS-ESM2-0', 'CanESM5', 'GFDL-ESM2M', 'MIROC6', 'CESM2', 'FGOALS-g3', 'MPI-ESM1-2-LR', 'MRI-ESM2-0', 'MPI-ESM1-2-HR']


In [3]:
# initialize dask client for preprocessing. 

cluster = dask.distributed.LocalCluster\
(dashboard_address=':9000',n_workers=40,memory_limit='100GB')

client = dask.distributed.Client(cluster)
client


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:9000/status,

0,1
Dashboard: http://127.0.0.1:9000/status,Workers: 40
Total threads: 40,Total memory: 3.64 TiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:36311,Workers: 40
Dashboard: http://127.0.0.1:9000/status,Total threads: 40
Started: Just now,Total memory: 3.64 TiB

0,1
Comm: tcp://127.0.0.1:38796,Total threads: 1
Dashboard: http://127.0.0.1:45479/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:44851,
Local directory: /tmp/dask-worker-space/worker-ost15oon,Local directory: /tmp/dask-worker-space/worker-ost15oon

0,1
Comm: tcp://127.0.0.1:44014,Total threads: 1
Dashboard: http://127.0.0.1:37536/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:33611,
Local directory: /tmp/dask-worker-space/worker-fvxnt2qv,Local directory: /tmp/dask-worker-space/worker-fvxnt2qv

0,1
Comm: tcp://127.0.0.1:39474,Total threads: 1
Dashboard: http://127.0.0.1:36434/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:39747,
Local directory: /tmp/dask-worker-space/worker-tmesg5x9,Local directory: /tmp/dask-worker-space/worker-tmesg5x9

0,1
Comm: tcp://127.0.0.1:39908,Total threads: 1
Dashboard: http://127.0.0.1:39382/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:34307,
Local directory: /tmp/dask-worker-space/worker-vya0lkwa,Local directory: /tmp/dask-worker-space/worker-vya0lkwa

0,1
Comm: tcp://127.0.0.1:43084,Total threads: 1
Dashboard: http://127.0.0.1:40490/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:36998,
Local directory: /tmp/dask-worker-space/worker-x5vuhsew,Local directory: /tmp/dask-worker-space/worker-x5vuhsew

0,1
Comm: tcp://127.0.0.1:35516,Total threads: 1
Dashboard: http://127.0.0.1:32818/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:40314,
Local directory: /tmp/dask-worker-space/worker-sav2cigc,Local directory: /tmp/dask-worker-space/worker-sav2cigc

0,1
Comm: tcp://127.0.0.1:33743,Total threads: 1
Dashboard: http://127.0.0.1:46599/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:43821,
Local directory: /tmp/dask-worker-space/worker-vv4yvvga,Local directory: /tmp/dask-worker-space/worker-vv4yvvga

0,1
Comm: tcp://127.0.0.1:38983,Total threads: 1
Dashboard: http://127.0.0.1:37861/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:46426,
Local directory: /tmp/dask-worker-space/worker-i6r9zzpm,Local directory: /tmp/dask-worker-space/worker-i6r9zzpm

0,1
Comm: tcp://127.0.0.1:37826,Total threads: 1
Dashboard: http://127.0.0.1:43122/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:37785,
Local directory: /tmp/dask-worker-space/worker-n5xz9i90,Local directory: /tmp/dask-worker-space/worker-n5xz9i90

0,1
Comm: tcp://127.0.0.1:40232,Total threads: 1
Dashboard: http://127.0.0.1:35441/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:37534,
Local directory: /tmp/dask-worker-space/worker-2n59rh65,Local directory: /tmp/dask-worker-space/worker-2n59rh65

0,1
Comm: tcp://127.0.0.1:38111,Total threads: 1
Dashboard: http://127.0.0.1:37288/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:36580,
Local directory: /tmp/dask-worker-space/worker-yeddbbih,Local directory: /tmp/dask-worker-space/worker-yeddbbih

0,1
Comm: tcp://127.0.0.1:33741,Total threads: 1
Dashboard: http://127.0.0.1:36837/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:38330,
Local directory: /tmp/dask-worker-space/worker-s_8ikpw8,Local directory: /tmp/dask-worker-space/worker-s_8ikpw8

0,1
Comm: tcp://127.0.0.1:44530,Total threads: 1
Dashboard: http://127.0.0.1:39903/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:43253,
Local directory: /tmp/dask-worker-space/worker-us0u1rlm,Local directory: /tmp/dask-worker-space/worker-us0u1rlm

0,1
Comm: tcp://127.0.0.1:38479,Total threads: 1
Dashboard: http://127.0.0.1:35435/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:43333,
Local directory: /tmp/dask-worker-space/worker-0w83twag,Local directory: /tmp/dask-worker-space/worker-0w83twag

0,1
Comm: tcp://127.0.0.1:45368,Total threads: 1
Dashboard: http://127.0.0.1:41985/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:43471,
Local directory: /tmp/dask-worker-space/worker-01aw8xd9,Local directory: /tmp/dask-worker-space/worker-01aw8xd9

0,1
Comm: tcp://127.0.0.1:33350,Total threads: 1
Dashboard: http://127.0.0.1:42488/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:41357,
Local directory: /tmp/dask-worker-space/worker-0j_hn2pb,Local directory: /tmp/dask-worker-space/worker-0j_hn2pb

0,1
Comm: tcp://127.0.0.1:34014,Total threads: 1
Dashboard: http://127.0.0.1:46722/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:35564,
Local directory: /tmp/dask-worker-space/worker-f1x6qtu2,Local directory: /tmp/dask-worker-space/worker-f1x6qtu2

0,1
Comm: tcp://127.0.0.1:44743,Total threads: 1
Dashboard: http://127.0.0.1:35704/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:34085,
Local directory: /tmp/dask-worker-space/worker-lfu65skk,Local directory: /tmp/dask-worker-space/worker-lfu65skk

0,1
Comm: tcp://127.0.0.1:45939,Total threads: 1
Dashboard: http://127.0.0.1:33779/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:34567,
Local directory: /tmp/dask-worker-space/worker-nme4tead,Local directory: /tmp/dask-worker-space/worker-nme4tead

0,1
Comm: tcp://127.0.0.1:37234,Total threads: 1
Dashboard: http://127.0.0.1:39766/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:33322,
Local directory: /tmp/dask-worker-space/worker-zznqm9xu,Local directory: /tmp/dask-worker-space/worker-zznqm9xu

0,1
Comm: tcp://127.0.0.1:40147,Total threads: 1
Dashboard: http://127.0.0.1:38642/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:43704,
Local directory: /tmp/dask-worker-space/worker-nydo6vf3,Local directory: /tmp/dask-worker-space/worker-nydo6vf3

0,1
Comm: tcp://127.0.0.1:40970,Total threads: 1
Dashboard: http://127.0.0.1:36567/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:40323,
Local directory: /tmp/dask-worker-space/worker-hl0qzdxq,Local directory: /tmp/dask-worker-space/worker-hl0qzdxq

0,1
Comm: tcp://127.0.0.1:45058,Total threads: 1
Dashboard: http://127.0.0.1:41138/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:45872,
Local directory: /tmp/dask-worker-space/worker-6arvbslj,Local directory: /tmp/dask-worker-space/worker-6arvbslj

0,1
Comm: tcp://127.0.0.1:39887,Total threads: 1
Dashboard: http://127.0.0.1:33766/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:46543,
Local directory: /tmp/dask-worker-space/worker-umbui87t,Local directory: /tmp/dask-worker-space/worker-umbui87t

0,1
Comm: tcp://127.0.0.1:42365,Total threads: 1
Dashboard: http://127.0.0.1:45510/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:33866,
Local directory: /tmp/dask-worker-space/worker-kw5ds_fq,Local directory: /tmp/dask-worker-space/worker-kw5ds_fq

0,1
Comm: tcp://127.0.0.1:42653,Total threads: 1
Dashboard: http://127.0.0.1:34947/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:33436,
Local directory: /tmp/dask-worker-space/worker-9qid6aje,Local directory: /tmp/dask-worker-space/worker-9qid6aje

0,1
Comm: tcp://127.0.0.1:33965,Total threads: 1
Dashboard: http://127.0.0.1:41141/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:42361,
Local directory: /tmp/dask-worker-space/worker-uub7ablj,Local directory: /tmp/dask-worker-space/worker-uub7ablj

0,1
Comm: tcp://127.0.0.1:41927,Total threads: 1
Dashboard: http://127.0.0.1:36157/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:36918,
Local directory: /tmp/dask-worker-space/worker-wnamns2v,Local directory: /tmp/dask-worker-space/worker-wnamns2v

0,1
Comm: tcp://127.0.0.1:32990,Total threads: 1
Dashboard: http://127.0.0.1:41167/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:34545,
Local directory: /tmp/dask-worker-space/worker-nkt6l8sn,Local directory: /tmp/dask-worker-space/worker-nkt6l8sn

0,1
Comm: tcp://127.0.0.1:33064,Total threads: 1
Dashboard: http://127.0.0.1:39628/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:39730,
Local directory: /tmp/dask-worker-space/worker-0_kirnka,Local directory: /tmp/dask-worker-space/worker-0_kirnka

0,1
Comm: tcp://127.0.0.1:43679,Total threads: 1
Dashboard: http://127.0.0.1:40762/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:35802,
Local directory: /tmp/dask-worker-space/worker-r7i5l5t8,Local directory: /tmp/dask-worker-space/worker-r7i5l5t8

0,1
Comm: tcp://127.0.0.1:42220,Total threads: 1
Dashboard: http://127.0.0.1:46700/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:41254,
Local directory: /tmp/dask-worker-space/worker-fmv0uvdi,Local directory: /tmp/dask-worker-space/worker-fmv0uvdi

0,1
Comm: tcp://127.0.0.1:37482,Total threads: 1
Dashboard: http://127.0.0.1:37565/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:35875,
Local directory: /tmp/dask-worker-space/worker-xqe2nz36,Local directory: /tmp/dask-worker-space/worker-xqe2nz36

0,1
Comm: tcp://127.0.0.1:35479,Total threads: 1
Dashboard: http://127.0.0.1:36173/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:40248,
Local directory: /tmp/dask-worker-space/worker-sxmz6xsg,Local directory: /tmp/dask-worker-space/worker-sxmz6xsg

0,1
Comm: tcp://127.0.0.1:36282,Total threads: 1
Dashboard: http://127.0.0.1:33795/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:39540,
Local directory: /tmp/dask-worker-space/worker-5n2n24x_,Local directory: /tmp/dask-worker-space/worker-5n2n24x_

0,1
Comm: tcp://127.0.0.1:33940,Total threads: 1
Dashboard: http://127.0.0.1:38858/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:40383,
Local directory: /tmp/dask-worker-space/worker-qxbx9gbx,Local directory: /tmp/dask-worker-space/worker-qxbx9gbx

0,1
Comm: tcp://127.0.0.1:34899,Total threads: 1
Dashboard: http://127.0.0.1:40979/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:44400,
Local directory: /tmp/dask-worker-space/worker-13814a6j,Local directory: /tmp/dask-worker-space/worker-13814a6j

0,1
Comm: tcp://127.0.0.1:43873,Total threads: 1
Dashboard: http://127.0.0.1:33152/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:35685,
Local directory: /tmp/dask-worker-space/worker-8im06cdv,Local directory: /tmp/dask-worker-space/worker-8im06cdv

0,1
Comm: tcp://127.0.0.1:35143,Total threads: 1
Dashboard: http://127.0.0.1:42793/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:45842,
Local directory: /tmp/dask-worker-space/worker-rcea1a_f,Local directory: /tmp/dask-worker-space/worker-rcea1a_f

0,1
Comm: tcp://127.0.0.1:33686,Total threads: 1
Dashboard: http://127.0.0.1:33087/status,Memory: 93.13 GiB
Nanny: tcp://127.0.0.1:33648,
Local directory: /tmp/dask-worker-space/worker-h3dyfx6s,Local directory: /tmp/dask-worker-space/worker-h3dyfx6s


In [4]:
%%time 
%env PYTHONWARNINGS=ignore

# build dictionary for faf-water 

thetao_fafwater = {} 

########################################################################
# Homogenize attributes of the CMIP6 faf-water experiments, select uppermost level (if using thetao)
# take the annual mean. 

def process_dataset(model, experiment):
    os.chdir('/oak/stanford/groups/earlew/zkaufman/Archive_KaufmanGRL2025/fafwater')
    ds = xr.open_dataset(f'tos_{model}_{experiment}.nc', chunks={'time': 100})
    if model=='CAS-ESM2-0' or model=='FGOALS-g3':
        ds = broadcast_lonlat(rename_cmip6(ds)).thetao.sel(lev=0, method='nearest')
    else:
        ds = broadcast_lonlat(rename_cmip6(ds)).tos
    ds['time'] = xr.cftime_range(start='1000', periods=len(ds['time']), freq='ME') 
    ds = ds.resample(time='YE').mean()
    
    return ds
########################################################################

delayed_results = [dask.delayed(process_dataset)(model,'faf-water') for model in model_names_fafwater]
results = dask.compute(delayed_results)

thetao_fafwater = {f'thetao_{model}_faf-water': ds for model, ds in zip(model_names_fafwater, results[0])}

# fix land masking issue with FGOALS model 

thetao_fafwater['thetao_FGOALS-g3_faf-water'] = thetao_fafwater['thetao_FGOALS-g3_faf-water']\
.where(thetao_fafwater['thetao_FGOALS-g3_faf-water'] < 1e10, np.nan)





  var = coder.decode(var, name=name)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return np.asarray(self.get_duck_array(), dtype=dtype)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return np.asarray(self.get_duck_array(), dtype=dtype)


CPU times: user 8.73 s, sys: 1.39 s, total: 10.1 s
Wall time: 51.8 s


In [5]:
%%time

# build second dictionary for regridded faf-water data. 

thetao_fafwater_rg = {}

########################################################################
# regrid to standard 1x1 grid using bilinear interpolation. 
# drop dimensions other than time,lat,lon

def regrid_dataarray(data):
    
    keep_coords = {'time', 'lat', 'lon'}
    drop_coords = set(data.coords) - keep_coords

    ds_out = xe.util.grid_global(1,1)
    ds_in = xr.DataArray.to_dataset(data.drop_vars(drop_coords))
    
    regridder = xe.Regridder(ds_in,ds_out, 'bilinear', periodic=True, ignore_degenerate=True)
    
    return regridder(data.drop_vars(drop_coords))
######################################################################## 

delayed_results = [dask.delayed(regrid_dataarray)(thetao_fafwater[key]) \
                   for key in thetao_fafwater.keys()]

results = dask.compute(delayed_results)

thetao_fafwater_rg = {f'thetao_{model}_faf-water': ds for model, ds in zip(model_names_fafwater, results[0])}



CPU times: user 10.5 s, sys: 1.69 s, total: 12.2 s
Wall time: 46.2 s


In [6]:
%%time 
%env PYTHONWARNINGS=ignore

# Access piControl data for these same models from the intake ESM datastore. 
# CAS-ESM2-0 and GFDL-ESM2M are not accessible here and are merged in later from saved output accessed on ESGF 

col = google_cmip_col()

df_base = col.search(
    activity_id=['CMIP'],
    table_id = ['Omon'],
    variable_id = ['thetao'],
    experiment_id = ['piControl']
)

# use faf-water attributes to find corresponding models and their ensemble members 

df_piControl = df_base.search(
    source_id = model_names_fafwater,
    member_id = ['r1i1p1f1','r1i1p2f1'] 
)

# set kwargs for preprocessing 

kwargs = {
    'zarr_kwargs':{
        'consolidated':True,
    },
    'aggregate':False,
    'preprocess':combined_preprocessing
}

# create xarray dictionary from search query. 

with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    thetao_piControl_fw = df_piControl.to_dataset_dict(**kwargs)
    
    
# check for un-needed ensemble members and grid variants. 

for key in thetao_piControl_fw.keys():
    
    print(thetao_piControl_fw[key].attrs['source_id'], \
          thetao_piControl_fw[key].attrs['grid_label'],thetao_piControl_fw[key].attrs['variant_label'])


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.member_id.table_id.variable_id.grid_label.zstore.dcpp_init_year.version'


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return np.asarray(self.get_duck_array(), dtype=dtype)
  return np.asarray(self.get_duck_array(), dtype=dtype)
  return np.asarray(self.get_duck_array(), dtype=dtype)
    incompatible units for variable 'lev': cannot convert a non-quantity using 'm' as unit
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return np.asarray(self.get_duck_array(), dtype=dtype)
  return np.asarray(self.get_duck_array(), dtype=dtype)


MPI-ESM1-2-HR gn r1i1p1f1
MPI-ESM1-2-LR gn r1i1p1f1
CESM2 gn r1i1p1f1
CanESM5 gn r1i1p2f1
CanESM5 gn r1i1p1f1
MIROC6 gn r1i1p1f1
FGOALS-g3 gn r1i1p1f1
ACCESS-CM2 gn r1i1p1f1
CESM2 gr r1i1p1f1
HadGEM3-GC31-LL gn r1i1p1f1
MRI-ESM2-0 gr r1i1p1f1
CPU times: user 11.5 s, sys: 1.46 s, total: 12.9 s
Wall time: 59.4 s


In [7]:
# remove un-needed grid variants. 

thetao_piControl_fw = interpolate_grid_label(thetao_piControl_fw, target_grid_label='gn')

# find and remove CanESM5 ensemble member r1i1p1f1, since faf-water uses r1i1p1f2
# concat_members is an alternative option, but causes big dask delays. 

key_to_delete = None

for key, dataset in thetao_piControl_fw.items():
    if (dataset.attrs.get('source_id') == 'CanESM5' and
        dataset.attrs.get('variant_label') == 'r1i1p1f1'):
        
        key_to_delete = key
        break

if key_to_delete:
    
    del thetao_piControl_fw[key_to_delete]


# retitle intake_esm dictionary to be consistent with faf-water experiment dictionaries. 

keys_to_replace  = list(thetao_piControl_fw.keys())

for key in keys_to_replace:
    
    model = thetao_piControl_fw[key].attrs['intake_esm_attrs:source_id']

    thetao_piControl_fw[f'thetao_{model}_piControl'] = thetao_piControl_fw[key]
    
    del thetao_piControl_fw[key]

In [8]:
%%time

# pre-process and regrid piControl data as in faf-water, but with syntax optimized for intake_ESM data. 
# limit to first 100 years of piControl 

keys_intakesm = list(thetao_piControl_fw.keys())

# overwrite thetao_piControl_fw dictionary contents with annual mean of first 100 year for top vertical level 
# standardize time indexing 

for key in keys_intakesm:

    thetao_piControl_fw[key] = thetao_piControl_fw[key].thetao.squeeze()\
    .sel(lev=0,method='nearest').isel(time=slice(0,1200)).compute()
    
    thetao_piControl_fw[key]['time'] = \
    xr.cftime_range(start='1000', periods=len(thetao_piControl_fw[key]['time']), freq='M')    

# fix land masking issue with FGOALS model 

thetao_piControl_fw['thetao_FGOALS-g3_piControl'] = thetao_piControl_fw['thetao_FGOALS-g3_piControl']\
.where(thetao_piControl_fw['thetao_FGOALS-g3_piControl'] < 1e10, np.nan)


# then, put regridded data in additional dictionary

thetao_piControl_fw_rg = {}

for key in keys_intakesm:

    thetao_piControl_fw_rg[key] = regrid_dataarray(thetao_piControl_fw[key])

CPU times: user 4min 53s, sys: 44.9 s, total: 5min 38s
Wall time: 17min 37s


In [9]:
%%time 
%env PYTHONWARNINGS=ignore

# redefine process_dataset function for piControl data taken from ESGF 

########################################################################

def process_dataset(variable, model, experiment):
    os.chdir('/oak/stanford/groups/earlew/zkaufman/Archive_KaufmanGRL2025/fafwater/piControl_missingfromcloud')
    ds = xr.open_dataset(f'{variable}_{model}_{experiment}.nc', chunks={'time': 100})
    ds = broadcast_lonlat(rename_cmip6(ds)).tos
    ds['time'] = xr.cftime_range(start='1000', periods=len(ds['time']), freq='M') 
    ds = ds.resample(time='YE').mean()
    
    return ds

########################################################################

# retrieve two missing piControl models from ESGF 

thetao_piControl_fw_additions = {} 

delayed_results = [dask.delayed(process_dataset)('tos',model,'piControl') \
                   for model in ['CAS-ESM2-0','GFDL-ESM2M']]
results = dask.compute(delayed_results)

thetao_piControl_fw_additions = {f'thetao_{model}_piControl': ds \
                                 for model, ds in zip(['CAS-ESM2-0','GFDL-ESM2M'], results[0])}


# regrid two missing models from ESGF

thetao_piControl_fw_additions_rg = {} 

delayed_results = [dask.delayed(regrid_dataarray)(thetao_piControl_fw_additions[key]) \
                   for key in thetao_piControl_fw_additions.keys()]

results = dask.compute(delayed_results)

thetao_piControl_fw_additions_rg = {f'thetao_{model}_piControl': ds \
                                    for model, ds in zip(['CAS-ESM2-0','GFDL-ESM2M'], results[0])}







CPU times: user 12 s, sys: 2.23 s, total: 14.2 s
Wall time: 1min 3s


In [10]:
# merge ESGF and intake_esm sources

thetao_piControl_fw = {**thetao_piControl_fw,**thetao_piControl_fw_additions}
thetao_piControl_fw_rg = {**thetao_piControl_fw_rg,**thetao_piControl_fw_additions_rg}


In [11]:
# correct units for GFDL-ESM2M piControl  (deg. C to K)

thetao_piControl_fw_rg[f'thetao_GFDL-ESM2M_piControl'] = thetao_piControl_fw_rg[f'thetao_GFDL-ESM2M_piControl']-273
thetao_piControl_fw[f'thetao_GFDL-ESM2M_piControl'] = thetao_piControl_fw[f'thetao_GFDL-ESM2M_piControl']-273

In [13]:
%%time

# Initialize lists for SST anomalies

SST_anoms_fafwater = []

# Define a function to calculate anomalies for a single model 

def calculate_anomaly_fafwater(model):
    return (thetao_fafwater_rg[f'thetao_{model}_faf-water'][60:70,:,:].mean(dim='time')
            - thetao_piControl_fw_rg[f'thetao_{model}_piControl'].mean(dim='time'))

# Calculate anomalies for each experiment and append to the lists

SST_anoms_fafwater = [calculate_anomaly_fafwater(model) for model in model_names_fafwater]

# Convert lists to xarray DataArrays

SST_anoms_fafwater = \
xr.concat(SST_anoms_fafwater, dim='model', coords='minimal', compat='override')

SST_anoms_fafwater = SST_anoms_fafwater.assign_coords(model=model_names_fafwater)

CPU times: user 1.65 s, sys: 1.46 s, total: 3.11 s
Wall time: 2.64 s


In [14]:
# save postprocessed file for anomaly maps of each model. 

os.chdir('/oak/stanford/groups/earlew/zkaufman/Archive_KaufmanGRL2025/postprocessed_analysis_notebooks')

output_filename = 'SST_response_maps_fafwater.nc'
SST_anoms_fafwater.to_netcdf(output_filename)

In [16]:
# calculate spatially averaged SST response (unscaled, for climate response functions )

def subset_bylatitude(data, south_bound, north_bound):
    lat_mask = (data.lat >= south_bound) & (data.lat <= north_bound)
    data_SO = data.where(lat_mask, drop=True)
    return data_SO


# spatially average CMIP6 Omon field over specified latitudes
# assumes spatial average is conducted over all longitudes
# Assumes regrid_dataarray function is already applied
def spatial_average(inputdata,southlat,northlat):
    data_subset = subset_bylatitude\
    (inputdata,south_bound=southlat,north_bound=northlat)
    lat_subset = data_subset.lat
    coslat = np.cos(np.deg2rad(lat_subset))
    weight_factor = coslat / coslat.mean(dim='y')
    data_average = \
    (data_subset * weight_factor).mean(dim=('x', 'y'),skipna=True)
    return data_average

# Rename dimensions
thetao_piControl_fw['thetao_GFDL-ESM2M_piControl']\
= thetao_piControl_fw['thetao_GFDL-ESM2M_piControl'].rename({'rlon': 'x', 'rlat': 'y'})

# Initialize dictionary

fafwater_CRF_dict = {}

for model in model_names_fafwater:
    
    fafwater_CRF_dict[f'CRF_{model}_fafwater'] = \
    (spatial_average(thetao_fafwater[f'thetao_{model}_faf-water'].compute(),-65,-50) - \
    spatial_average(thetao_piControl_fw[f'thetao_{model}_piControl'].mean(dim='time').compute(),-65,-50)) 

In [17]:
# save postprocessed file for response functions. 

os.chdir('/oak/stanford/groups/earlew/zkaufman/Archive_KaufmanGRL2025/postprocessed_analysis_notebooks')

def save_data_arrays_to_netcdf(data_arrays, filename):
    first_data_array_saved = False

    for var_name, data_array in data_arrays.items():
        mode = 'w' if not first_data_array_saved else 'a'
        data_array.to_netcdf(filename, mode=mode, group=var_name)
        first_data_array_saved = True
        
        
save_data_arrays_to_netcdf(fafwater_CRF_dict, 'fafwater_CRFs_unscaled.nc')