The following code processes monthly antwater SST data, using ocean potential temperature fields (thetao) downloaded from https://crd-data-donnees-rdc.ec.gc.ca/CCCMA/SOFIA/, producing the following files used in the analysis notebooks: SST_response_maps_antwater.nc, and antwater_CRFs_unscaled.nc


Most model output from SOFIA has already been pre-processed by Neil Swart as described below, and this convention will be used as a reference grid for all other models. 
- data has been remapped to a standard 1x1 degree horizontal grid, and interpolated to standard 33 WOA vertical levels (for 3D data).
- variable names have been set to the CMIP6 variable names, and units changed to CMIP6 units

Directory paths must be changed to a user's local environment if reproducing 

In [1]:
import os
import numpy as np
import xarray as xr
import pandas as pd
import xesmf as xe
import gsw
from glob import glob
import netCDF4
import gc
from scipy.optimize import curve_fit
from scipy.stats import pearsonr
from scipy.stats import linregress

from xmip.preprocessing import rename_cmip6
from xmip.preprocessing import broadcast_lonlat

%matplotlib inline 
import warnings
warnings.filterwarnings("ignore")

In [8]:
########################################################################
# extract contributing model names for a given directory/experiment

def extract_model_names(directory,experiment):
    model_names = []
    file_pattern = os.path.join(directory, '*_*_{}.nc'.format(experiment))

    for file_path in glob(file_pattern):
        filename = os.path.basename(file_path)
        model_name = filename.split('_')[1]  # Extract the model name from the filename
        model_names.append(model_name)
        
    # Remove duplicates by converting the list to a set and then back to a list
    model_names = list(set(model_names))

    return model_names

########################################################################

directory_of_interest = '/oak/stanford/groups/earlew/zkaufman/Archive_KaufmanGRL2025/antwater/processed'
model_names_antwater_processed = extract_model_names(directory_of_interest,'antwater')
print('pre-processed antwater contributing models:')
print(model_names_antwater_processed)

########################################################################

directory_of_interest = '/oak/stanford/groups/earlew/zkaufman/Archive_KaufmanGRL2025/antwater/unprocessed'
model_names_antwater_unprocessed = extract_model_names(directory_of_interest,'antwater')
print('unprocessed antwater contributing models:')
print(model_names_antwater_unprocessed)

########################################################################

# combined list of all antwater models for future reference 
model_names_antwater_unprocessed= model_names_antwater_unprocessed
model_names_antwater = model_names_antwater_processed + model_names_antwater_unprocessed


pre-processed antwater contributing models:
['hadgem3-gc31-ll', 'cesm2', 'gfdl-esm4', 'giss-e2-1-g', 'access-esm1-5', 'gfdl-cm4', 'canesm5']
unprocessed antwater contributing models:
['AWI-ESM-1-REcoM']


In [4]:
# create dictionaries for processed data (antwater and picontrol)

os.chdir('/oak/stanford/groups/earlew/zkaufman/Archive_KaufmanGRL2025/antwater/processed')

thetao_antwater = {} 
thetao_piControl_SOFIA = {}

for model in model_names_antwater_processed:
    
    thetao_antwater\
    ['thetao_'+ model +'_antwater'] = xr.open_dataset('thetao_{}_antwater.nc'.format(model))
    
    thetao_piControl_SOFIA\
    ['thetao_'+ model +'_piControl'] = xr.open_dataset('thetao_{}_piControl.nc'.format(model))

########################################################################

# create dictionaries for unprocessed data (antwater and picontrol)

os.chdir('/oak/stanford/groups/earlew/zkaufman/Archive_KaufmanGRL2025/antwater/unprocessed')

thetao_antwater_unprocessed = {} 
thetao_piControl_SOFIA_unprocessed = {}

for model in model_names_antwater_unprocessed:
    
    thetao_antwater_unprocessed\
    ['thetao_'+ model +'_antwater'] = xr.open_dataset('thetao_{}_antwater.nc'.format(model))
    
    thetao_piControl_SOFIA_unprocessed\
    ['thetao_'+ model +'_piControl'] = xr.open_dataset('thetao_{}_piControl.nc'.format(model))


In [5]:
# For processed models, extract uppermost level of thetao variable and convert to 2d lat/lon.

for key in thetao_antwater.keys():
    thetao_antwater[key] = broadcast_lonlat(rename_cmip6(thetao_antwater[key])).thetao.sel(lev= 0)
    
for key in thetao_piControl_SOFIA.keys():
    thetao_piControl_SOFIA[key] = broadcast_lonlat(rename_cmip6(thetao_piControl_SOFIA[key])).thetao.sel(lev=0)

########################################################################

# Then,take the annual mean of the monthly data, along with creating uniform datetime index across models. 

for key in thetao_antwater.keys():

        thetao_antwater[key]['time'] = xr.cftime_range\
        (start='1000', periods=len(thetao_antwater[key]['time']), freq='M')

        thetao_antwater[key] = thetao_antwater[key].resample(time='A').mean()           
            
for key in thetao_piControl_SOFIA.keys():
    
        thetao_piControl_SOFIA[key]['time'] = xr.cftime_range\
        (start='1000', periods=len(thetao_piControl_SOFIA[key]['time']), freq='M')

        thetao_piControl_SOFIA[key] = thetao_piControl_SOFIA[key].resample(time='A').mean()   
            

In [11]:
# check the state of each unprocessed SOFIA model to see what needs to be addressed in addition to above. 
    
for model in model_names_antwater_unprocessed:
    print('coordinate features of unprocessed SOFIA models')
    print('----------')    
    print(model)
    print('----------')
    print(thetao_antwater_unprocessed\
    ['thetao_{}_antwater'.format(model)].coords)
    print(thetao_piControl_SOFIA_unprocessed\
    ['thetao_{}_piControl'.format(model)].coords)


coordinate features of unprocessed SOFIA models
----------
AWI-ESM-1-REcoM
----------
Coordinates:
  * time     (time) datetime64[ns] 10kB 1870-01-16 ... 1970-12-16
  * lon      (lon) float64 3kB -179.5 -178.5 -177.5 -176.5 ... 177.5 178.5 179.5
  * lat      (lat) float64 1kB -89.5 -88.5 -87.5 -86.5 ... 86.5 87.5 88.5 89.5
  * depth    (depth) float64 368B 0.0 10.0 20.0 ... 5.4e+03 5.65e+03 5.9e+03
Coordinates:
  * time     (time) datetime64[ns] 10kB 1870-01-16 ... 1970-12-16
  * lon      (lon) float64 3kB -179.5 -178.5 -177.5 -176.5 ... 177.5 178.5 179.5
  * lat      (lat) float64 1kB -89.5 -88.5 -87.5 -86.5 ... 86.5 87.5 88.5 89.5
  * depth    (depth) float64 368B 0.0 10.0 20.0 ... 5.4e+03 5.65e+03 5.9e+03


In [12]:
# Given above, we only need to account for the different name of the depth dimension. 
# Depth intervals here do not follow the WOA standard, but we can ignore this when only using uppermost level 
# Accordingly, the unprocessed model can be treated the same way. 

for key in thetao_antwater_unprocessed.keys():
    thetao_antwater_unprocessed[key] = broadcast_lonlat\
    (rename_cmip6(thetao_antwater_unprocessed[key])).thetao.sel(lev= 0)
    
for key in thetao_piControl_SOFIA_unprocessed.keys():
    thetao_piControl_SOFIA_unprocessed[key] = broadcast_lonlat\
    (rename_cmip6(thetao_piControl_SOFIA_unprocessed[key])).thetao.sel(lev=0)

 ########################################################################

for key in thetao_antwater_unprocessed.keys():

        thetao_antwater_unprocessed[key]['time'] = xr.cftime_range\
        (start='1000', periods=len(thetao_antwater_unprocessed[key]['time']), freq='M')

        thetao_antwater_unprocessed[key] = thetao_antwater_unprocessed[key].resample(time='A').mean()           

for key in thetao_piControl_SOFIA_unprocessed.keys():
    
        thetao_piControl_SOFIA_unprocessed[key]['time'] = xr.cftime_range\
        (start='1000', periods=len(thetao_piControl_SOFIA_unprocessed[key]['time']), freq='M')

        thetao_piControl_SOFIA_unprocessed[key] = \
        thetao_piControl_SOFIA_unprocessed[key].resample(time='A').mean()   


In [13]:
# Finally, merge processed and unprocessed models together 

thetao_antwater = {**thetao_antwater,**thetao_antwater_unprocessed}
thetao_piControl_SOFIA = {**thetao_piControl_SOFIA,**thetao_piControl_SOFIA_unprocessed}


In [15]:
%%time

# Initialize lists for SST anomalies
SST_anoms_antwater = []

# Define a function to calculate anomalies for a single model
def calculate_anomaly_antwater(model):
    return (thetao_antwater[f'thetao_{model}_antwater'][60:70,:,:].mean(dim='time')
            - thetao_piControl_SOFIA[f'thetao_{model}_piControl'].mean(dim='time'))

# Calculate anomalies for each experiment and append to the lists
SST_anoms_antwater = [calculate_anomaly_antwater(model) for model in model_names_antwater]

# Convert lists to xarray DataArrays
SST_anoms_antwater = \
xr.concat(SST_anoms_antwater, dim='model', coords='minimal', compat='override')

CPU times: user 167 ms, sys: 32.5 ms, total: 199 ms
Wall time: 198 ms


In [16]:
SST_anoms_antwater = SST_anoms_antwater.assign_coords(model=model_names_antwater)

In [18]:
# save postprocessed file for anomaly maps of each model. 

os.chdir('/oak/stanford/groups/earlew/zkaufman/Archive_KaufmanGRL2025/postprocessed_analysis_notebooks')

output_filename = 'SST_response_maps_antwater.nc'
SST_anoms_antwater.to_netcdf(output_filename)

In [19]:
# calculate spatially averaged SST response (unscaled, for climate response functions )


def subset_bylatitude(data, south_bound, north_bound):
    lat_mask = (data.lat >= south_bound) & (data.lat <= north_bound)
    data_SO = data.where(lat_mask, drop=True)
    return data_SO


# spatially average CMIP6 Omon field over specified latitudes
# assumes spatial average is conducted over all longitudes
# Assumes regrid_dataarray function is already applied
def spatial_average(inputdata,southlat,northlat):
    data_subset = subset_bylatitude\
    (inputdata,south_bound=southlat,north_bound=northlat)
    lat_subset = data_subset.lat
    coslat = np.cos(np.deg2rad(lat_subset))
    weight_factor = coslat / coslat.mean(dim='y')
    data_average = \
    (data_subset * weight_factor).mean(dim=('x', 'y'),skipna=True)
    return data_average


# Initialize dictionary
antwater_CRF_dict = {}

for model in model_names_antwater:
    
    antwater_CRF_dict[f'CRF_{model}_antwater'] = \
    (spatial_average(thetao_antwater[f'thetao_{model}_antwater'],-65,-50) - \
    spatial_average(thetao_piControl_SOFIA[f'thetao_{model}_piControl'].mean(dim='time'),-65,-50)) 

In [20]:
# save postprocessed file for response functions. 

os.chdir('/oak/stanford/groups/earlew/zkaufman/Archive_KaufmanGRL2025/postprocessed_analysis_notebooks')

def save_data_arrays_to_netcdf(data_arrays, filename):
    first_data_array_saved = False

    for var_name, data_array in data_arrays.items():
        mode = 'w' if not first_data_array_saved else 'a'
        data_array.to_netcdf(filename, mode=mode, group=var_name)
        first_data_array_saved = True
        
        
save_data_arrays_to_netcdf(antwater_CRF_dict, 'antwater_CRFs_unscaled.nc')
