# Ocean thermal forcing -- additional reanalysis datasets.
Clean ocean thermal forcing (TF) workflow for processing reanalyses other than the depth-averaged EN4 dataset already provided with the Verjans repo.

19 Mar 2025 | EHU
- Tried ASTE, but tiled format of files made it difficult to read in as a multifile dataset.  Ask Mike for suggestions on processing this, if necessary.
- 2 Apr: Tried ORAS5, but only had temperature data by default, and Copernicus data service was not functioning to download salinity.
- 4 Apr: Try EN4 with g10 correction.  Vincent provides a depth-averaged TF product; we will try to make one on multiple levels.
- 9 Apr: Add 1950-1990 data.

In [None]:
import os
import sys
import glob
import copy
import csv
import numpy as np
import netCDF4 as nc
import xarray as xr
import dask
from datetime import datetime

from verjansFunctions import freezingPoint

In [None]:
### Settings for this run
## NOTE(review): saveBoxGreenlandNC, cwd, limDp and depthSubSample are defined here
## but are not referenced by the cells visible in this notebook -- confirm whether
## they are still needed (e.g. whether a depth cut-off at limDp was intended).
saveBoxGreenlandNC = True
cwd                = os.getcwd()+'/'

## Label for the reanalysis being processed; used in output metadata and filename.
SelModel = 'Hadley'

## Input / output locations (plain strings: no interpolation is needed here).
data_directory = '/Users/eultee/Library/CloudStorage/OneDrive-NASA/Data/Ocean-reanalyses/Hadley'
# DirThetaoNC = data_directory + f'THETA'
# DirSoNC     = data_directory + f'SALT/'
DirSaveNC   = '/Users/eultee/Library/CloudStorage/OneDrive-NASA/Data/gris-iceocean-outfiles/'


### Limits of Greenland domain ###
limN           = 86.0 ## degrees N latitude
limS           = 57.0 ## degrees N latitude
limE           = 4.0 ## degrees E longitude
limW           = 274.0 ## degrees E longitude (0-360 convention, i.e. 86 degrees W)
## The box crosses the prime meridian, so limW > limE numerically.
## CHECK: confirm that output shows up within this W-E box and not its E-W complement
limDp          = 1200.0 ## depth limit -- presumably metres; TODO confirm
depthSubSample = 1



Load in from multiple files. EN4 comes with one NC file per month.

In [None]:
## EN4 comes as one NetCDF file per month: open every file matching the pattern
## as a single multi-file dataset, then load it fully into memory so the file
## handles are released when the `with` block exits.
en4_file_pattern = f'{data_directory}/g10/EN*.nc'
with xr.open_mfdataset(en4_file_pattern) as ds_temp:
    ds = ds_temp.load()

## Earlier single-file attempt, kept for reference:
# with xr.open_dataset(f'{DirThetaoNC}/THETA.'+'{:04d}.nc'.format(tile_numbers[1])) as ds_temp:
#     ds = ds_temp.load()
# ds

In [None]:
## Display the loaded dataset to inspect its variables, dimensions and coordinates
ds

### Trim the data to Greenland bounding box

Find the lat/lon included in the bounding box and work only with this subset of the data.

In [None]:
## Restrict the dataset to the Greenland bounding box.
## Longitude is taken modulo 360 so the test works whether the file stores
## lon as -180..180 or 0..360.  A point is kept when its wrapped longitude is
## at or below limE OR at or above limW.
lon_wrapped = ds.lon % 360
include_lat = (ds.lat >= limS) & (ds.lat <= limN)
include_lon = (lon_wrapped <= limE) | (lon_wrapped >= limW)
inside_box = include_lat & include_lon

## mitigate performance problem with slicing
with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    gld_ds = ds.where(inside_box, drop=True)

## Earlier per-variable (salinity) version, kept for reference:
# with xr.open_dataset(path1, chunks={'lev':10}) as ds1:
#     ## trim to Greenland bounding box
#     include_lat = (ds1.lat>=limS) & (ds1.lat <=limN)
#     include_lon = np.logical_or(((ds1.lon%360)<=limE),((ds1.lon %360) >=limW))
#
#     with dask.config.set(**{'array.slicing.split_large_chunks': True}): ## mitigate performance problem with slicing
#         gld_so = ds1.where(include_lat & include_lon, drop=True) ## trim to Gld
    


In [None]:
## Display the trimmed dataset to check the extent of the Greenland subset
gld_ds

### Compute the ocean thermal forcing

In [None]:
## Freezing point from salinity and depth, using freezingPoint from
## verjansFunctions (presumably returned in degrees Celsius -- TODO confirm).
fp = xr.apply_ufunc(
    freezingPoint,
    gld_ds.salinity,
    gld_ds.depth,
    dask='parallelized',
    dask_gufunc_kwargs={'allow_rechunk':True},
)

## Thermal forcing = temperature (converted from Kelvin to Celsius) minus freezing point
temperature_celsius = gld_ds.temperature - 273.15
fftf = temperature_celsius - fp

In [None]:
## Mask out cells whose temperature is not below 1e10; those become NaN.
## No explicit fill value (e.g. Vincent's 1.1e20) is written here -- xarray's
## native NaN handling is used instead.
valid_temperature = gld_ds.temperature < 1e10
tf_out = fftf.where(valid_temperature)

In [None]:
## Attach variable-level metadata to the thermal forcing array.
## assign_attrs returns a NEW DataArray and leaves the original untouched, so the
## result must be bound back to tf_out; otherwise these attributes are discarded
## and never reach the Dataset / NetCDF file built from tf_out.
tf_out = tf_out.assign_attrs(standard_name='TF',
                             long_name='Ocean thermal forcing',
                             # fillvalue=1.1e20,
                             latbounds=[limS, limN],
                             lonbounds=[limW, limE])
tf_out  ## display the array, now carrying its attributes

In [None]:
## Wrap the TF array in a Dataset (variable name 'TF') and attach global metadata.
now = datetime.now()  ## timestamp for the creation_date attribute
ds_temp = tf_out.to_dataset(name='TF')
# ds_temp.TF.attrs = tf_out.attrs
global_attrs = dict(
    title=f'Ocean thermal forcing for {SelModel}',
    summary=(
        'TF computed following Verjans code, in a bounding'
        ' box around Greenland, for ISMIP7 Greenland forcing.'
        f' This version for {SelModel}'
    ),
    institution='NASA Goddard Space Flight Center',
    creation_date=now.strftime('%Y-%m-%d %H:%M:%S'),
)
ds_out = ds_temp.assign_attrs(**global_attrs)

ds_out

In [None]:
## Print a summary of the output dataset before writing it to disk
ds_out.info()

### Write NetCDF out
Write to a custom filename in the directory specified above.  Remember to rename the file as needed, e.g. for the correct date range.

In [None]:
# out_path = '/home/theghub/ehultee/data/'
# year_tag = path0.strip('.nc').split('_')[-1] ## take the year tag from the GCM input (only one of the two input DS, but we have tried to make them match!)
from dask.diagnostics import ProgressBar

## Output file lives in DirSaveNC; the date range in the name is hard-coded,
## so rename the file if a different period was processed.
out_fn = f'{DirSaveNC}tf-{SelModel}-1950_2020.nc'

## Write the dataset to NetCDF inside a dask progress-bar context.
with ProgressBar():
    ds_out.to_netcdf(path=out_fn)

### Check the output

In [None]:
import cartopy  # Map projections library
import cartopy.crs as ccrs  # Projections list

In [None]:
## Re-open the file that was just written, to check what actually landed on disk
f_in = out_fn

ds_new = xr.open_dataset(f_in)

In [None]:
## Display the re-opened dataset to verify its variables and attributes
ds_new

In [None]:
## Average TF over the 'time' dimension, leaving the other dimensions intact
tf_tavg = ds_new.TF.mean(dim='time') 
tf_tavg

In [None]:
## Scalar sanity check: pick the level nearest depth=5.02 and average the
## time-mean TF over all remaining dimensions, skipping NaNs.
## Note this is a plain (unweighted) mean, not an area-weighted one.
tf_tavg.sel(depth=5.02, method='nearest').mean(skipna=True)

In [None]:
import matplotlib.pyplot as plt

## Map the time-mean TF at the level nearest depth=5.02 on a Robinson projection.
near_surface_tf = tf_tavg.sel(depth=5.02, method='nearest')
ax = plt.axes(projection=ccrs.Robinson())
## The data are passed with a PlateCarree transform; x and y are named
## explicitly so the plot uses the lon/lat coordinates.
near_surface_tf.plot(ax=ax, transform=ccrs.PlateCarree(), x='lon', y='lat')
ax.coastlines()
ax.gridlines();

In [None]:
## Same field with xarray's default plot (no map projection), for a quick look
tf_tavg.sel(depth=5.02, method='nearest').plot()