# Ocean thermal forcing from ORAS5 data
Clean ocean TF workflow to process ORAS5 reanalysis data.

21 Apr 2025 | EHU
- Note that dataset is very large (~350 GB) due to high resolution of ORAS model.  Attempt to load in lat/lon subsets.

In [None]:
import os
import sys
import glob
import copy
import csv
import numpy as np
import netCDF4 as nc
import xarray as xr
import dask
from datetime import datetime

from verjansFunctions import freezingPoint

In [None]:
### Settings for this run
saveBoxGreenlandNC = True  ## save flag; not read anywhere else in the visible workflow
cwd = os.getcwd() + '/'    ## current working directory, with trailing slash

SelModel = 'ORAS5'  ## model label used in the input path, output metadata and output filename

## Input data live in a per-model subfolder.
data_directory = f'/Users/eultee/Library/CloudStorage/OneDrive-NASA/Data/Ocean-reanalyses/{SelModel}'
## Keep the trailing slash: the output filename is appended by plain string concatenation.
DirSaveNC = '/Users/eultee/Library/CloudStorage/OneDrive-NASA/Data/gris-iceocean-outfiles/'


### Limits of Greenland domain ###
limN = 86.0   ## degrees N latitude
limS = 57.0   ## degrees N latitude
limE = 4.0    ## degrees E longitude (0-360 convention)
limW = 274.0  ## degrees E longitude (0-360 convention)
## CHECK: confirm that output shows up within this W-E box and not its E-W complement
limDp = 1200.0      ## presumably a depth limit -- not used in the visible workflow; TODO confirm
depthSubSample = 1  ## presumably a depth subsampling stride -- not used in the visible workflow; TODO confirm



### Load and trim data.
Load the data from multiple files. ORAS5 is too big to load into memory in full, so the data are trimmed to the Greenland bounding box before loading.  Potential temperature and salinity also come in separate datasets, so each one is read in with its own custom command.

In [None]:
## Load all potential-temperature tiles lazily as one multifile dataset,
## trim to the Greenland bounding box, and only then load into memory.

with xr.open_mfdataset(f'{data_directory}/votemper*.nc') as ds_temp:
    ## latitude band of the bounding box
    include_lat = (ds_temp.nav_lat >= limS) & (ds_temp.nav_lat <= limN)
    ## modulo 360 to account for lon going -180 to 180 or 0-360;
    ## the box wraps through 0 deg E, hence the OR of the two half-ranges
    lon_360 = ds_temp.nav_lon % 360
    include_lon = (lon_360 <= limE) | (lon_360 >= limW)

    ## The slicing option is consulted when dask builds the indexing graph, which
    ## happens in where(drop=True), not in load(); so the trim must sit inside the
    ## config context for the setting to have any effect.
    with dask.config.set(**{'array.slicing.split_large_chunks': True}):
        gld_pt = ds_temp.where((include_lat & include_lon).compute(), drop=True)
        gld_ds_pt = gld_pt.load()
    

In [None]:
## Sanity check on the trimmed temperature field: the mean over all points
## should be a plausible, finite number.
gld_ds_pt.votemper.mean() ## check it's reasonable and non-NAN

In [None]:
## Same trim-then-load procedure as for temperature, applied to the salinity tiles.
with xr.open_mfdataset(f'{data_directory}/vosaline*.nc') as ds_sal:
    ## latitude band of the bounding box
    include_lat = (ds_sal.nav_lat >= limS) & (ds_sal.nav_lat <= limN)
    ## modulo 360 to account for lon going -180 to 180 or 0-360;
    ## the box wraps through 0 deg E, hence the OR of the two half-ranges
    lon_360 = ds_sal.nav_lon % 360
    include_lon = (lon_360 <= limE) | (lon_360 >= limW)

    ## The slicing option is consulted when dask builds the indexing graph, which
    ## happens in where(drop=True), not in load(); so the trim must sit inside the
    ## config context for the setting to have any effect.
    with dask.config.set(**{'array.slicing.split_large_chunks': True}):
        gld_sal = ds_sal.where((include_lat & include_lon).compute(), drop=True)
        gld_ds_sal = gld_sal.load()

In [None]:
## Display the trimmed salinity dataset for visual inspection.
gld_ds_sal

### Compute the ocean thermal forcing

In [None]:
## Thermal forcing for ORAS5: temperature minus the freezing point.
## The freezing point is evaluated by freezingPoint from salinity and depth.
salinity = gld_ds_sal.vosaline
depth_levels = gld_ds_sal.deptht
rechunk_opts = {'allow_rechunk': True}

fp = xr.apply_ufunc(
    freezingPoint,
    salinity,
    depth_levels,
    dask='parallelized',
    dask_gufunc_kwargs=rechunk_opts,
)
fftf = gld_ds_pt.votemper - fp  ## already in Celsius
fftf

### Prepare for write-out
Mask dataset to remove data missing from original.  Rename unconventionally-named dimensions.  Assign metadata to write to NetCDF.

In [None]:
## Keep TF only where the source temperature is below 1e10;
## everything else becomes NaN, so xarray handles missing data natively.
valid_temperature = gld_ds_pt.votemper < 1e10
tf_out = fftf.where(valid_temperature)

In [None]:
## Give the dimensions/coordinates conventional names.
tf_out = tf_out.rename({'deptht':'depth',
                        'nav_lon':'lon',
                        'nav_lat':'lat',
                        'time_counter':'time'})
## assign_attrs returns a new object instead of modifying in place, so the
## result must be bound back to tf_out; otherwise the metadata is lost and
## never written to the NetCDF file.
tf_out = tf_out.assign_attrs(standard_name='TF',
                             long_name='Ocean thermal forcing',
                             # fillvalue=1.1e20,
                             latbounds=[limS, limN],
                             lonbounds=[limW,limE])
tf_out  ## display the array with its attributes

In [None]:
## Wrap the TF array in a Dataset (variable name 'TF') and attach
## dataset-level metadata, including a creation timestamp.
now = datetime.now()
ds_temp = tf_out.to_dataset(name='TF')

global_attrs = {
    'title': 'Ocean thermal forcing for {}'.format(SelModel),
    'summary': ('TF computed following Verjans code, in a bounding' +
                ' box around Greenland, for ISMIP7 Greenland forcing.' +
                ' This version for {}'.format(SelModel)),
    'institution': 'NASA Goddard Space Flight Center',
    'creation_date': now.strftime('%Y-%m-%d %H:%M:%S'),
}
ds_out = ds_temp.assign_attrs(**global_attrs)

ds_out

In [None]:
## Print a summary of the output dataset before writing it out.
ds_out.info()

### Write NetCDF out
Write to a custom filename in the directory specified above.  Remember to rename the file as needed, e.g. for the correct date range.

In [None]:
from dask.diagnostics import ProgressBar

## Output path = save directory + model-specific filename.
## The date range in the filename is hard-coded; rename as needed.
nc_name = 'tf-{}-1990_2005.nc'.format(SelModel)
out_fn = DirSaveNC + nc_name

## Write the dataset to disk, with a progress bar for any dask work.
with ProgressBar():
    ds_out.to_netcdf(path=out_fn)

### Check the output

In [None]:
import cartopy  # Map projections library
import cartopy.crs as ccrs  # Projections list

In [None]:
## Re-open the file that was just written, to check the output from disk
## rather than from the in-memory dataset.
f_in = out_fn

ds_new = xr.open_dataset(f_in)

In [None]:
## Display the re-opened dataset for visual inspection.
ds_new

In [None]:
## Average TF over the time dimension, then display the result.
tf_tavg = ds_new.TF.mean(dim='time') 
tf_tavg

In [None]:
## Mean of the time-averaged TF at the depth level nearest 5.02, ignoring NaNs.
tf_tavg.sel(depth=5.02, method='nearest').mean(skipna=True)

In [None]:
import matplotlib.pyplot as plt

## Map of time-averaged TF at the depth level nearest 200, drawn on a
## Robinson projection from the lon/lat coordinates.
ax = plt.axes(projection=ccrs.Robinson())
tf_at_level = tf_tavg.sel(depth=200, method='nearest')
tf_at_level.plot(ax=ax, transform=ccrs.PlateCarree(), x='lon', y='lat') ## specify x and y coordinates
ax.coastlines()
ax.gridlines();

In [None]:
## Quick-look plot of time-averaged TF at the level nearest demo_depth,
## on plain (non-map) axes.
demo_depth = 100
tf_slice = tf_tavg.sel(depth=demo_depth, method='nearest')
plot_title = 'TF at {} m depth, {}, average 1990-2005'.format(demo_depth, SelModel)

fig, ax = plt.subplots()
tf_slice.plot(ax=ax)
ax.set(title=plot_title)