### The purpose of this notebook is to recreate the regression of the SSH in GLORYS onto the CWI developed in Amaya et al. (2022)

In [1]:
import os; from os.path import exists
# Run from the project's scripts directory. Presumably this is what makes the
# local `help_funcs` module importable in the imports cell -- TODO confirm.
os.chdir('/vortexfs1/home/anthony.meza/Atmospheric Rivers and Waves/scripts')
def plotsdir(x=""):
    """Return the path of `x` inside the project's plots directory.

    With the default empty `x` the directory itself (with trailing slash) is returned.
    """
    return "/vortexfs1/home/anthony.meza/Atmospheric Rivers and Waves/plots/" + x


def GLORYS_dir(x=""):
    """Return the path of `x` inside the raw GLORYS data directory.

    The directory and `x` used to be concatenated with no separator, so
    GLORYS_dir("file.nc") produced ".../GLORYS_datafile.nc". A "/" is now
    always inserted, matching the sibling helpers. A leading "/" on `x` is
    dropped so callers that supplied their own separator get the same file.
    """
    return "/vortexfs1/home/anthony.meza/GLORYS_data/" + x.lstrip("/")


def GLORYS_data_dir(x=""):
    """Return the path of `x` inside the processed GLORYS directory."""
    return "/vortexfs1/home/anthony.meza/Atmospheric Rivers and Waves/GLORYS_processed/" + x


def ERA5_data_dir(x=""):
    """Return the path of `x` inside the ERA5 data directory."""
    return "/vortexfs1/home/anthony.meza/Atmospheric Rivers and Waves/ERA5_data/" + x

In [2]:
from help_funcs import * 
import gsw, gc , os, multiprocessing, importlib
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import xarray as xr
import seaborn as sns
import pandas as pd
import cmocean.cm as cm
import netCDF4 as nc
from pathlib import Path
from natsort import natsorted
import matplotlib.pyplot as plt
from multiprocessing import Pool
import dask_labextension

In [3]:
from pathlib import Path
from natsort import natsorted

In [4]:
def remove_seasonal(ds):
    """Return `ds` with its monthly-mean climatology subtracted.

    `ds` is grouped by "time.month"; each group's mean over "time" is taken
    as the climatology and subtracted from the matching group.
    Assumes `ds` offers an xarray-style `groupby` and a "time" coordinate.
    """
    group_key = "time.month"
    monthly_mean = ds.groupby(group_key).mean("time")
    return ds.groupby(group_key) - monthly_mean
def remove_daily_climatology(ds):
    """Return `ds` with its day-of-year climatology subtracted.

    `ds` is grouped by "time.dayofyear"; each group's mean over "time" is
    taken as the climatology and subtracted from the matching group.
    Assumes `ds` offers an xarray-style `groupby` and a "time" coordinate.
    """
    group_key = "time.dayofyear"
    daily_mean = ds.groupby(group_key).mean("time")
    return ds.groupby(group_key) - daily_mean

In [5]:
from dask.distributed import Client
from dask_jobqueue import SLURMCluster

# Describe one SLURM job: a single worker process with 36 cores and 100GB,
# submitted to the "compute" queue with a two-hour wall-time limit.
cluster = SLURMCluster(
    queue='compute',
    interface='ib0',
    walltime='02:00:00',
    cores=36,
    processes=1,
    memory='100GB',
)

# Show the generated submission script so the resource request can be checked.
print(cluster.job_script())

# Ask for eight such jobs and attach a client to the cluster.
cluster.scale(jobs=8)
client = Client(cluster)
client

#!/usr/bin/env bash

#SBATCH -J dask-worker
#SBATCH -p compute
#SBATCH -n 1
#SBATCH --cpus-per-task=36
#SBATCH --mem=94G
#SBATCH -t 02:00:00

/vortexfs1/home/anthony.meza/mambaforge/envs/atm_rivers/bin/python -m distributed.cli.dask_worker tcp://172.16.3.56:34946 --nthreads 36 --memory-limit 93.13GiB --name dummy-name --nanny --death-timeout 60 --interface ib0



### Reading in the GLORYS and ERA5 data files. Files have been preprocessed and combined in order to take advantage of the **dask** feature of xarray. We also remove their seasonal climatologies

In [7]:
# Chunk size used for the latitude and longitude dimensions in the
# open_mfdataset calls below; -1 asks dask for one chunk spanning the whole dimension.
chunk_size = -1
def _preprocess(ds):
    return ds.sel(latitude = slice(-2, 60), longitude = slice(-150, -75))
# Lazily open the combined GLORYS file, cropped by `_preprocess`.
# Chunking: 120 time steps and one depth level per chunk, with latitude and
# longitude chunked according to `chunk_size`.
ds = xr.open_mfdataset(
    GLORYS_data_dir("GLORYS_NE_PAC.nc"),
    engine="netcdf4",
    preprocess=_preprocess,
    parallel=True,
    chunks={
        "time": 120,
        "latitude": chunk_size,
        "longitude": chunk_size,
        "depth": 1,
    },
    data_vars="minimal",
    coords="minimal",
    compat="override",
)

CPU times: user 270 ms, sys: 55.1 ms, total: 325 ms
Wall time: 3.09 s


In [8]:
def bathy_preprocess(ds):
    """Select `ds` at its own depth levels, then crop to the study region.

    The depth selection uses `ds.depth.values` with nearest-neighbour
    matching. The crop is latitude -2..60 and longitude -150..-75.

    NOTE(review): the `open_mfdataset` calls visible in this notebook pass
    `_preprocess`, not this function -- confirm whether it is still needed.
    """
    at_own_depths = ds.sel(depth=ds.depth.values, method="nearest")
    lat_window = slice(-2, 60)
    lon_window = slice(-150, -75)
    return at_own_depths.sel(latitude=lat_window, longitude=lon_window)

def _open_coastal_mask(filename):
    """Open one mask file from the processed-GLORYS directory, cropped by `_preprocess`."""
    return xr.open_mfdataset(
        GLORYS_data_dir(filename),
        preprocess=_preprocess,
        parallel=True,
        chunks={"latitude": chunk_size, "longitude": chunk_size, "depth": 1},
        engine="netcdf4",
    )


# The two mask files are opened with identical settings.
bathy_ds = _open_coastal_mask("CaliforniaCoastalMask.nc")
weighted_mask_ds = _open_coastal_mask("CaliforniaCoastalMaskWeighted.nc")

# 2-D longitude/latitude grids built from the cropped GLORYS coordinates.
LON, LAT = np.meshgrid(ds.longitude.values, ds.latitude.values)

In [10]:
# Approximate the average latitude and longitude of each coastal point
latitudes = zonal_average_coastline(LAT, weighted_mask_ds)
longitudes = zonal_average_coastline(LON, weighted_mask_ds)
list_lats = stitch_zonal_average(latitudes)
list_lons = stitch_zonal_average(longitudes)

# Distance between each pair of consecutive path points. Built in one pass
# rather than re-allocating the array with np.concatenate on every iteration.
# dtype=float keeps the empty-path case a float array, as before.
dists = np.array(
    [
        haversine(list_lons[i], list_lats[i], list_lons[i + 1], list_lats[i + 1])
        for i in range(len(list_lats) - 1)
    ],
    dtype=float,
)

# Plot the stitched coastal path
plt.scatter(list_lons, list_lats, s = 0.1)
plt.xlabel("Longitude")
plt.ylabel("Latitude")

# Cumulative distance from the start to the end of the "shelf"; the first
# point sits at distance 0, so there is one value per path point.
cum_distance = np.concatenate([[0.0], np.cumsum(dists)])

SW
GC_C
SW
GC_C


In [12]:
#computes the averages of geophysical variables within the shelf
# Average sea-surface height ("zos") and temperature ("thetao") within the shelf.
# The temperature call also receives the wet mask from the bathymetry dataset.
zos_dict = zonal_average_coastline(ds["zos"], weighted_mask_ds)
theta_dict = zonal_average_coastline_depth(ds["thetao"], weighted_mask_ds, bathy_ds.wet_mask)

# Evaluate each lazy result one region at a time, printing the region as a progress marker.
shelf_regions = ("EQ", "COL", "SW", "GC_E", "GC_C", "GC_W", "NW")
for region in shelf_regions:
    print(region)
    zos_dict[region] = zos_dict[region].compute()
    theta_dict[region] = theta_dict[region].compute()

    

EQ
COL
SW
GC_E
GC_C
GC_W
NW
CPU times: user 1min 35s, sys: 8.23 s, total: 1min 43s
Wall time: 19min 28s


In [14]:
def xr_stich_zonal_average(ds):
    """Call `stich_zonal_average_xr` on `ds` with the notebook's coastal path.

    The original line read `lambda ds=stich_zonal_average_xr(...)`, which has
    no `:` and is a SyntaxError; the intent is a one-argument wrapper.

    `longitudes`, `latitudes` and `cum_distance` are read from the notebook
    namespace at call time. The result is whatever `stich_zonal_average_xr`
    returns; callers below pass it straight to `xr.concat`.
    """
    return stich_zonal_average_xr(ds, longitudes, latitudes, cum_distance)

# Join the per-region averages end to end along the "distance" dimension
theato_coast = xr.concat(xr_stich_zonal_average(theta_dict), dim="distance")
theato_coast.name = "thetao"

zos_coast = xr.concat(xr_stich_zonal_average(zos_dict), dim="distance")
zos_coast.name = "zos"

# Combine both variables into a single dataset
coast_vars = xr.merge([theato_coast, zos_coast], compat='override')

# Attach units to the coordinate and to each variable
coast_vars["distance"].attrs['units'] = 'km'
coast_vars["zos"].attrs['units'] = 'm'
coast_vars["thetao"].attrs['units'] = 'deg C'

In [16]:
# Dataset describing the stitched coastal path: latitude and longitude of
# every path point, indexed by cumulative distance, plus the GLORYS time axis.
hovmoller_ds = xr.Dataset(
    data_vars={
        "lat_path": (["distance"], list_lats),
        "lon_path": (["distance"], list_lons),
    },
    coords={
        "distance": (["distance"], cum_distance),
        "time": ds.time.values,
    },
)

In [20]:
# Average the named mask variables from the bathymetry dataset along the coastline
mask_names = ["EQ", "COL", "SW", "GC_E", "GC_C", "GC_W", "NW"]
hovmoller_mask_dict = zonal_average_coastline(bathy_ds[mask_names], weighted_mask_ds)

# Evaluate each lazy result, printing the name as a progress marker
for mask_key in mask_names:
    print(mask_key)
    hovmoller_mask_dict[mask_key] = hovmoller_mask_dict[mask_key].compute()

EQ
COL
SW
GC_E
GC_C
GC_W
NW


In [21]:
# Join the per-region mask averages along "distance" and add them to the path dataset
hovmoller_mask_ds = xr.concat(
    xr_stich_zonal_average(hovmoller_mask_dict),
    dim="distance",
)
hovmoller_ds = xr.merge([hovmoller_ds, hovmoller_mask_ds])
hovmoller_ds["distance"].attrs['units'] = 'km'

In [26]:
# Write the coastal variables, overwriting any existing file
coast_vars.to_netcdf(
    GLORYS_data_dir("GLORYS_Coastal_Vars.nc"),
    engine="netcdf4",
    format="NETCDF4",
    mode="w",
    compute=True,
)

# Write the coastal path and masks, overwriting any existing file
hovmoller_ds.to_netcdf(
    GLORYS_data_dir("GLORYS_Coastal_Path.nc"),
    engine="netcdf4",
    format="NETCDF4",
    mode="w",
    compute=True,
)

In [None]:
# Quick look at coastal temperature at depth index 2
coast_vars["thetao"].isel(depth=2).plot()