# Downloading Daily Salinity for Northeast US

In [1]:
import xarray as xr
import pandas as pd
import os
from scipy.interpolate import griddata
from scipy.interpolate import LinearNDInterpolator
from pyproj import Transformer
import numpy as np
from tqdm import tqdm
import rioxarray
from shapely.geometry import Polygon
from matplotlib.path import Path
import geopandas as gpd
import matplotlib.pyplot as plt

# Path to local assets.
# Defaults to the original checkout location; set the LOBSTER_ECOL_PATH
# environment variable to run on another machine without editing this cell.
# os.path.join(..., "") guarantees the trailing separator that the
# f-string paths built from `local_path` rely on.
local_path = os.path.join(
    os.environ.get(
        "LOBSTER_ECOL_PATH",
        "/Users/adamkemberling/Documents/Repositories/Lobster-ECOL/",
    ),
    "",
)

# Folder where the trimmed daily salinity files get saved
out_path = f"{local_path}local_data/plumes/fvcom_daily_salinity/"

In [2]:
# Seaplan catalog (browse every file that is available):
# http://www.smast.umassd.edu:8080/thredds/catalog/models/fvcom/NECOFS/Archive/Seaplan_33_Hindcast_v1/catalog.html

# OPeNDAP root of the Seaplan 33 Hindcast (1978-2019 data); 2011-2016 are available as daily means:
base_daily_url = "http://www.smast.umassd.edu:8080/thredds/dodsC/models/fvcom/NECOFS/Archive/Seaplan_33_Hindcast_v1/daily_mean"

# Year and month components used in the file names
all_months = np.arange(1,13)
all_years = np.arange(2011,2017, 1)

# One URL per year-month, years in the outer position so the list is chronological.
# File names follow gom3_daily_mean_YYYYMM.nc (month zero-padded to two digits).
daily_thredds_urls = [
    f"{base_daily_url}/gom3_daily_mean_{year}{month:02d}.nc"
    for year in all_years
    for month in all_months
]

# Peek at the first few URLs
daily_thredds_urls[0:2]

# Open the first monthly file as a test case.
# decode_times=False asks xarray to leave the time values as stored instead of
# converting them to datetimes.
# NOTE(review): presumably required because a time variable in these files
# does not decode cleanly — confirm.
test_ds = xr.open_dataset(daily_thredds_urls[0],decode_times=False)
test_ds

## Subset Using Mesh Clipping

In a separate qmd file we loaded the gom3 mesh as a simple feature geometry and clipped it to our area of interest. Then we exported a csv that contained the node and centroid indices that correspond to this clipped region.

The following chunk loads that file to use for indexing out those locations:

In [4]:
# Lookup table of mesh indices for the grid points we want to extract.
# It was generated in a separate qmd file that clipped the gom3 mesh to the area of interest.
grid_pts = pd.read_csv(f"{local_path}local_data/plumes/grid_1km_fvcom_indices.csv")

# Drop the coordinate, element, and weight columns; whatever remains is treated
# as node indices, stacked into one long column and reduced to unique values
non_node_cols = ["lon", "lat", "elem", "p1_wt", "p2_wt","p3_wt"]
node_ids = grid_pts.drop(columns = non_node_cols).melt()["value"].unique()

# Unique elements from our area of interest
elem_ids = grid_pts["elem"].unique()

# Indices are 1-based in the FVCOM files (and the table), but 0-based in Python
py_node_ids, py_elem_ids = node_ids - 1, elem_ids - 1


In [5]:

# Subset check on the test file: study-area nodes and elements,
# the first and last sigma layers (surface and bottom), and the first time step
trim_test = test_ds.isel({
    "node": py_node_ids,
    "nele": py_elem_ids,
    "siglay": [0, -1],  # Surface and bottom
    "time": 0,
})

# # Plot the temperatures at the node coordinates
# # Just grab the coordinates, tired of the dataset baggage
# lon_vals = trim_test.lon.values
# lat_vals = trim_test.lat.values

# # Plot the surface using lon and lat values
# # Plot the surface so it is easy to see

# fig, ax = plt.subplots(1,1, figsize=(6, 4))  # Create a figure containing a single axes.
# ax[0].scatter(x = lon_vals, y = lat_vals,  c = trim_test.salinity.isel(siglay = 0), s = .5, cmap = "coolwarm", vmin = 2, vmax = 10)
# ax[0].set_title("Study Area - Surface Salnity")  # Add a title to the axes.

## Assembling Pared-Down Datasets for Export

In [7]:
# Doesn't work, and takes forever

# # Rebuild a dataset without siglay using surface and bottom salinity
# sss_all = trim_test.salinity.isel(siglay=0).values
# bs_all = trim_test.salinity.isel(siglay=-1).values  

# # Combine the data into a new dataset
# ds_new = xr.Dataset(
#     coords=dict(
#         time = trim_test['time'].values,
#         x = trim_test['lon'].values,
#         y = trim_test['lat'].values
#     ),
#     data_vars=dict(
#         surface_salinity = (["time", "lat", "lon"], sss_all),
#         bottom_salinity = (["time", "lat", "lon"], bs_all),
#         lon = trim_test['lon'].values,
#         lat = trim_test['lat'].values,
#         lonc = trim_test['lonc'].values,
#         latc = trim_test['latc'].values
#     ),
#     attrs={"crs": "EPSG:4326"}
# )
# ds_new

# LOOPING OVER FILES TO SAVE LOCALLY

I had the idea to just save what we would have from this step and do the more complicated things later.

In [None]:
# Loop 2011-2016 - Online Daily Data.
# For every monthly file: trim to the study area, keep only the surface
# (siglay 0) and bottom (siglay -1) salinity, and save the result locally.
#
# Everything that builds and writes the output lives INSIDE the loop so each
# monthly file gets its own export.

# Make sure the output folder exists before the first write
os.makedirs(out_path, exist_ok=True)

for file in daily_thredds_urls:
# for file in daily_thredds_urls[0:2]:

    # Read the file; the context manager closes the remote connection once
    # this month has been written, so handles do not pile up across the loop
    with xr.open_dataset(file, decode_times=False) as ds_full:

        # Trim to study area
        ds_x = ds_full.isel(
            node = py_node_ids,
            nele = py_elem_ids)

        # Salinity lives on the unstructured mesh nodes, so after picking one
        # sigma layer the data are (time, node) - not a (time, lat, lon) grid.
        # transpose() pins the axis order to match the dims declared below.
        salinity = ds_x['salinity']
        surface_vals = salinity.isel(siglay=0).transpose("time", "node").values
        bottom_vals = salinity.isel(siglay=-1).transpose("time", "node").values

        # Build output dataset - node/element coordinates carried as plain arrays
        ds_out = xr.Dataset(
            data_vars=dict(
                surface_salinity = (["time", "node"], surface_vals),
                bottom_salinity = (["time", "node"], bottom_vals)
            ),
            coords=dict(
                # NOTE(review): assumes 'Times' reads in as one timestamp per time step - confirm
                time = ("time", ds_x['Times'].values),
                # Node locations
                x = ("node", ds_x['x'].values),
                y = ("node", ds_x['y'].values),
                lon = ("node", ds_x['lon'].values),
                lat = ("node", ds_x['lat'].values),
                # Element (centroid) locations
                lonc = ("nele", ds_x['lonc'].values),
                latc = ("nele", ds_x['latc'].values)
            ),
            # Carry over the attributes from the original dataset and add the
            # crs tag (previously the crs was overwritten by the carried-over attrs)
            attrs={**ds_x.attrs, "crs": "EPSG:4326"})

        # Build path to save it somewhere (file name keeps the YYYYMM stamp)
        out_name = file.replace(f"{base_daily_url}/gom3_daily_mean_", "").replace(".nc", "")
        print(f"Saving file for {out_name}")
        out_name = f"{out_path}{out_name}_salinity_daily.nc"

        # Export:
        ds_out.to_netcdf(out_name)