# load mats into netcdfs from plume tracker

If you have a directory of `.mat` files containing the HFR data, this notebook converts them into a single NetCDF file.

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io
import xarray as xr

from pyplume.constants import DATA_DIR, FIELD_NETCDF_DIR
import pyplume.utils as utils
from pyplume.dataloaders import dataset_to_fieldset

Change the settings below (time range, time step, grid file, and `.mat` directory) as needed.

In [None]:
# Time window to convert. Both ends are included by the loading loop
# (`while current_time <= end_time`). Minute resolution makes the string form
# "YYYY-MM-DDTHH:MM", which is what parse_time splits on.
start_time = np.datetime64("2020-08-01T00:00", "m")
end_time = np.datetime64("2020-08-31T23:00", "m")
# Step between consecutive timestamps that are looked up on disk.
incr = np.timedelta64(1, "h")

# .mat file with the coordinate grid; passed to dataset_from_mat as grid_mat_path.
coord_grid_path = DATA_DIR / "sdcodargrid_hfr.mat"

# Directory searched for the per-timestamp Tot_SDLJ_<date>_<time>.mat files.
mat_tot_dir = DATA_DIR / "support_data/TrackerOutput/hourly"

In [None]:
# Named S/N/W/E bounding boxes.
# NOTE(review): presumably S/N are latitude limits and W/E longitude limits in
# degrees; none of these dicts is referenced in the visible part of this
# notebook — confirm whether they are still needed.
TIJUANA_RIVER_DOMAIN = dict(
    S=32.528,
    N=32.71,
    W=-117.29,
    E=-117.11,
)
CLOSE_TIJUANA_DOMAIN = dict(
    S=32.53,
    N=32.5825,
    W=-117.162,
    E=-117.105,
)
THING_DOMAIN = {
    "S": 32.41,
    "N": 32.7,
    "W": -117.25,
    "E": -117
}

# Coastline points read from coastline.mat (variables "latz0" and "lonz0").
lats, lons = utils.load_pts_mat(DATA_DIR / "coastline.mat", "latz0", "lonz0")
# Row 0 holds the latitudes, row 1 the longitudes; coord_inland and
# dataset_from_mat rely on this layout.
coastline = np.array([lats, lons])
# just filter out the problematic upper coastline where it goes up and down
# (only the first 288 points are kept)
coastline = coastline[:, :288]

In [None]:
# Quick visual check of the trimmed coastline: longitude (row 1) on the x axis,
# latitude (row 0) on the y axis.
plt.scatter(coastline[1], coastline[0], s=4)

In [None]:
def coord_inland(coastline, lat, lon):
    """Tell whether a point's longitude exceeds the nearby coastline longitude.

    Args:
        coastline: 2-row array-like; row 0 holds coastline latitudes and
            row 1 the matching longitudes.
            NOTE(review): the bracketing below presumably expects the
            latitudes to be in ascending order — confirm.
        lat: latitude of the point to test.
        lon: longitude of the point to test.

    Returns:
        True when ``lon`` is strictly greater than the smaller of the two
        coastline longitudes that bracket ``lat``; such a point is treated as
        being inland.
    """
    coast_lats = coastline[0]
    coast_lons = coastline[1]

    # Last coastline point at or below the latitude (index 0 if there is none).
    at_or_below = np.nonzero(coast_lats <= lat)[0]
    below_idx = at_or_below[-1] if at_or_below.size > 0 else 0

    # First coastline point at or above the latitude (last point if none).
    at_or_above = np.nonzero(coast_lats >= lat)[0]
    above_idx = at_or_above[0] if at_or_above.size > 0 else -1

    # can get away with this since the coastline points are dense enough
    coast_lon = min(coast_lons[below_idx], coast_lons[above_idx])
    return lon > coast_lon


def parse_mat_time(time):
    """Convert a fractional day count from a .mat file into a numpy datetime.

    The integer part of ``time`` is a day number where day 1 is the first day
    of year 0; the fractional part is the time of day, rounded to the nearest
    whole hour.

    Args:
        time: day number with the time of day as its fractional part.

    Returns:
        ``np.datetime64`` for that day and (rounded) hour.
    """
    whole_days = int(time)
    day_fraction = time - whole_days
    hours = int(round(day_fraction * 24))
    epoch = np.datetime64("0")
    # Day 1 is the epoch itself, hence the "- 1".
    return epoch + np.timedelta64(whole_days - 1, "D") + np.timedelta64(hours, "h")


def parse_time(time):
    """Split an ISO-style timestamp into compact date and time strings.

    Args:
        time: any object whose ``str()`` looks like ``YYYY-MM-DDTHH:MM[...]``,
            e.g. a minute-resolution ``np.datetime64``.

    Returns:
        Tuple ``(date, clock)``: the date part with dashes removed and the
        first two colon-separated fields of the time part joined together
        (``("20200801", "0000")`` for ``2020-08-01T00:00``).

    Raises:
        IndexError: if the string form contains no ``"T"`` separator.
    """
    pieces = str(time).split("T")
    date_part = pieces[0].replace("-", "")
    # Only the first two clock fields are kept; anything after them is dropped.
    clock_fields = pieces[1].split(":")[:2]
    return date_part, "".join(clock_fields)


def dataset_from_mat(grid_mat_path, current_mat_path, remove_land_currents):
    """Build a single-timestep xarray Dataset from a grid .mat and a current .mat.

    Args:
        grid_mat_path: path to the .mat file with the coordinate grid. It must
            contain either ``totalGrid`` (used as-is, column 0 read as
            longitude and column 1 as latitude) or ``gx``/``gy``.
        current_mat_path: path to the .mat file with the ``U``, ``V`` and ``t``
            variables for one timestamp.
        remove_land_currents: when truthy, every cell that has a non-NaN ``U``
            value and for which ``coord_inland`` (checked against the
            module-level ``coastline``) is true gets NaN in both ``U`` and ``V``.

    Returns:
        ``xr.Dataset`` with variables ``U`` and ``V`` on dimensions
        ``(time, lat, lon)``; ``time`` has a single entry parsed from ``t``.
    """
    grid = scipy.io.loadmat(grid_mat_path)
    currents = scipy.io.loadmat(current_mat_path)

    if "totalGrid" not in grid:
        # No ready-made point list: pair up the flattened gx / gy arrays.
        gx, gy = grid["gx"], grid["gy"]
        coords = np.empty((gx.size, 2), dtype=np.float32)
        coords[:, 0] = gx.flatten()
        coords[:, 1] = gy.flatten()
    else:
        coords = grid["totalGrid"]

    u_flat = currents["U"]
    v_flat = currents["V"]
    time = np.array([parse_mat_time(currents["t"][0, 0])])

    # Axis values of the regular grid: sorted unique longitudes and latitudes.
    lons = np.sort(np.unique(coords.T[0]))
    lats = np.sort(np.unique(coords.T[1]))
    n_lat, n_lon = len(lats), len(lons)

    u_grid = np.zeros((1, n_lat, n_lon))
    v_grid = np.zeros((1, n_lat, n_lon))
    for i, lon in enumerate(lons):
        for j, lat in enumerate(lats):
            # The flat .mat vectors are indexed longitude-major: all latitudes
            # of the first longitude, then all latitudes of the next, ...
            flat_idx = i * n_lat + j
            mask_cell = (
                remove_land_currents
                and not np.isnan(u_flat[0, flat_idx])
                and coord_inland(coastline, lat, lon)
            )
            if mask_cell:
                u_grid[0, j, i] = np.nan
                v_grid[0, j, i] = np.nan
                continue
            # data from .mat is measured in cm/s
            u_grid[0, j, i] = u_flat[0, flat_idx] / 100
            v_grid[0, j, i] = v_flat[0, flat_idx] / 100

    data_vars = {
        "U": (["time", "lat", "lon"], u_grid),
        "V": (["time", "lat", "lon"], v_grid),
    }
    axes = {
        "time": time,
        "lat": lats,
        "lon": lons,
    }
    return xr.Dataset(data_vars, coords=axes)

Concatenate all the separate `.mat` files into a single dataset.

In [None]:
# Build one single-timestep dataset per .mat file found in the time window and
# join them along "time" in a single call at the end. Calling xr.concat inside
# the loop would re-copy the accumulated data on every iteration, which is
# quadratic in the number of files.
hourly_datasets = []
current_time = start_time

while current_time <= end_time:
    date_parsed, time_parsed = parse_time(current_time)
    filename = mat_tot_dir / f"Tot_SDLJ_{date_parsed}_{time_parsed}.mat"
    if filename.is_file():
        # False: land currents are kept as they are in the .mat data.
        ds = dataset_from_mat(coord_grid_path, filename, False)
        hourly_datasets.append(ds)
    else:
        # Missing timestamps are reported and skipped, leaving a gap in time.
        print(f"file {filename} not found")
    current_time += incr

# ds_total is None when no file at all was found in the time window.
ds_total = xr.concat(hourly_datasets, dim="time") if hourly_datasets else None

In [None]:
# display fieldset to see if data was loaded correctly
fs = dataset_to_fieldset(ds_total)
fs.U.show()

In [None]:
# Name the output file after the year and month of start_time instead of a
# hard-coded "202008", so the file name follows the settings cell.
# date_parsed is "YYYYMMDD"; its first six characters are "YYYYMM".
date_parsed, _ = parse_time(start_time)
saveto = FIELD_NETCDF_DIR / f"oi_fields/Tot_SDLJ_{date_parsed[:6]}.nc"
# Write the concatenated dataset to a single NetCDF file.
ds_total.to_netcdf(saveto)
print(f"saved to {saveto}")