# access thredds

Download sliced netcdf data from Thredds

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import os
import time
import numpy as np
import xarray as xr

import utils
from thredds_utils import ThreddsCode, get_thredds_dataset
from parcels_utils import xr_dataset_to_fieldset

In [None]:
def get_latest_span(delta):
    """Return a ``(start, end)`` pair covering the most recent *delta* of time.

    ``end`` is the current time at hour resolution (GMT; the data is
    recorded hourly) and ``start`` is ``end - delta``.

    Args:
        delta: Length of the span, e.g. ``np.timedelta64(300, "D")``.

    Returns:
        Tuple ``(start, end)`` of ``np.datetime64`` values.
    """
    end = np.datetime64("now", "h")
    start = end - delta
    return start, end


def get_time_slice(time_range):
    """Convert a time-range tuple into a ``slice`` of ``np.datetime64`` bounds.

    Args:
        time_range: Either ``(start, stop)`` or ``(start, stop, step)``.
            ``start`` and ``stop`` are anything ``np.datetime64`` accepts
            (e.g. ``"2020-08-01T01"``); ``step`` is passed through unchanged
            and is an integer step size in hours.

    Returns:
        ``slice(start, stop)`` or ``slice(start, stop, step)`` with the
        bounds converted to ``np.datetime64``.

    Raises:
        ValueError: If ``time_range`` does not have exactly 2 or 3 elements.
            Previously such input silently produced ``None``.
    """
    if len(time_range) == 2:
        start, stop = time_range
        return slice(np.datetime64(start), np.datetime64(stop))
    if len(time_range) == 3:
        # step size is an integer in hours
        start, stop, step = time_range
        return slice(np.datetime64(start), np.datetime64(stop), step)
    raise ValueError(
        f"time_range must have 2 or 3 elements, got {len(time_range)}"
    )
    

def get_regs_year(year, name, lat_rng, lon_rng):
    """Build one region tuple per month and per resolution for a whole year.

    For every calendar month of *year*, three tuples are produced (1 km,
    2 km and 6 km hourly codes, in that order). Each tuple has the form
    ``(name_month, code, time range, lat_rng, lon_rng, flag)``, where the
    time range runs from the first hour of the month's first day to hour 23
    of its last day, and the flag is ``False`` for 1 km and ``True`` for the
    2 km and 6 km codes.

    Args:
        year: Integer year, e.g. ``2020``.
        name: Prefix for the region name; the month (``YYYY-MM``) is appended.
        lat_rng: Latitude range, copied into every tuple.
        lon_rng: Longitude range, copied into every tuple.

    Returns:
        List of region tuples, grouped by month.
    """
    # (dataset code, trailing boolean flag) emitted for each month, in order
    resolutions = (
        (ThreddsCode.USWC_1KM_HOURLY, False),
        (ThreddsCode.USWC_2KM_HOURLY, True),
        (ThreddsCode.USWC_6KM_HOURLY, True),
    )
    regions = []
    for month in np.arange(str(year), str(year + 1), dtype="datetime64[M]"):
        next_month = month + np.timedelta64(1, "M")
        days = np.arange(month, next_month, dtype="datetime64[D]")
        first_hour = np.datetime64(days[0], "h")
        last_hour = days[-1] + np.timedelta64(23, "h")
        timerng = (first_hour, last_hour)
        label = f"{name}_{month}"
        regions.extend(
            (label, code, timerng, lat_rng, lon_rng, flag)
            for code, flag in resolutions
        )
    return regions

### Format of region_data entries

(name, resolution, time range, lat range, lon range, expand range)

### about tj_sample

The purpose of tj_sample is to provide a quick and dirty way to sample the Thredds data at many different times in order to find the positions where data exists. Data from closely spaced times could all share the same holes, so from a short time range alone we would never know whether data was supposed to be there in the first place.

So tj_sample is generated for the sole purpose of creating a mask showing where data shouldn't exist.

In [None]:
# Each entry describes one region to download. Tuple reference:
# (name, region code, time range, lat range, lon range, include domain endpoints)
# The time range is (start, end), optionally with a third element
# (presumably a step, as in get_time_slice -- confirm in get_thredds_dataset).
region_data = [
    # sparse sample across two years, used to build a mask of where data shouldn't exist
    (
        "tj_sample",
        ThreddsCode.USWC_1KM_HOURLY,
        ("2019-01-01T00", "2021-01-01T00", 300),
        (32.11093, 32.73124),
        (-117.565, -116.9924),
        False,
    ),
    # alternative regions, currently disabled; uncomment to download them
#     ("tj_plume", ThreddsCode.USWC_1KM_HOURLY, ("2020-08-01T01", "2020-08-14T13"), (32.11093, 32.73124), (-117.565, -116.9924), False),
#     ("tj_plume", ThreddsCode.USWC_2KM_HOURLY, ("2020-08-01T01", "2020-08-14T13"), (32.11093, 32.73124), (-117.565, -116.9924), True),
#     ("tj_plume", ThreddsCode.USWC_6KM_HOURLY, ("2020-08-01T01", "2020-08-14T13"), (32.11093, 32.73124), (-117.565, -116.9924), True),
#     ("tijuana_river", ThreddsCode.USWC_1KM_HOURLY, ("2020-06-16T21", "2020-06-23T21"), (32.528, 32.71), (-117.29, -117.11), False),
#     ("tijuana_river", ThreddsCode.USWC_2KM_HOURLY, ("2020-06-16T21", "2020-06-23T21"), (32.524, 32.75), (-117.32, -117.09), False),
#     ("tijuana_river", ThreddsCode.USWC_6KM_HOURLY, ("2020-06-16T21", "2020-06-23T21"), (32.35, 32.80), (-117.33, -116.9), False),
#     ("tijuana_river_small", ThreddsCode.USWC_1KM_HOURLY, ("2020-06-16T21", "2020-06-23T21"), (32.528, 32.6), (-117.19, -117.11), False),
#     ("tijuana_river_now", ThreddsCode.USWC_1KM_HOURLY, get_latest_span(np.timedelta64(300, "D")), (32.528, 32.71), (-117.29, -117.11), False),
#     ("tijuana_river_now", ThreddsCode.USWC_2KM_HOURLY, get_latest_span(np.timedelta64(300, "D")), (32.524, 32.75), (-117.32, -117.09), False),
#     ("tijuana_river_now", ThreddsCode.USWC_6KM_HOURLY, ("2019-09-28T21:00", "2020-07-24T20"), (32.35, 32.80), (-117.33, -116.9), False),
#     ("missing_buoy", ThreddsCode.USWC_6KM_HOURLY, ("2021-01-29T05", "2021-02-02T16"), (33.15, 33.778072), (-118.697986, -117.6), False)
]


# optional: append a full year of monthly regions at every resolution
# for rd in get_regs_year(2020, "tj_plume", (32.11093, 32.73124), (-117.565, -116.9924)):
#     region_data.append(rd)

In [None]:
# Fetch the dataset for every entry of region_data via get_thredds_dataset
# and report its size.
regions = []
for name, code, time_rng, lat_rng, lon_rng, inclusive in region_data:
    dataset = get_thredds_dataset(code, time_rng, lat_rng, lon_rng, inclusive=inclusive)
    regions.append({"name": name, "dataset": dataset})
    # nbytes is in bytes; divide twice by 1024 to report megabytes
    size_mb = dataset.nbytes / 1024 / 1024
    print(f"region {name} data megabytes: {size_mb}")

In [None]:
# Write each fetched dataset to "<region name>_<region code>.nc" inside the
# directory returned by utils.create_path(utils.CURRENT_NETCDF_DIR).
for idx, region in enumerate(regions):
    save_dir = utils.create_path(utils.CURRENT_NETCDF_DIR)
    # regions was built in the same order as region_data, so idx lines up
    code = region_data[idx][1]
    filename = f"{region['name']}_{code}.nc"
    target = save_dir / filename
    # save file
    region["dataset"].to_netcdf(target)
    print(f"saved to {target}")
print("done")