# Download data

Download sliced data from an online source.

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import copy
import os
import time

import numpy as np
import xarray as xr

from pyplume.constants import FIELD_NETCDF_DIR
from pyplume import dataloaders, utils

In [None]:
def get_latest_span(delta):
    """Return the ``(start, end)`` window of length ``delta`` that ends now.

    The end of the window is the current time truncated to whole hours
    (``numpy.datetime64("now", "h")``, i.e. UTC/GMT), matching data that is
    recorded hourly.

    Args:
        delta: Amount to subtract from the end time; anything that can be
            subtracted from a ``numpy.datetime64`` (e.g. ``numpy.timedelta64``).

    Returns:
        A 2-tuple ``(end - delta, end)``.
    """
    # Read the clock once so both ends of the span share the same reference.
    span_end = np.datetime64("now", "h")
    span_start = span_end - delta
    return span_start, span_end

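### Format of `region_data` entries

Each entry is a dict with the keys `name`, `url`, `time_range`, `lat_range`, `lon_range` and `inclusive`, plus the optional keys `u_key`, `v_key` and `drop_vars`. (This replaces the older tuple layout: name, resolution, time range, lat range, lon range, expand range.)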

### about tj_sample

The purpose of `tj_sample` is to provide a quick-and-dirty way to sample the THREDDS data at many different times in order to find the positions where data exists. Data from closely spaced time ranges could all share the same holes, so from those alone we would never know whether data was supposed to be there in the first place.

So `tj_sample` is generated for the sole purpose of creating a mask showing where data shouldn't exist.

## data masks

Where is there data? Not every timestep of HFR data is complete, so we need to know which NaN points were supposed to have data and which ones were never meant to have data.

A good way to find this out is to take several slices of data over a long period of time and check the coverage of each timestamp. This is the easiest way to approximate the true coverage of HFR.

In [None]:
# Datasets to download. In the download loop each entry's "name" becomes the
# output file stem ("<name>.nc"), its "url" is passed positionally to
# dataloaders.DataLoader, and every remaining key is forwarded to DataLoader
# as a keyword argument.
# NOTE(review): the longitudes of the active entry look like 0-360 degrees
# east, while the disabled HFR entries use negative (west) longitudes --
# confirm the convention expected by each dataset.
region_data = [
    dict(
        name="hurrhenri_hycom",
        url="https://tds.hycom.org/thredds/dodsC/GLBy0.08/expt_93.0/uv3z",
        time_range=("2021-08-21T12:00", "2021-08-30T18:00"),
        lat_range=(38.162201, 41.520008),
        lon_range=(284.290368, 290.276249),
        inclusive=True,
        u_key="water_u",
        v_key="water_v",
        drop_vars=["tau"],
    ),
    # Disabled entries, kept for reference; uncomment to download them too.
    # {
    #     "name": "hurrkay_2km",
    #     "url": "http://hfrnet-tds.ucsd.edu/thredds/dodsC/HFR/USWC/2km/hourly/RTV/HFRADAR_US_West_Coast_2km_Resolution_Hourly_RTV_best.ncd",
    #     "time_range": ["2022-09-08T06:00", "2022-09-12T00:00"],
    #     "lat_range": (32.11093, 32.73124),
    #     "lon_range": (-118.565, -115.9924),
    #     "inclusive": True,
    # },
    # {
    #     "name": "hurrkay_6km",
    #     "url": "http://hfrnet-tds.ucsd.edu/thredds/dodsC/HFR/USWC/6km/hourly/RTV/HFRADAR_US_West_Coast_6km_Resolution_Hourly_RTV_best.ncd",
    #     "time_range": ["2022-09-08T06:00", "2022-09-12T00:00"],
    #     "lat_range": (32.11093, 32.73124),
    #     "lon_range": (-118.565, -115.9924),
    #     "inclusive": True,
    # },
]

In [None]:
# Download every configured region and save each one as "<name>.nc" inside
# the directory returned by utils.get_dir(FIELD_NETCDF_DIR).
save_dir = utils.get_dir(FIELD_NETCDF_DIR)
regions = []
# Iterate over deep copies so that popping keys below never mutates the
# entries stored in region_data (the cell can then be re-run safely).
for rd in map(copy.deepcopy, region_data):
    # "name" and "url" are consumed here; whatever is left in rd is passed
    # straight through to DataLoader as keyword arguments.
    name = rd.pop("name")
    url = rd.pop("url")
    with dataloaders.DataLoader(url, **rd) as dl:
        dl.save(save_dir / f"{name}.nc")