# Daily Grids to netCDF
Take the daily grids produced by `download_daily_gridmet`, combine and clean them into xarray Datasets, and export them as netCDF files.

In [None]:
import xarray as xr
import os
import numpy as np
import datetime
import pandas as pd
import rasterio as rio
import rioxarray

## Utility functions

In [None]:
def open_rasterio_and_set_dates(path):
    """Open a multi-band raster and index its bands by date.

    Band descriptions are assumed to follow the format yymmdd-z_variable; only the text before the
    first underscore is used, and it is parsed with ``pd.to_datetime``. The ``band`` coordinate is
    replaced by the parsed dates and the dimension is renamed to ``date``.
    """
    # NOTE(review): xr.open_rasterio is deprecated in recent xarray releases in favour of
    # rioxarray.open_rasterio — confirm the pinned xarray version still provides it.
    raster = xr.open_rasterio(path)

    # The date is whatever precedes the first underscore of each band description.
    band_dates = [pd.to_datetime(description.split('_', 1)[0]) for description in raster.descriptions]

    return raster.assign_coords({"band": band_dates}).rename({"band": "date"})

In [None]:
def get_variables_from_paths(paths):
    """Return the sorted unique variable names found in a collection of file paths.

    File names are assumed to follow the format dataset_variable_year.tif. Only the file name
    (not its directory) is inspected, so underscores in directory names do not affect the result.
    The variable is taken to be the second underscore-separated field of the file name, which
    means a variable name that itself contains underscores is truncated to its first segment.

    Raises IndexError if a file name contains no underscore.
    """
    # Use the base name: splitting the whole path would pick up underscores in parent folders.
    variables = [os.path.basename(path).split('_')[1] for path in paths]
    return np.unique(variables)

In [None]:
def split_paths_by_variable(paths):
    """Group file paths by variable, assuming file names follow the format dataset_variable_year.tif.

    This is a generator: for each unique variable (in sorted order) it yields a tuple of the
    variable name and the list of paths whose file name carries exactly that variable. Paths keep
    their input order within each group.
    """
    def get_variable_from_path(path):
        """Get a variable name from a path, assuming the file follows the format dataset_variable_year.tif.
        The variable name may contain underscores but the dataset and year may not.
        """
        filename = os.path.basename(path)
        fields = filename.split('_')

        # Strip the leading "dataset_" and trailing "_year.tif"; whatever remains is the variable.
        dataset = fields[0] + "_"
        year = "_" + fields[-1]
        return filename.removeprefix(dataset).removesuffix(year)

    # Group on the exact extracted variable. A substring test against the path would put
    # e.g. "fm1000" files into the "fm100" group, or match text in a directory name.
    paths_by_variable = {}
    for path in paths:
        paths_by_variable.setdefault(get_variable_from_path(path), []).append(path)

    for variable in sorted(paths_by_variable):
        yield (variable, paths_by_variable[variable])

In [None]:
def load_dataset(directory):
    """Load all rasters in a directory that belong to a single dataset with one or more variables.

    Every ``.tif`` file directly inside ``directory`` is opened, the files of each variable are
    concatenated along the ``date`` dimension into one DataArray named after the variable, and the
    per-variable arrays are merged into a single Dataset, which is returned.
    """
    # os.listdir returns entries in arbitrary order; sort the paths so each variable's files are
    # concatenated in a deterministic order (chronological, assuming equal-width year suffixes).
    raster_paths = sorted(
        os.path.join(directory, name) for name in os.listdir(directory) if name.endswith(".tif")
    )

    variable_arrays = []
    for variable, variable_paths in split_paths_by_variable(raster_paths):
        per_file_arrays = [open_rasterio_and_set_dates(raster_path) for raster_path in variable_paths]
        combined = xr.concat(per_file_arrays, dim="date")
        combined.name = variable
        variable_arrays.append(combined)

    return xr.merge(variable_arrays)

## Constants

In [None]:
# Directory that holds the input rasters and receives the exported files; the relative path
# resolves to a "data" folder one level above the working directory.
data_dir = os.path.join("..", "data")

## gridMET

In [None]:
# Load every .tif raster in the gridMET data folder into a single Dataset.
gridmet_dir = os.path.join(data_dir, "gridMET")
gridmet = load_dataset(gridmet_dir)
# Write the CRS through the rioxarray accessor, taking it from the dataset's own `crs` attribute.
gridmet.rio.write_crs(gridmet.crs, inplace=True)

In [None]:
# Export the combined dataset as gridMET.nc inside the gridMET data folder.
gridmet.to_netcdf(os.path.join(data_dir, "gridMET", "gridMET.nc"))