# Process CONUS404 (c404) bias-corrected daily data to the NHGF geospatial fabric v1.1

In [None]:
import xarray as xr
import hvplot.xarray
import hvplot.pandas
import hvplot.dask

import warnings

import datetime
import dask.bag as db
import pandas as pd
import geopandas as gpd

from holoviews.element.tiles import EsriTerrain

from gdptools.helpers import calculate_weights
from gdptools.helpers import run_weights
from gdptools.helpers import finalize_netcdf

warnings.filterwarnings('ignore')

Build the starting date, ending date, and file-name label for each calendar year. The data are processed and written to one file per year.

In [2]:
# Calendar-year processing windows: one (start, end, file label) triple per year.
#
# The year boundaries are built explicitly instead of with the "AS" / "A" / "1Y"
# frequency aliases, which pandas deprecated in 2.2 (they emit a FutureWarning
# there), so this cell behaves the same on any pandas version.
START_YEAR = 1980  # first calendar year to process
N_YEARS = 40  # number of consecutive calendar years to process
# NOTE: the outputs saved in this notebook come from a run that used
# START_YEAR = 2008 and N_YEARS = 14.

years = range(START_YEAR, START_YEAR + N_YEARS)
t_start_series = pd.to_datetime([f"{year}-01-01" for year in years])  # Jan 1 of each year
t_end_series = pd.to_datetime([f"{year}-12-31" for year in years])  # Dec 31 of each year
f_time_series = t_end_series  # only the year component is used, for file names

# ISO-8601 strings (midnight, microsecond precision) used as the sdate / edate
# arguments of the processing loop.
time_start = [t.strftime("%Y-%m-%dT%H:%M:%S.%f") for t in t_start_series]
time_end = [t.strftime("%Y-%m-%dT%H:%M:%S.%f") for t in t_end_series]
# Four-digit year labels used as the output file-name prefix.
file_time = [t.strftime("%Y") for t in f_time_series]

In [3]:
# Spot-check: display the start timestamp of the first processing window.
time_start[0]

'2008-01-01T00:00:00.000000'

In [4]:
# PROJ string for the c404 bias-corrected daily grid, taken from the crs
# attributes of the dataset variables. One parameter per line for readability;
# the joined result is a single space-separated PROJ string.
c404_proj = " ".join(
    (
        "+proj=lcc",
        "+units=m",
        "+a=6370000.0",
        "+b=6370000.0",
        "+lat_1=30.0",
        "+lat_2=60.0",
        "+lat_0=40.0",
        "+lon_0=-97.0",
        "+x_0=0",
        "+y_0=0",
        "+k_0=1.0",
        "+nadgrids=@null",
        "+wktext",
        "+no_defs",
    )
)

In [5]:
# Read the HRU geometry shapefile and show how many rows it contains.
gdf = gpd.read_file("GFv1.1_simple.shp")
gdf.shape[0]

139801

In [6]:
# Group the HRU rows by their nhru_v1_1 identifier.
gdf2 = gdf.groupby(by="nhru_v1_1")
# GroupBy.head returns the first five rows of *each* group, not of the whole
# frame, so the display below covers most of the table.
gdf2.head(5)
            

Unnamed: 0,LAYER,GM_TYPE,OBJECTID,nhru_v1_1,hru_segme1,nhm_id,hru_id_nat,Version,Shape_Leng,Shape_Area,Change,geometry
0,NHM\nhru_v1_1_simp,Unknown Area Type,1,76127,40038,76128,76128,1.0,80441.423,188118773,-0.017302,"POLYGON ((-105544.567 804074.976, -105415.315 ..."
1,NHM\nhru_v1_1_simp,Unknown Area Type,2,76147,40038,76148,76148,1.0,53413.506,44185975,0.054540,"POLYGON ((-97185.217 806355.005, -97154.773 80..."
2,NHM\nhru_v1_1_simp,Unknown Area Type,3,76170,40021,76171,76171,1.0,54988.828,73389194,0.018316,"POLYGON ((-105894.643 815045.861, -105703.017 ..."
3,NHM\nhru_v1_1_simp,Unknown Area Type,3,76170,40021,76171,76171,1.0,54988.828,73389194,0.018316,"POLYGON ((-101865.303 821054.983, -101864.906 ..."
4,NHM\nhru_v1_1_simp,Unknown Area Type,3,76170,40021,76171,76171,1.0,54988.828,73389194,0.018316,"POLYGON ((-101865.303 821054.983, -101864.966 ..."
...,...,...,...,...,...,...,...,...,...,...,...,...
139796,NHM\nhru_v1_1_simp,Unknown Area Type,114954,57964,31028,57965,57965,1.0,117117.940,255080364,0.002392,"POLYGON ((-305985.253 2729805.067, -305865.062..."
139797,NHM\nhru_v1_1_simp,Unknown Area Type,114955,64080,28886,64081,64081,1.0,67362.398,110274075,0.002877,"POLYGON ((-1284135.000 2658485.000, -1284035.0..."
139798,NHM\nhru_v1_1_simp,Unknown Area Type,114956,64150,28866,64151,64151,1.0,80355.339,188483780,0.022769,"POLYGON ((-1347645.097 2651834.819, -1347614.8..."
139799,NHM\nhru_v1_1_simp,Unknown Area Type,114957,65633,31412,65634,65634,1.0,71022.681,73867000,-0.005068,"POLYGON ((-1017265.000 2869285.000, -1017185.0..."


In [7]:
# Open the CONUS404 bias-corrected daily Zarr store as an xarray Dataset.
ds2 = xr.open_dataset(
    "/caldera/hytest_scratch/scratch/conus404/conus404_daily_bc.zarr",
    engine="zarr",
)

In [8]:
# Display the dataset summary to inspect its variables and coordinates.
ds2

In [9]:
# Weights table handed to run_weights() as `wght_file` in the processing loop.
# It is loaded here explicitly so the notebook runs from a fresh kernel instead
# of depending on a variable left over from an earlier session.
# NOTE(review): presumably this CSV was produced beforehand with gdptools'
# calculate_weights -- confirm the file is present next to the notebook.
wghts = pd.read_csv('c404_bc_wghts_gfv1_1.csv')

# Column of the HRU shapefile that identifies each polygon.
poly_idx = 'nhru_v1_1'

In [10]:
# Initial metadata for the gdptools processing functions.
data_crs = c404_proj  # projection of the gridded source data

# Names of the x, y and time coordinates in the source dataset.
x_coord, y_coord, t_coord = "x", "y", "time"

# Default single-day window; the yearly processing loop re-assigns both names.
sdate = edate = "2016-10-01T00:00:00"

var = "daily_maximum_temperature"

# NOTE(review): the two integers below look like EPSG codes -- confirm.
shp_crs = 5070  # CRS of the HRU shapefile
wght_gen_crs = 6931  # CRS for weight generation

shp_poly_idx = poly_idx  # polygon-ID column of the shapefile

In [11]:
# Output-file description passed to finalize_netcdf() as `work_dict`.
#
# "dims" names the output dimensions, "feature" / "lat" / "lon" describe the
# coordinate variables, and each remaining key is a source variable name
# (RAIN, T2MAX, T2MIN) mapped to its output name, descriptive attributes and
# unit-conversion settings.
# NOTE(review): exactly how finalize_netcdf consumes each field is not visible
# in this notebook -- check the gdptools documentation before adding keys.
dict_new = {
    # Fixed: "y" previously pointed at "x", a copy-paste slip from the line's
    # neighbouring "x" entry.
    "dims": {"feature": "nhru", "time": "time", "x": "x", "y": "y"},
    "feature": {
        "varname": "nhru",
        "long_name": "local model Hydrologic Response Unit ID (HRU)",
    },
    "lat": {
        "varname": "y",
        "long_name": "y coordinate of projection",
        "units": "m",
        "standard_name": "projection_y_coordinate",
    },
    "lon": {
        "varname": "x",
        "long_name": "x coordinate of projection",
        "units": "m",
        "standard_name": "projection_x_coordinate",
    },
    # Precipitation: millimeter -> inches.
    "RAIN": {
        "varname": "prcp",
        "long_name": "Daily total precipitation",
        "standard_name": "prcp",
        "convert": True,
        "native_unit": "millimeter",
        "convert_unit": "inches",
    },
    # Maximum temperature: degK -> degF.
    "T2MAX": {
        "varname": "tmax",
        "long_name": "Daily maximum temperature",
        "standard_name": "tmax",
        "convert": True,
        "native_unit": "degK",
        "convert_unit": "degF",
    },
    # Minimum temperature: degK -> degF.
    "T2MIN": {
        "varname": "tmin",
        "long_name": "Daily minimum temperature",
        "standard_name": "tmin",
        "convert": True,
        "native_unit": "degK",
        "convert_unit": "degF",
    },
}

# Per-variable attributes passed to finalize_netcdf() as `var_dict`.
#
# long_name and units are copied from the source dataset's own variable
# attributes; standard_name is the short output name for the variable.
# Building every entry from one (source name, standard name) table keeps the
# entries consistent. This fixes the T2MIN entry, whose standard_name was
# previously "tmax".
var_dict = {
    name: {
        "long_name": ds2[name].long_name,
        "units": ds2[name].units,
        "varname": name,
        "standard_name": std_name,
    }
    for name, std_name in (
        ("RAIN", "prcp"),
        ("T2MAX", "tmax"),
        ("T2MIN", "tmin"),
    )
}

In [12]:
# Source variables to aggregate onto the HRU polygons.
# NOTE(review): `vars` shadows the Python builtin of the same name; the name is
# kept because it is a notebook-level variable.
vars = ["RAIN", "T2MAX", "T2MIN"]

# Process one calendar year per iteration and write one output file per year.
# `wghts` is not assigned by any active statement in this cell; it must
# already exist in the kernel when the loop runs.
for sdate, edate, year_label in zip(time_start, time_end, file_time):
    print(sdate, edate)

    # Per-variable results for this year, kept in the same order as `vars`.
    gdflist, vallist = [], []
    for var_name in vars:
        ngdf, vals = run_weights(
            var=var_name,
            x_coord=x_coord,
            y_coord=y_coord,
            t_coord=t_coord,
            ds=ds2,
            ds_proj=c404_proj,
            wght_file=wghts,
            shp=gdf,
            geom_id=shp_poly_idx,
            sdate=sdate,
            edate=edate,
        )
        gdflist.append(ngdf)
        vallist.append(vals)

    # Write this year's variables into the current directory, with the year
    # as the leading part of the file-name prefix.
    result = finalize_netcdf(
        gdf=gdflist,
        vals=vallist,
        p_opath=".",
        prefix=f"{year_label}_gfv11_c404_daily_bc",
        start_date=sdate,
        time_interval=1,
        time_type="days",
        var_dict=var_dict,
        use_opt_dict=True,
        work_dict=dict_new,
    )


2008-01-01T00:00:00.000000 2008-12-31T00:00:00.000000
processing time for var: RAIN
    Processing RAIN for feature 1
    Processing RAIN for feature 50001
    Processing RAIN for feature 100001
processing time for var: T2MAX
    Processing T2MAX for feature 1
    Processing T2MAX for feature 50001
    Processing T2MAX for feature 100001
processing time for var: T2MIN
    Processing T2MIN for feature 1
    Processing T2MIN for feature 50001
    Processing T2MIN for feature 100001
3 <class 'geopandas.geodataframe.GeoDataFrame'> nhru_v1_1
output path exists
nhru Int64Index([     1,      2,      3,      4,      5,      6,      7,      8,
                 9,     10,
            ...
            114949, 114950, 114951, 114952, 114953, 114954, 114955, 114956,
            114957, 114958],
           dtype='int64', name='nhru_v1_1', length=114958)
Processing: RAIN
Processing: T2MAX
Processing: T2MIN
2009-01-01T00:00:00.000000 2009-12-31T00:00:00.000000
processing time for var: RAIN
    Processi