# Consolidating Water Balance Data
In this notebook, we will combine daily GRIDMET climate data and NASA SMAP soil moisture data. We will add a calculation of reference evapotranspiration following the Hargreaves 1985 method. 

We will generate a separate file for each plot, to be used for fitting a leaky bucket model and simulating a soil moisture index.

In [1]:
from concurrent.futures import as_completed, ProcessPoolExecutor
import glob
import numpy as np
import os
import pandas as pd
from tqdm.notebook import tqdm

In [2]:
from refit_fvs.hargreaves import (
    et_rad,
    hargreaves,
    inv_rel_dist_earth_sun,
    sol_dec,
    sunset_hour_angle,
)

In [3]:
# Root of the project data tree, given as a path relative to the notebook's
# working directory; the cells below join "interim" and "raw" subfolders onto it.
DATA_DIR = "../../data"

In [4]:
# Plot metadata table: one row per plot with its latitude, longitude and elevation.
PLOTS = os.path.join(DATA_DIR, "interim", "plot_info_for_climatena.csv")

# The identifier column is called "ID1" in the CSV and only becomes "PLOT_ID"
# after the rename below, so the dtype has to be keyed on "ID1" to take effect
# (a dtype keyed on "PLOT_ID" matches no column at read time and is ignored).
plots = (
    pd.read_csv(PLOTS, dtype={"ID1": int})
    .rename({"ID1": "PLOT_ID"}, axis=1)
    .drop(["ID2"], axis=1)  # secondary identifier is not used in this notebook
    .set_index("PLOT_ID")
)
plots.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 12156 entries, 60101550679 to 530907572668
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   lat     12156 non-null  float64
 1   lon     12156 non-null  float64
 2   el      12156 non-null  int64  
dtypes: float64(2), int64(1)
memory usage: 379.9 KB


In [5]:
# Preview the first rows to sanity-check the PLOT_ID index and the lat/lon/el columns.
plots.head()

Unnamed: 0_level_0,lat,lon,el
PLOT_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
60101550679,41.806228,-123.788726,761
60101551744,41.980638,-124.193526,91
60101551969,41.681432,-123.803842,701
60101552953,41.938125,-123.870868,640
60101553315,41.738938,-123.783382,1432


In [6]:
OVERWRITE = False  # set to True to regenerate files that already exist on disk


def _read_daily(path):
    """Read a per-plot parquet file and index it by daily period.

    Parameters
    ----------
    path : str
        Path to a parquet file containing at least ``PLOT_ID`` and ``DATE`` columns.

    Returns
    -------
    pandas.DataFrame
        The file's contents without ``PLOT_ID``, indexed by a daily ``DATE`` period.
    """
    frame = pd.read_parquet(path).drop("PLOT_ID", axis=1)
    frame["DATE"] = pd.to_datetime(frame.DATE).dt.to_period("D")
    return frame.set_index(["DATE"])


def _add_hargreaves(gridmet, lat_deg):
    """Add an ``EREF_HARGREAVES`` column to a daily GRIDMET frame (modified in place).

    Parameters
    ----------
    gridmet : pandas.DataFrame
        Frame indexed by daily period with ``TMIN`` and ``TMAX`` columns. Both are
        offset by -273.15 before use (Kelvin -> degrees Celsius).
    lat_deg : float
        Plot latitude in decimal degrees; converted to radians here.

    Returns
    -------
    pandas.DataFrame
        The same frame, with ``EREF_HARGREAVES`` added.
    """
    lat_rad = np.ones(len(gridmet)) * np.deg2rad(lat_deg)
    day_of_year = gridmet.index.day_of_year
    declination = sol_dec(day_of_year)
    sunset_angle = sunset_hour_angle(lat_rad, declination)
    inv_rel_dist = inv_rel_dist_earth_sun(day_of_year)
    extraterrestrial_rad = et_rad(lat_rad, declination, sunset_angle, inv_rel_dist)

    gridmet["EREF_HARGREAVES"] = hargreaves(
        gridmet.TMIN - 273.15, gridmet.TMAX - 273.15, extraterrestrial_rad
    )

    # Force reference ET to zero on days whose mean temperature is below freezing.
    avg_temp = (gridmet.TMIN + gridmet.TMAX) / 2 - 273.15
    gridmet.loc[avg_temp < 0, "EREF_HARGREAVES"] = 0
    return gridmet


# Make sure the output folder exists; to_parquet does not create missing directories.
out_dir = os.path.join(DATA_DIR, "interim", "leaky_bucket")
os.makedirs(out_dir, exist_ok=True)

for plot in tqdm(plots.index.values):
    outfile = os.path.join(out_dir, f"{plot}.parquet")

    # Skip plots that were already processed unless a rebuild was requested.
    if os.path.exists(outfile) and not OVERWRITE:
        continue

    # Plots without GRIDMET data are skipped: nothing is written for them.
    gridmet_path = os.path.join(DATA_DIR, "raw", "gridmet", f"{plot}.parquet")
    if not os.path.exists(gridmet_path):
        continue

    # Compute reference ET on the observed days first, then fill interior gaps so
    # the series is continuous at a daily step (leading/trailing gaps stay NaN).
    gridmet = _add_hargreaves(_read_daily(gridmet_path), plots.loc[plot, "lat"])
    gridmet = gridmet.resample("D").interpolate(limit_area="inside")

    # Only merge SMAP into GRIDMET if SMAP data exist for this plot.
    smap_path = os.path.join(DATA_DIR, "raw", "nasa_smap", f"{plot}.parquet")
    if os.path.exists(smap_path):
        smap = _read_daily(smap_path).resample("D").interpolate(limit_area="inside")
        # Outer join keeps every day present in either source.
        gridmet = gridmet.merge(
            smap,
            left_index=True,
            right_index=True,
            how="outer",
        )

    # Write GRIDMET to disk even if SMAP doesn't exist.
    gridmet = gridmet.reset_index()
    gridmet["DATE"] = gridmet["DATE"].astype(str)  # store dates as plain strings
    gridmet.insert(0, "PLOT_ID", int(plot))
    gridmet.to_parquet(outfile, index=False)

  0%|          | 0/12156 [00:00<?, ?it/s]