<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Startup" data-toc-modified-id="Startup-1">Startup</a></span></li><li><span><a href="#Load-data" data-toc-modified-id="Load-data-2">Load data</a></span></li><li><span><a href="#Aggregation-and-writing-output" data-toc-modified-id="Aggregation-and-writing-output-3">Aggregation and writing output</a></span><ul class="toc-item"><li><span><a href="#SUMMA" data-toc-modified-id="SUMMA-3.1">SUMMA</a></span></li><li><span><a href="#VIC" data-toc-modified-id="VIC-3.2">VIC</a></span></li><li><span><a href="#PRMS" data-toc-modified-id="PRMS-3.3">PRMS</a></span></li></ul></li></ul></div>

# Startup

In [None]:
%pylab inline
import xarray as xr
import geopandas as gp
from rasterio import features
from affine import Affine
from jupyterthemes import jtplot
import cartopy.crs as ccrs

# Unit-conversion factors applied to model rates further down.
SEC_PER_DAY = 86400
MM_PER_M = 1000

# Time window selected from every model dataset.
daily_slice = slice('01-01-1960', '12-31-2009')

# Variables every model dataset is reduced to before it is written out.
out_vars = [
    'evaporation',
    'precipitation',
    'runoff',
    'swe',
    'soil_moisture',
    'canopy_moisture',
]

In [None]:
# Configure the notebook-wide plotting theme and default figure size.
# A throwaway figure is created first and closed again at the end of the cell.
# NOTE(review): presumably this forces the inline backend to initialise so the
# jupyterthemes style is not overridden afterwards - confirm before removing.
plt.subplots()
jtplot.style('grade3', fscale=1.3)
jtplot.figsize(x=18, y=10)
# Also set the same 18x10 default directly in matplotlib's rcParams.
mpl.rcParams['figure.figsize'] = (18, 10)
plt.close()

# Load data

In [None]:
# Input locations (relative to the notebook's working directory).
SHAPEFILE = './data/shapefile.shp'
SUMMA_WILL = '../willamette_merged.nc'
SUMMA_SNAKE = '../snake_merged.nc'
SUMMA_ROCKIES = '../rockies_merged.nc'
SUMMA_OLYS = '../olympics_merged.nc'
PRMS_DATASET = '../prms_data/merged.19500101-20111231.nc'
VIC_DATASET = '../vic_data/merged.19500101-20111231.nc'

# Open the four regional SUMMA files and cut each to the analysis window.
summa_will, summa_snake, summa_rockies, summa_olys = (
    xr.open_dataset(summa_path).sel(time=daily_slice)
    for summa_path in (SUMMA_WILL, SUMMA_SNAKE, SUMMA_ROCKIES, SUMMA_OLYS)
)

# Aggregation and writing output
## SUMMA

In [None]:
def aggregate_summa(ds):
    """Reduce a raw SUMMA dataset to the shared water-balance variables.

    Entries whose ``scalarTotalRunoff_mean`` is not greater than -100 are
    masked (labels that end up entirely masked are dropped) before any
    variable is derived.

    Parameters
    ----------
    ds : xarray.Dataset
        SUMMA output holding the ``*_mean`` flux variables and the scalar
        storage variables read below.

    Returns
    -------
    xarray.Dataset
        A dataset restricted to the names listed in the module-level
        ``out_vars``.
    """
    valid = ds.where(ds['scalarTotalRunoff_mean'] > -100, drop=True)

    # Per-second rate -> per-day amount, then move every value one step
    # earlier along time (values wrap around, the time labels stay put).
    precip = (valid['pptrate_mean'] * SEC_PER_DAY).roll(roll_coords=False, time=-1)

    # Sum the evaporative flux components in a fixed order.
    evap_components = ('scalarGroundEvaporation_mean',
                       'scalarCanopyEvaporation_mean',
                       'scalarCanopyTranspiration_mean',
                       'scalarSnowSublimation_mean',
                       'scalarCanopySublimation_mean')
    evap_rate = valid[evap_components[0]]
    for component in evap_components[1:]:
        evap_rate = evap_rate + valid[component]

    # NOTE(review): runoff and the three storage terms are sign-flipped while
    # precipitation and evaporation are not - confirm this matches the sign
    # convention of the merged SUMMA files.
    derived = {
        'precipitation': precip,
        'evaporation': SEC_PER_DAY * evap_rate,
        'runoff': -valid['scalarTotalRunoff_mean'] * SEC_PER_DAY * MM_PER_M,
        'swe': -valid['scalarSWE'],
        'soil_moisture': -(valid['scalarTotalSoilLiq'] + valid['scalarTotalSoilIce']),
        'canopy_moisture': -(valid['scalarCanopyIce'] + valid['scalarCanopyLiq']),
    }
    return valid.assign(**derived)[out_vars]

# Replace each raw regional SUMMA dataset with its reduced water-balance form.
summa_will, summa_snake, summa_rockies, summa_olys = [
    aggregate_summa(raw_region)
    for raw_region in (summa_will, summa_snake, summa_rockies, summa_olys)
]

In [None]:
def transform_from_latlon(lat, lon):
    """Build the affine transform for 1-d latitude/longitude vectors.

    The origin is taken from the first element of each vector and the cell
    size from the difference between the first two elements, so both inputs
    need at least two entries and only that first spacing is inspected.
    """
    lat_vals = np.asarray(lat)
    lon_vals = np.asarray(lon)
    x_origin, y_origin = lon_vals[0], lat_vals[0]
    x_step = lon_vals[1] - lon_vals[0]
    y_step = lat_vals[1] - lat_vals[0]
    return Affine.translation(x_origin, y_origin) * Affine.scale(x_step, y_step)

def rasterize(shapes, coords, latitude='lat', longitude='lon',
              fill=np.nan, **kwargs):
    """Burn (geometry, value) pairs onto the grid described by ``coords``.

    ``coords[latitude]`` and ``coords[longitude]`` must be 1-d; they define
    both the output shape and the affine transform handed to rasterio.
    Cells not covered by any shape receive ``fill``. Extra keyword arguments
    are forwarded to ``rasterio.features.rasterize``.

    Returns an ``xarray.DataArray`` with dims ``(latitude, longitude)``
    carrying the same coordinate vectors.
    """
    lat_axis = coords[latitude]
    lon_axis = coords[longitude]
    burned = features.rasterize(
        shapes,
        out_shape=(len(lat_axis), len(lon_axis)),
        fill=fill,
        transform=transform_from_latlon(lat_axis, lon_axis),
        dtype=float,
        **kwargs)
    return xr.DataArray(burned,
                        coords={latitude: lat_axis, longitude: lon_axis},
                        dims=(latitude, longitude))

In [None]:
WILLAMETTE = './data/subshapes/willamette4.shp'
SNAKE = './data/subshapes/snake.shp'
ROCKIES = './data/subshapes/can_rockies.shp'
OLYMPIC = './data/subshapes/olympics.shp'


def _read_region_shapes(path):
    """Read a region shapefile and reproject it to EPSG:4326."""
    # `epsg=4326` replaces the `{'init': 'epsg:4326'}` dict, which newer
    # pyproj/geopandas releases flag as deprecated.
    return gp.GeoDataFrame.from_file(path).to_crs(epsg=4326)


gdf_will = _read_region_shapes(WILLAMETTE)
gdf_snake = _read_region_shapes(SNAKE)
gdf_rockies = _read_region_shapes(ROCKIES)
gdf_olys = _read_region_shapes(OLYMPIC)

# Every HRU id present in the Olympics shapefile, recorded before filtering.
tot_hru = set(gdf_olys['hru'].values)

# Keep only polygons whose HRU id appears in the matching SUMMA dataset.
# (The Olympics frame used to be filtered twice; once is sufficient.)
gdf_will = gdf_will[gdf_will['hru'].isin(summa_will.hru.values)]
gdf_snake = gdf_snake[gdf_snake['hru'].isin(summa_snake.hru.values)]
gdf_rockies = gdf_rockies[gdf_rockies['hru'].isin(summa_rockies.hru.values)]
gdf_olys = gdf_olys[gdf_olys['hru'].isin(summa_olys.hru.values)]

# HRU ids in the Olympics shapefile that are absent from the SUMMA dataset.
torun_hru = tot_hru - set(gdf_olys['hru'])

In [None]:
# Preprocess SUMMA
# Select the HRUs of each region using the HRU ids (and their order) from the
# matching GeoDataFrame.
summa_will, summa_snake, summa_rockies, summa_olys = [
    region_ds.sel(hru=region_gdf['hru'].values)
    for region_ds, region_gdf in ((summa_will, gdf_will),
                                  (summa_snake, gdf_snake),
                                  (summa_rockies, gdf_rockies),
                                  (summa_olys, gdf_olys))
]

# Write the per-HRU datasets, one file per region.
names = ['summa_will', 'summa_snake', 'summa_rockies', 'summa_olys']
regions = [summa_will, summa_snake, summa_rockies, summa_olys]
for n, r in zip(names, regions):
    r.to_netcdf('./data/{}.nc'.format(n))

In [None]:
def scale_to_area(ds, gdf):
    """Weight each water-balance variable by the per-row ``rel_area`` of ``gdf``.

    The weighting is done on a copy, so the dataset passed in is left
    untouched. Overwriting the caller's variables in place meant that
    re-running the aggregation cell multiplied by ``rel_area`` again on every
    execution.

    Parameters
    ----------
    ds : mapping-like dataset (e.g. ``xarray.Dataset``)
        Must contain the six water-balance variables listed in ``wb_vars``
        and support ``.copy()``.
    gdf : table with a ``rel_area`` column
        The weights are applied as a bare array, i.e. positionally: row ``i``
        of ``gdf`` must correspond to entry ``i`` along the trailing axis of
        each variable.

    Returns
    -------
    A copy of ``ds`` in which the six variables are multiplied by
    ``rel_area``; any other variables are carried over unchanged.
    """
    wb_vars = ['precipitation', 'swe', 'evaporation',
               'runoff', 'soil_moisture', 'canopy_moisture']
    rel_area = gdf['rel_area'].values
    scaled = ds.copy()
    for var in wb_vars:
        scaled[var] = ds[var] * rel_area
    return scaled

In [None]:
# Area-weighted combination over HRUs: weight by rel_area, normalise by the
# total rel_area of the region, then sum along the hru dimension.
summa_will_seas, summa_snake_seas, summa_rockies_seas, summa_olys_seas = [
    (scale_to_area(region_ds, region_gdf) / region_gdf['rel_area'].sum()).sum(dim='hru')
    for region_ds, region_gdf in ((summa_will, gdf_will),
                                  (summa_snake, gdf_snake),
                                  (summa_rockies, gdf_rockies),
                                  (summa_olys, gdf_olys))
]

In [None]:
# Persist the regional SUMMA aggregates, one NetCDF file per region.
for region_total, out_path in ((summa_will_seas, './data/summa_will_info.nc'),
                               (summa_snake_seas, './data/summa_snake_info.nc'),
                               (summa_rockies_seas, './data/summa_rockies_info.nc'),
                               (summa_olys_seas, './data/summa_olys_info.nc')):
    region_total.to_netcdf(out_path)

## VIC

In [None]:
# Load in regions
# These re-read the region outlines and rebind the gdf_* names to frames that
# are not filtered by HRU. Note WILLAMETTE points at 'willamette.shp' here.
WILLAMETTE = './data/subshapes/willamette.shp'
SNAKE = './data/subshapes/snake.shp'
ROCKIES = './data/subshapes/can_rockies.shp'
OLYMPIC = './data/subshapes/olympics.shp'

# Reproject to EPSG:4326. `epsg=4326` replaces the `{'init': 'epsg:4326'}`
# dict, which newer pyproj/geopandas releases flag as deprecated.
gdf_will = gp.GeoDataFrame.from_file(WILLAMETTE).to_crs(epsg=4326)
gdf_snake = gp.GeoDataFrame.from_file(SNAKE).to_crs(epsg=4326)
gdf_rockies = gp.GeoDataFrame.from_file(ROCKIES).to_crs(epsg=4326)
gdf_olys = gp.GeoDataFrame.from_file(OLYMPIC).to_crs(epsg=4326)


In [None]:
# Open VIC output and cut it to the analysis window (done once; the second,
# identical time selection that used to follow was a no-op).
vic_ds = xr.open_dataset(VIC_DATASET).sel(time=daily_slice)

# Derive the shared water-balance variables from the VIC fields.
vic_ds['totRunoff'] = vic_ds['Runoff'] + vic_ds['Baseflow']
# Soil moisture is the liquid content summed over the soil layers.
vic_ds['soil_moisture'] = vic_ds['Soil_liquid'].sum(dim='soil_layers')
# IWE is folded into SWE before the rename below.
vic_ds['SWE'] += vic_ds['IWE']
vic_ds['canopy_moisture'] = vic_ds['Canopy_intcp_stor']

# Only names that actually change are listed; 'soil_moisture' and
# 'canopy_moisture' already carry their final names.
vic_rename_vars = {'Evaporation': 'evaporation',
                   'Precipitation': 'precipitation',
                   'totRunoff': 'runoff',
                   'SWE': 'swe'}
vic_ds = vic_ds.rename(vic_rename_vars)
vic_ds = vic_ds[out_vars]
vic_ds.to_netcdf('./data/vic_processed.nc')

In [None]:
# Preprocess VIC
# For each region: burn its polygons onto the VIC grid, turn the result into
# a 1.0-inside / NaN-outside mask stored on vic_ds, then keep the cells where
# the mask equals 1.0. Masks accumulate on vic_ds in this order.
vic_by_mask = {}
for mask_name, region_gdf in (('willamette', gdf_will),
                              ('snake', gdf_snake),
                              ('rockies', gdf_rockies),
                              ('olys', gdf_olys)):
    shapes = zip(region_gdf.geometry, range(len(region_gdf)))
    grid_coords = vic_ds.isel(time=0).drop('time').coords
    vic_ds[mask_name] = 0.0 * rasterize(shapes, grid_coords) + 1.0
    vic_by_mask[mask_name] = vic_ds.where(vic_ds[mask_name] == 1.0, drop=True)

vic_will = vic_by_mask['willamette']
vic_snake = vic_by_mask['snake']
vic_rockies = vic_by_mask['rockies']
vic_olys = vic_by_mask['olys']

In [None]:
# Write the masked VIC datasets, one file per region.
names = ['vic_will', 'vic_snake', 'vic_rockies', 'vic_olys']
regions = [vic_will, vic_snake, vic_rockies, vic_olys]
for n, r in zip(names, regions):
    r.to_netcdf('./data/{}.nc'.format(n))

In [None]:
# Spatial aggregate per region: sum over lat/lon divided by the mask total
# (the mask is 1.0 inside the region, so this is the number of masked cells).
vic_will_ncells = vic_will['willamette'].sum(skipna=True).values
vic_will_seas = vic_will.sum(dim=['lat', 'lon']) / vic_will_ncells

vic_snake_ncells = vic_snake['snake'].sum(skipna=True).values
vic_snake_seas = vic_snake.sum(dim=['lat', 'lon']) / vic_snake_ncells

vic_rockies_ncells = vic_rockies['rockies'].sum(skipna=True).values
vic_rockies_seas = vic_rockies.sum(dim=['lat', 'lon']) / vic_rockies_ncells

vic_olys_ncells = vic_olys['olys'].sum(skipna=True).values
vic_olys_seas = vic_olys.sum(dim=['lat', 'lon']) / vic_olys_ncells

In [None]:
# Persist the regional VIC aggregates, one NetCDF file per region.
for region_mean, out_path in ((vic_will_seas, './data/vic_will_info.nc'),
                              (vic_snake_seas, './data/vic_snake_info.nc'),
                              (vic_rockies_seas, './data/vic_rockies_info.nc'),
                              (vic_olys_seas, './data/vic_olys_info.nc')):
    region_mean.to_netcdf(out_path)

## PRMS

In [None]:
# Open PRMS output and cut it to the analysis window (done once; the second,
# identical time selection that used to follow was a no-op).
prms_ds = xr.open_dataset(PRMS_DATASET).sel(time=daily_slice)

# Precipitation is taken from the processed VIC dataset, so the VIC cells
# must have been run before this one.
# NOTE(review): presumably both models share the same forcing and grid -
# confirm the coordinates match, otherwise values will not line up.
prms_ds['Precipitation'] = vic_ds['precipitation']

# Groundwater storage is folded into soil moisture; runoff is the sum of the
# three flow components.
prms_ds['Soil_moisture'] = prms_ds['Groundwater_storage'] + prms_ds['Soil_moisture']
prms_ds['Runoff'] = prms_ds['Groundwater_flow'] + prms_ds['Subsurface_flow'] + prms_ds['Surface_flow']
# Placeholder canopy field: zero wherever the combined soil moisture is
# finite (NaN stays NaN).
prms_ds['canopy_moisture'] = 0.0 * prms_ds['Soil_moisture']

# Only names that actually change are listed; 'canopy_moisture' already
# carries its final name.
prms_rename_vars = {'Evaporation': 'evaporation',
                    'Precipitation': 'precipitation',
                    'Runoff': 'runoff',
                    'Soil_moisture': 'soil_moisture',
                    'SWE': 'swe'}
prms_ds = prms_ds.rename(prms_rename_vars)
prms_ds = prms_ds[out_vars]
prms_ds.to_netcdf('./data/prms_processed.nc')

In [None]:
# Preprocess PRMS
# For each region: burn its polygons onto the PRMS grid, turn the result into
# a 1.0-inside / NaN-outside mask stored on prms_ds, then keep the cells where
# the mask equals 1.0. Masks accumulate on prms_ds in this order.
prms_by_mask = {}
for mask_name, region_gdf in (('willamette', gdf_will),
                              ('snake', gdf_snake),
                              ('rockies', gdf_rockies),
                              ('olys', gdf_olys)):
    shapes = zip(region_gdf.geometry, range(len(region_gdf)))
    grid_coords = prms_ds.isel(time=0).drop('time').coords
    prms_ds[mask_name] = 0.0 * rasterize(shapes, grid_coords) + 1.0
    prms_by_mask[mask_name] = prms_ds.where(prms_ds[mask_name] == 1.0, drop=True)

prms_will = prms_by_mask['willamette']
prms_snake = prms_by_mask['snake']
prms_rockies = prms_by_mask['rockies']
prms_olys = prms_by_mask['olys']

# Write the masked PRMS datasets, one file per region.
names = ['prms_will', 'prms_snake', 'prms_rockies', 'prms_olys']
regions = [prms_will, prms_snake, prms_rockies, prms_olys]
for n, r in zip(names, regions):
    r.to_netcdf('./data/{}.nc'.format(n))

In [None]:
# Spatial aggregate per region: sum over lat/lon divided by the mask total
# (the mask is 1.0 inside the region, so this is the number of masked cells).
prms_will_ncells = prms_will['willamette'].sum(skipna=True).values
prms_will_seas = prms_will.sum(dim=['lat', 'lon']) / prms_will_ncells

prms_snake_ncells = prms_snake['snake'].sum(skipna=True).values
prms_snake_seas = prms_snake.sum(dim=['lat', 'lon']) / prms_snake_ncells

prms_rockies_ncells = prms_rockies['rockies'].sum(skipna=True).values
prms_rockies_seas = prms_rockies.sum(dim=['lat', 'lon']) / prms_rockies_ncells

prms_olys_ncells = prms_olys['olys'].sum(skipna=True).values
prms_olys_seas = prms_olys.sum(dim=['lat', 'lon']) / prms_olys_ncells

In [None]:
# Persist the regional PRMS aggregates, one NetCDF file per region.
for region_mean, out_path in ((prms_will_seas, './data/prms_will_info.nc'),
                              (prms_snake_seas, './data/prms_snake_info.nc'),
                              (prms_rockies_seas, './data/prms_rockies_info.nc'),
                              (prms_olys_seas, './data/prms_olys_info.nc')):
    region_mean.to_netcdf(out_path)