In [None]:
%pylab inline
import xarray as xr
import pandas as pd
import hyeenna as hy
import geopandas as gp
from scipy import stats
from rasterio import features
from affine import Affine
from scipy.stats.stats import pearsonr
from functools import reduce
from jupyterthemes import jtplot
import cartopy.crs as ccrs
import itertools

# Style matplotlib output using whatever theme jtplot infers, with fonts
# scaled up by a factor of 2.2.
jtplot.style(jtplot.infer_theme(), fscale=2.2)
# Use the fully qualified option name. The bare 'precision' pattern is not a
# unique key in newer pandas releases (it also matches the Styler precision
# option), so it fails there; 'display.precision' works on old and new pandas.
pd.set_option('display.precision', 4)

# Negative-index window over a daily record, counted back from its end in
# 365-day years: starts `year1` years before the end and stops `year2` years
# before the end.
# NOTE(review): presumably applied along the time axis - confirm at call sites.
year1 = 50
year2 = 1
daily_slice = slice(-365 * year1, -365 * year2)

# Names of the model output variables of interest.
out_vars = [
    'evaporation',
    'precipitation',
    'runoff',
    'swe',
    'soil_moisture',
    'canopy_moisture',
]

# Unit-conversion factors.
SEC_PER_DAY = 24 * 60 * 60
MM_PER_M = 10 ** 3
# Number of decimals passed to np.round by the metric helpers.
ROUND = 5


def fix(ds):
    """Flip the sign of a dataset in place, except for three variables.

    The whole dataset is multiplied by -1 and then 'precipitation',
    'soil_moisture' and 'swe' are multiplied by -1 again, so those three end
    up with their original sign while everything else is negated.

    Parameters
    ----------
    ds : mapping-like dataset
        Must support in-place multiplication and item access for the three
        variable names above. It is modified in place.

    Returns
    -------
    The same object that was passed in.
    """
    # Negate everything first ...
    ds *= -1
    # ... then undo the flip for the variables that must keep their sign.
    for name in ('precipitation', 'soil_moisture', 'swe'):
        ds[name] *= -1
    return ds

In [None]:
# Draw a throwaway plot, set the default figure size to 20 x 16 via jtplot,
# then clear the current figure so nothing is left on screen.
# NOTE(review): presumably the dummy plot is there so the plotting backend is
# initialised before the size is changed - confirm it is still needed.
plt.plot([0])
jtplot.figsize(x=20, y=16)
plt.clf()

In [None]:
def _load_model_output(model, basin):
    """Open './data/<model>_<basin>_info.nc' and prepare it for analysis.

    SUMMA output is passed through `fix` (sign convention); VIC and PRMS are
    used as stored. For every model, 'swe' and 'soil_moisture' have the
    minimum of each calendar year subtracted from that year's values.

    The yearly-minimum step is written out here, rather than calling a helper
    defined in a later cell, so that this cell runs on a fresh kernel
    (Restart & Run All) without a NameError.
    """
    ds = xr.open_dataset('./data/{}_{}_info.nc'.format(model, basin))
    if model == 'summa':
        ds = fix(ds)
    for name in ('swe', 'soil_moisture'):
        da = ds[name]
        ds[name] = (da.groupby(da.time.dt.year)
                    .apply(lambda x: x - x.min(dim='time', skipna=True))
                    .drop('year'))
    return ds


# One dataset per (model, basin); the module-level names are used by later cells.
summa_will = _load_model_output('summa', 'will')
summa_snake = _load_model_output('summa', 'snake')
summa_rockies = _load_model_output('summa', 'rockies')
summa_olys = _load_model_output('summa', 'olys')

vic_will = _load_model_output('vic', 'will')
vic_snake = _load_model_output('vic', 'snake')
vic_rockies = _load_model_output('vic', 'rockies')
vic_olys = _load_model_output('vic', 'olys')

prms_will = _load_model_output('prms', 'will')
prms_snake = _load_model_output('prms', 'snake')
prms_rockies = _load_model_output('prms', 'rockies')
prms_olys = _load_model_output('prms', 'olys')

In [None]:
# Daily datasets keyed by '<model>_<basin>'; each entry is replaced below by
# its weekly aggregate.
analysis_dict = {
    'summa_will': summa_will,
    'summa_snake': summa_snake,
    'summa_rockies': summa_rockies,
    'summa_olys': summa_olys,
    'vic_will': vic_will,
    'vic_snake': vic_snake,
    'vic_rockies': vic_rockies,
    'vic_olys': vic_olys,
    'prms_will': prms_will,
    'prms_snake': prms_snake,
    'prms_rockies': prms_rockies,
    'prms_olys': prms_olys
}
# A monthly (time='M') variant of the same aggregation was tried as an
# alternative; the weekly ('W') version is the one in use.
for name in list(analysis_dict):
    daily = analysis_dict[name]
    print(name)

    # Every variable is summed per week ...
    weekly = daily.resample(time='W').sum(dim='time')
    # ... except swe and soil_moisture, which are overwritten with weekly means.
    for state in ('swe', 'soil_moisture'):
        weekly[state].values = daily[state].resample(time='W').mean(dim='time')

    # Calendar labels used later to group the weekly records.
    weekly['weekofyear'] = weekly.time.dt.weekofyear
    weekly['month'] = weekly.time.dt.month
    analysis_dict[name] = weekly


In [None]:
def err_df(var, err_func):
    """Tabulate a pairwise between-model metric for one variable in each basin.

    Parameters
    ----------
    var : str
        Name of the variable to pull from every model dataset.
    err_func : callable
        Called as ``err_func(a[var], b[var])`` for each model pair, with the
        first-named model of the pair as ``a``.

    Returns
    -------
    pandas.DataFrame
        Rows 'SUMMA-VIC', 'SUMMA-PRMS', 'VIC-PRMS'; columns 'Snake',
        'Willamette', 'Rocky Mountains', 'Olympic Mountains'.

    Notes
    -----
    Reads the module-level per-basin datasets (summa_*, vic_*, prms_*).
    """
    basin_names = ['Snake', 'Willamette', 'Rocky Mountains', 'Olympic Mountains']
    models_by_basin = {
        'Snake': {'SUMMA': summa_snake, 'VIC': vic_snake, 'PRMS': prms_snake},
        'Willamette': {'SUMMA': summa_will, 'VIC': vic_will, 'PRMS': prms_will},
        'Rocky Mountains': {'SUMMA': summa_rockies, 'VIC': vic_rockies,
                            'PRMS': prms_rockies},
        'Olympic Mountains': {'SUMMA': summa_olys, 'VIC': vic_olys,
                              'PRMS': prms_olys},
    }
    model_pairs = [('SUMMA', 'VIC'), ('SUMMA', 'PRMS'), ('VIC', 'PRMS')]

    df = pd.DataFrame(columns=basin_names,
                      index=['SUMMA-VIC', 'SUMMA-PRMS', 'VIC-PRMS'])
    for basin in basin_names:
        models = models_by_basin[basin]
        for first, second in model_pairs:
            # Single .loc[row, col] assignment: the chained form
            # df.loc[row][col] = ... may write to a temporary copy.
            df.loc['{}-{}'.format(first, second), basin] = err_func(
                models[first][var], models[second][var])
    return df

In [None]:
def raw_data(data_array):
    """Return the values of `data_array` as a flattened (1-D) copy.

    `data_array` is any object whose `.values` attribute has a `.flatten()`
    method (e.g. an array-backed container).
    """
    values = data_array.values
    return values.flatten()

def subtract_yearly_min(da):
    """Subtract, from each calendar year of `da`, that year's minimum over time.

    Values are grouped by `da.time.dt.year`; within each group the minimum
    along 'time' (NaNs skipped) is removed. The 'year' coordinate added by the
    grouping is dropped from the result.
    """
    def _minus_own_min(one_year):
        return one_year - one_year.min(dim='time', skipna=True)

    by_year = da.groupby(da.time.dt.year)
    return by_year.apply(_minus_own_min).drop('year')

def rmse(a, b):
    """Root-mean-square difference between `a` and `b`, rounded to ROUND decimals.

    The mean is taken over all elements. The result is read through `.values`,
    so the inputs must reduce to an object exposing that attribute.
    """
    squared_diff = np.power(a - b, 2)
    root_mean_square = np.sqrt(np.mean(squared_diff))
    return np.round(root_mean_square.values, ROUND)

def mae(a, b):
    """Mean absolute difference between `a` and `b`, rounded to ROUND decimals.

    The mean is taken over all elements. The result is read through `.values`,
    so the inputs must reduce to an object exposing that attribute.
    """
    abs_diff = np.abs(a - b)
    mean_abs = np.mean(abs_diff)
    return np.round(mean_abs.values, ROUND)

def get_data(ds, dvars=True):
    """Extract standardised, flattened series for the five analysis variables.

    Parameters
    ----------
    ds : dataset
        Must provide 'evaporation', 'runoff', 'precipitation',
        'soil_moisture' and 'swe'.
    dvars : bool
        If true, soil moisture and SWE are replaced by their differences
        along 'time'; otherwise their values are used directly.

    Returns
    -------
    (names, varlist)
        `names` is ['R', 'ET', 'P', 'ΔSM', 'ΔSWE'] (or 'SM'/'SWE' when
        `dvars` is false); `varlist` holds the matching 1-D arrays, each with
        its mean removed and divided by its standard deviation.

    Notes
    -----
    Non-differenced series drop their first flattened element so that their
    length matches the differenced ones.
    NOTE(review): that alignment assumes each variable is a 1-D time series -
    confirm for multi-dimensional inputs.
    """
    def _skip_first(name):
        return raw_data(ds[name])[1:]

    def _storage(name):
        if dvars:
            return raw_data(ds[name].diff(dim='time'))
        return _skip_first(name)

    evap = _skip_first('evaporation')
    runoff = _skip_first('runoff')
    precip = _skip_first('precipitation')
    soil_liq = _storage('soil_moisture')
    swe = _storage('swe')

    if dvars:
        names = ['R', 'ET', 'P', 'ΔSM', 'ΔSWE']
    else:
        names = ['R', 'ET', 'P', 'SM', 'SWE']

    # Z-score each series independently.
    standardized = [((series - series.mean()) / series.std()).flatten()
                    for series in (runoff, evap, precip, soil_liq, swe)]
    return names, standardized

def kge(sim, obs):
    """Kling-Gupta efficiency of `sim` against `obs`, rounded to ROUND decimals.

    Computed as 1 - sqrt((r - 1)^2 + (alpha - 1)^2 + (beta - 1)^2), where r is
    the correlation coefficient, alpha the ratio of standard deviations
    (sim / obs) and beta the ratio of means (sim / obs).
    """
    # Variability and bias ratios.
    alpha = np.std(sim) / np.std(obs)
    beta = sim.mean(axis=0) / obs.mean(axis=0)
    # Off-diagonal entry of the 2x2 correlation matrix.
    r = np.corrcoef(sim, obs)[0, 1]

    distance = np.sqrt(np.square(r - 1) + np.square(alpha - 1) + np.square(beta - 1))
    return np.round(1 - distance, ROUND)

def runoff_ratio(ds):
    """Total 'runoff' divided by total 'precipitation', rounded to ROUND decimals."""
    total_runoff = np.sum(ds['runoff'])
    total_precip = np.sum(ds['precipitation'])
    ratio = total_runoff / total_precip
    return np.round(ratio.values, ROUND)

def aridity_index(ds):
    """Total 'precipitation' divided by total 'evaporation', rounded to ROUND decimals."""
    total_precip = np.sum(ds['precipitation'])
    total_evap = np.sum(ds['evaporation'])
    ratio = total_precip / total_evap
    return np.round(ratio.values, ROUND)

def data_dict(ds, dvars=True):
    """Return the output of `get_data(ds, dvars)` as a {name: series} dict."""
    names, series = get_data(ds, dvars)
    return dict(zip(names, series))

def filter_data(filter_func, *args):
    """Keep only the positions that are valid in every series, then only
    those where the first series is positive.

    Parameters
    ----------
    filter_func : callable
        Maps one series to an array of the indices to keep.
    *args :
        Only the first positional argument is used; it must be a sequence of
        equally indexed arrays.

    Returns
    -------
    list of arrays, one per input series, in the input order. The final
    selection indexes with the result of `np.argwhere`, so 1-D inputs come
    back as column vectors of shape (n, 1).
    """
    series = args[0]
    # Indices accepted for each series individually, then common to all.
    index_sets = [filter_func(s) for s in series]
    shared = reduce(np.intersect1d, index_sets)
    kept = [s[shared] for s in series]
    # Second pass: restrict to entries where the first series is > 0.
    positive = np.argwhere(kept[0] > 0)
    return [s[positive] for s in kept]

def notnan(x):
    """Indices (as returned by `np.argwhere`) where `x` is not NaN and > -1000.

    NOTE(review): the -1000 cutoff presumably screens out fill values -
    confirm against the data.
    """
    not_missing = ~np.isnan(x)
    above_cutoff = x > -1000
    return np.argwhere(np.logical_and(not_missing, above_cutoff))

def estimate_network_r(varlist: list, names: list,
                       nruns: int=10, sample_size: int=3000) -> pd.DataFrame:
    """Build a matrix of Pearson correlations between named series.

    Parameters
    ----------
    varlist : list
        1-D series, one per entry of `names`.
    names : list
        Labels used for both rows and columns of the result.
    nruns, sample_size : int
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    pandas.DataFrame
        Entry [a, b] (a != b) is the Pearson r between series a and b, with
        any value below 1e-4 replaced by 0. Diagonal entry [a, a] is the
        correlation of series a with itself shifted by one element.
    """
    mapping = dict(zip(names, varlist))
    # Every ordered pair of distinct names.
    links = list(itertools.permutations(names, 2))
    scores = [stats.pearsonr(np.array(mapping[src]), np.array(mapping[dst]))[0]
              for src, dst in links]

    df = pd.DataFrame(columns=names, index=names)
    for (src, dst), score in zip(links, scores):
        # NOTE(review): this threshold also zeroes every negative
        # correlation, not only near-zero ones - confirm that is intended.
        if score < 1e-4:
            score = 0
        df.loc[src, dst] = score
    for name, var in zip(names, varlist):
        # Single .loc[row, col] assignment; the chained df[name][name] = ...
        # form may write to a temporary copy and leave the diagonal empty.
        df.loc[name, name] = stats.pearsonr(var[1:], var[:-1])[0]
    return df


In [None]:
def ts_corr_coef(ds, key='weekofyear', shift=3):
    """Correlate runoff with the other water-balance terms for each group of `key`.

    For every unique value of ``ds[key]`` the dataset is subset to the
    matching records, converted with ``data_dict(..., dvars=True)``, passed
    through ``filter_data(notnan, ...)`` and the Pearson correlation of runoff
    with ET, P, ΔSM and ΔSWE is computed.

    Parameters
    ----------
    ds : dataset
        Must contain `key` and the variables required by `data_dict`.
    key : str
        Name of the grouping variable (e.g. 'weekofyear' or 'month').
    shift : int
        Number of positions each column is rolled with `np.roll` before being
        returned. With 12 monthly groups the default of 3 moves the last
        three entries to the front.
        NOTE(review): a larger shift is presumably wanted for
        key='weekofyear' (the original carried a commented-out 14) - confirm.

    Returns
    -------
    pandas.DataFrame
        Columns 'P', 'ET', 'ΔSM', 'ΔSWE'; index is the sorted unique values of
        ``ds[key]``. Only the values are rolled, not the index, so after a
        non-zero shift the index labels are positional rather than the key
        value each row was computed from.
    """
    def _corr(x, y):
        # filter_data returns (n, 1) column vectors; flatten so pearsonr
        # always receives 1-D input and returns a scalar coefficient.
        return pearsonr(x.flatten(), y.flatten())[0]

    groups = np.unique(ds[key])
    precip, et, swe, sm = [], [], [], []

    for value in groups:
        subset = ds.where(ds[key] == value, drop=True)
        series = data_dict(subset, dvars=True)
        ordered = [series[name] for name in ('R', 'ET', 'P', 'ΔSM', 'ΔSWE')]
        runoff, evap, prec, d_sm, d_swe = filter_data(notnan, ordered)

        et.append(_corr(runoff, evap))
        precip.append(_corr(runoff, prec))
        sm.append(_corr(runoff, d_sm))
        swe.append(_corr(runoff, d_swe))

    weekly_df = pd.DataFrame(index=groups)
    weekly_df['P'] = np.roll(np.array(precip), shift)
    weekly_df['ET'] = np.roll(np.array(et), shift)
    weekly_df['ΔSM'] = np.roll(np.array(sm), shift)
    weekly_df['ΔSWE'] = np.roll(np.array(swe), shift)
    return weekly_df

In [None]:
# Basin to analyse; must match the suffix used in the analysis_dict keys.
loc = 'rockies'
# Monthly runoff-correlation tables, one per model, for the chosen basin.
summa_ts_df, vic_ts_df, prms_ts_df = (
    ts_corr_coef(analysis_dict['{}_{}'.format(model, loc)], 'month')
    for model in ('summa', 'vic', 'prms')
)

In [None]:
# Three stacked panels (SUMMA, VIC, PRMS) sharing the x axis, each showing the
# monthly correlation of runoff with P, ET, ΔSM and ΔSWE.
fig, ax = plt.subplots(nrows=3, ncols=1, sharex=True)
ax = ax.flatten()

# Largest row-sum across the three tables, padded by 10%.
# Not used in this cell; kept in case a later cell reads it.
ymax = 1.1*np.max([summa_ts_df.sum(axis=1).max(),
               vic_ts_df.sum(axis=1).max(),
               prms_ts_df.sum(axis=1).max()])

line_colors = ['#c44e52', '#3472c6', '#8172b2', '#ff914d']
panels = [('SUMMA', summa_ts_df), ('VIC', vic_ts_df), ('PRMS', prms_ts_df)]
for a, (title, ts_df) in zip(ax, panels):
    ts_df.plot.line(color=line_colors, ax=a, linewidth=4, legend=False)

# One tick per month. The labels start in October.
# NOTE(review): presumably this matches the 3-position roll applied to the
# monthly tables - confirm if the roll changes.
ax[0].set_xticks(list(range(1, 13)))
months = ['Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep']
ax[2].set_xticklabels(months, rotation=30)
ax[0].set_xlim([1, 12])

# Single legend, attached to the middle panel.
patches, labels = ax[1].get_legend_handles_labels()
ax[1].legend(patches, labels, loc='best', bbox_to_anchor=(1.0, 0.75))

for a, (title, ts_df) in zip(ax, panels):
    a.set_ylim([-1, 1])
    a.axhline(0, color='black', linestyle='--')
    a.set_title(title)
    a.grid(False)
ax[1].set_ylabel('Correlation with runoff')

In [None]:
# Runoff ratio for every model (rows) and basin (columns), computed from the
# daily per-basin datasets.
datasets_by_model = {
    'SUMMA': {'Rocky Mountains': summa_rockies, 'Snake': summa_snake,
              'Willamette': summa_will, 'Olympic Mountains': summa_olys},
    'VIC': {'Rocky Mountains': vic_rockies, 'Snake': vic_snake,
            'Willamette': vic_will, 'Olympic Mountains': vic_olys},
    'PRMS': {'Rocky Mountains': prms_rockies, 'Snake': prms_snake,
             'Willamette': prms_will, 'Olympic Mountains': prms_olys},
}
df = pd.DataFrame(columns=['Snake', 'Willamette', 'Rocky Mountains', 'Olympic Mountains'],
                  index=['SUMMA', 'VIC', 'PRMS'])
for model, by_basin in datasets_by_model.items():
    for basin, basin_ds in by_basin.items():
        # Single .loc[row, col] assignment; the chained df.loc[row][col] = ...
        # form may write to a temporary copy and leave the table empty.
        df.loc[model, basin] = runoff_ratio(basin_ds)
df

In [None]:
# Aridity index for every model (rows) and basin (columns), computed from the
# daily per-basin datasets.
datasets_by_model = {
    'SUMMA': {'Rocky Mountains': summa_rockies, 'Snake': summa_snake,
              'Willamette': summa_will, 'Olympic Mountains': summa_olys},
    'VIC': {'Rocky Mountains': vic_rockies, 'Snake': vic_snake,
            'Willamette': vic_will, 'Olympic Mountains': vic_olys},
    'PRMS': {'Rocky Mountains': prms_rockies, 'Snake': prms_snake,
             'Willamette': prms_will, 'Olympic Mountains': prms_olys},
}
df = pd.DataFrame(columns=['Snake', 'Willamette', 'Rocky Mountains', 'Olympic Mountains'],
                  index=['SUMMA', 'VIC', 'PRMS'])
for model, by_basin in datasets_by_model.items():
    for basin, basin_ds in by_basin.items():
        # Single .loc[row, col] assignment; the chained df.loc[row][col] = ...
        # form may write to a temporary copy and leave the table empty.
        df.loc[model, basin] = aridity_index(basin_ds)
df

In [None]:
# Pearson correlation between runoff and soil moisture in the Snake basin,
# printed for SUMMA, VIC and PRMS in that order.
for snake_ds in (summa_snake, vic_snake, prms_snake):
    print(stats.pearsonr(snake_ds['runoff'], snake_ds['soil_moisture'])[0])