In [3]:
from pathlib import Path

import numpy as np
import pandas as pd
import xarray as xr

import matplotlib.pyplot as plt

from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error, mean_absolute_error

from spicy_snow.retrieval import retrieval_from_parameters

In [4]:
lievens_params = [2, 0.5, 0.44]
# wus_params = [2.5, 0.2, 0.55]
# wus_v2_params = [1.5, 0.1, 0.59]

in_dir = Path('~/scratch/spicy/SnowEx-Data/').expanduser().resolve()
in_dir = Path('/Users/rdcrlzh1/Documents/spicy-analysis/data/SnowEx-Data')
data_dir = Path('~/scratch/spicy/SnowEx-Data/').expanduser().resolve()

def bias(x, y): return np.mean(x - y)

def get_stats(x, y, nrmse = False):
    if type(x) == xr.DataArray: x = x.values.ravel()
    if type(y) == xr.DataArray: y = y.values.ravel()
    if type(x) == list: x = np.array(x)
    if type(y) == list: y = np.array(y)
    idx = (~np.isnan(x)) & (~np.isnan(y))
    x, y = x[idx], y[idx]
    r, p = pearsonr(x, y)
    b = bias(x, y)
    mae = mean_absolute_error(x, y)
    rmse = mean_squared_error(x, y, squared = False)

    if nrmse:
        nrmse_value = rmse / np.mean(x)
        return r, b, mae, rmse, nrmse_value

    return r, b, mae, rmse

from scipy.stats import norm
def fischerz(truth, x1, x2):
    idx1 = (~np.isnan(truth)) & (~np.isnan(x1))
    idx2 = (~np.isnan(truth)) & (~np.isnan(x2))
    n = np.min([len(x1[idx1]), len(x2[idx2])])
    cor1 = pearsonr(truth[idx1], x1[idx1]).statistic
    cor2 = pearsonr(truth[idx2], x2[idx2]).statistic
    fischer1 = 0.5*np.log((1+cor1)/(1-cor1))
    fischer2 = 0.5*np.log((1+cor2)/(1-cor2))
    expected_sd = np.sqrt(1/(n-3))
    return 2 * (1 - norm(0, expected_sd).cdf(np.abs(fischer1 - fischer2)))

In [5]:
npy_dirs = Path('/bsuhome/zacharykeskinen/scratch/spicy/param_npys')
all_res = xr.open_dataset(npy_dirs.joinpath('grouped.nc'))
all_res_dry = xr.open_dataset(npy_dirs.joinpath('dry_grouped.nc'))
A = all_res.max('B').min('C')['pearsonr'].idxmax('A')
B = all_res['pearsonr'].max('A').min('C').idxmax('B')
print(A)
print(B)
print(all_res['mae'].sel(A = A, B = B).idxmin('C'))

FileNotFoundError: [Errno 2] No such file or directory: '/bsuhome/zacharykeskinen/scratch/spicy/param_npys/grouped.nc'

new table with WUS

In [10]:
res = pd.DataFrame()

dss = {fp.stem: xr.open_dataset(fp) for fp in in_dir.glob('*.nc')}
mean_all = 0
xs, ys = [], []
dry_xs, dry_ys = [], []
for stem, full_ds in dss.items():

    if stem == 'Frasier_2020-02-11':
        im_date = pd.to_datetime('2020-02-16')
    else:
        im_date = pd.to_datetime(full_ds.sel(time = full_ds.attrs['lidar-flight-time'], method = 'nearest').time.values.ravel()[0])

    d_days = im_date - pd.to_datetime(full_ds.attrs['lidar-flight-time'])
    site_name = stem.replace('_', ' ').replace('Frasier', 'Fraser').split('-')[0]


    ds = full_ds.sel(time = im_date, method = 'nearest')
    mean_all += ds['snow_depth'].mean()

    full_r, full_b, full_mae, full_rmse, full_nrmse = get_stats(ds['lidar-sd'], ds['snow_depth'], nrmse = True)
    # add RMSE and bias @ 90m
    for name, var in zip(['RMSE', 'Pearson R', 'nRMSE'], [full_rmse, full_r, full_nrmse]):
        res.loc[site_name, name] = var
    ys.extend(ds['lidar-sd'].data.ravel())
    xs.extend(ds['snow_depth'].data.ravel())

    # for dry only
    idx = ds['wet_snow'] == 0
    r, b, mae, rmse, nrmse  = get_stats(ds['lidar-sd'].where(idx), ds['snow_depth'].where(idx), nrmse = True)
    for name, var in zip(['RMSE', 'Pearson R'], [rmse, r]):
        res.loc[site_name, name + ' (Dry)'] = var
    res.loc[site_name, 'dry Fischer'] = fischerz(ds['lidar-sd'].data.ravel(), ds['snow_depth'].data.ravel(), ds['snow_depth'].where(idx).data.ravel())
    dry_ys.extend(ds['lidar-sd'].where(idx).data.ravel())
    dry_xs.extend(ds['snow_depth'].where(idx).data.ravel())
    
    # WUS over Lievens Improvement RMSE
    # l_ds = retrieval_from_parameters(full_ds, A = lievens_params[0], B = lievens_params[1], C = lievens_params[2]).sel(time = im_date, method = 'nearest')
    # l_r, l_b, l_mae, l_rmse, l_nrmse = get_stats(l_ds['lidar-sd'], l_ds['snow_depth'], nrmse = True)
    # res.loc[site_name, 'L22 RMSE'] = l_rmse
    # res.loc[site_name, 'L22 R'] = l_r
    # res.loc[site_name, 'L22 Fischer'] = fischerz(l_ds['lidar-sd'].data.ravel(), l_ds['snow_depth'].data.ravel(), ds['snow_depth'].data.ravel())
    res.loc[site_name, 'mean-all-sd'] = ds['lidar-sd'].mean().data.ravel()[0]
    res.loc[site_name, 'mean-dry-sd'] = ds['lidar-sd'].where(idx).mean().data.ravel()[0]

r, b, mae, rmse = get_stats(xs, ys)
for name, var in zip(['RMSE', 'Pearson R'], [rmse, r]):
    # res.loc['All Sites', name] = all_res.sel(A = 1.5, B = 0.1, C = 0.60)[name.lower().replace(' ', '')]
    res.loc['All Sites', name] = var
# res.loc['All Site', 'nRMSE'] = all_res.sel(A = 1.5, B = 0.1, C = 0.60)['rmse'] /(mean_all / (len(dss)))

r, b, mae, rmse = get_stats(dry_xs, dry_ys)
for name, var in zip(['RMSE', 'Pearson R'], [rmse, r]):
    # res.loc[site_name, name + ' (Dry)'] = all_res_dry.sel(A = 1.5, B = 0.1, C = 0.60)[name.lower().replace(' ', '')]
    res.loc['All Sites', name + ' (Dry)'] = var
res.loc['All Sites', 'dry Fischer'] = fischerz(xr.DataArray(ys), xr.DataArray(xs), xr.DataArray(dry_xs))

res.loc['All Sites', 'mean-all-sd'] = np.nanmean(ys)
res.loc['All Sites', 'mean-dry-sd'] = np.nanmean(dry_ys)
# res.loc['All Sites', 'L22 RMSE'] = all_res.sel(A = 1.5, B = 0.1, C = 0.60)['rmse'] - all_res.sel(A = lievens_params[0], B = lievens_params[1], C = lievens_params[2])['rmse']
# res.loc['All Sites', 'L22 R'] = all_res.sel(A = 1.5, B = 0.1, C = 0.60)['pearsonr'] - all_res.sel(A = lievens_params[0], B = lievens_params[1], C = lievens_params[2])['pearsonr']

In [11]:
res = res.applymap(lambda x: f'{x:.2f}').reindex(['All Sites', 'Banner 2020', 'Banner 2021', 'Dry Creek 2020',\
     'Fraser 2020', 'Fraser 2021', 'Little Cottonwood 2021', 'Mores 2020', 'Mores 2021', 'Cameron 2021'])
res.drop(['nRMSE'], axis = 1)

  res = res.applymap(lambda x: f'{x:.2f}').reindex(['All Sites', 'Banner 2020', 'Banner 2021', 'Dry Creek 2020',\


Unnamed: 0,RMSE,Pearson R,RMSE (Dry),Pearson R (Dry),dry Fischer,mean-all-sd,mean-dry-sd
All Sites,0.92,0.46,1.01,0.46,0.64,1.41,1.62
Banner 2020,1.0,0.4,0.92,0.37,0.05,1.51,1.67
Banner 2021,0.89,0.42,1.14,0.49,0.0,1.48,1.58
Dry Creek 2020,0.74,0.21,0.78,0.24,0.23,1.05,1.1
Fraser 2020,0.93,0.38,1.26,0.14,0.0,1.11,1.3
Fraser 2021,0.65,0.18,0.79,0.44,0.0,0.86,1.15
Little Cottonwood 2021,1.07,0.54,1.17,0.51,0.25,1.81,2.0
Mores 2020,1.07,0.08,0.97,0.19,0.0,1.79,1.76
Mores 2021,0.91,0.4,0.91,0.34,0.09,1.6,1.79
Cameron 2021,1.07,0.02,0.93,0.13,0.14,1.41,1.35


# old table with spatial resolutions

In [31]:
res = pd.DataFrame()
coarse_x = []
coarse_y = []

coarse_x_1k = []
coarse_y_1k = []
dss = {fp.stem: xr.open_dataset(fp) for fp in in_dir.glob('*.nc')}

for stem, ds in dss.items():

    if stem == 'Frasier_2020-02-11':
        r, b, mae, rmse = get_stats(ds['lidar-sd'], ds['snow_depth'].sel(time = '2020-02-16'))
        im_date = pd.to_datetime('2020-02-16')
    else:
        r, b, mae, rmse = get_stats(ds['lidar-sd'], ds['snow_depth'].sel(time = ds.attrs['lidar-flight-time'], method = 'nearest'))
        im_date = pd.to_datetime(ds.sel(time = ds.attrs['lidar-flight-time'], method = 'nearest').time.values.ravel()[0])
    d_days = im_date - pd.to_datetime(ds.attrs['lidar-flight-time'])

    site_name = stem.replace('_', ' ').split('-')[0]

    # add RMSE and bias @ 90m
    for name, var in zip(['RMSE', 'Pearson R'], [rmse, r]):
        res.loc[site_name, name] = var

    # for dry only
    idx = ds['wet_snow'].sel(time = im_date, method = 'nearest') == 0
    r, b, mae, rmse  = get_stats(ds['lidar-sd'].where(idx), ds['snow_depth'].sel(time = im_date, method = 'nearest').where(idx))
    for name, var in zip(['RMSE'], [rmse]):
        res.loc[site_name, name + ' (Dry)'] = var

    # @ 300 m
    ds_500 = ds.coarsen(x = 3, y = 3, boundary = 'pad').mean()
    r, b, mae, rmse  = get_stats(ds_500['lidar-sd'], ds_500['snow_depth'].sel(time = im_date, method = 'nearest'))
    coarse_x.append(ds_500['lidar-sd'].values.ravel())
    coarse_y.append(ds_500['snow_depth'].sel(time = im_date, method = 'nearest').values.ravel())

    for name, var in zip(['RMSE', 'Pearson R'], [rmse, r]):
        res.loc[site_name, name+' @ 300m'] = var

    # @ 500 m
    ds_500 = ds.coarsen(x = 6, y = 6, boundary = 'pad').mean()
    r, b, mae, rmse  = get_stats(ds_500['lidar-sd'], ds_500['snow_depth'].sel(time = im_date, method = 'nearest'))
    coarse_x.append(ds_500['lidar-sd'].values.ravel())
    coarse_y.append(ds_500['snow_depth'].sel(time = im_date, method = 'nearest').values.ravel())

    for name, var in zip(['RMSE', 'Pearson R'], [rmse, r]):
        res.loc[site_name, name+' @ 500m'] = var

    # @ 1 km

    ds_500 = ds.coarsen(x = 11, y = 11, boundary = 'pad').mean()
    r, b, mae, rmse  = get_stats(ds_500['lidar-sd'], ds_500['snow_depth'].sel(time = im_date, method = 'nearest'))
    coarse_x_1k.append(ds_500['lidar-sd'].values.ravel())
    coarse_y_1k.append(ds_500['snow_depth'].sel(time = im_date, method = 'nearest').values.ravel())

    for name, var in zip(['RMSE', 'Pearson R'], [rmse, r]):
        res.loc[site_name, name+' @ 1km'] = var


for name, var in zip(['RMSE', 'Pearson R'], [rmse, r]):
    res.loc['All Sites', name] = all_res.sel(A = 1.5, B = 0.1, C = 0.59)[name.lower().replace(' ', '')]

r, b, mae, rmse  = get_stats(np.concatenate(coarse_x).ravel(), np.concatenate(coarse_y).ravel())
for name, var in zip(['RMSE', 'Pearson R'], [rmse, r]):
    res.loc['All Sites', name+' @ 500m'] = var

r, b, mae, rmse  = get_stats(np.concatenate(coarse_x_1k).ravel(), np.concatenate(coarse_y_1k).ravel())
for name, var in zip(['RMSE', 'Pearson R'], [rmse, r]):
    res.loc['All Sites', name+' @ 1km'] = var