In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import xarray as xr

import matplotlib.pyplot as plt

from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error, mean_absolute_error

from spicy_snow.retrieval import retrieval_from_parameters

ModuleNotFoundError: No module named 'spicy_snow'

In [None]:
# Retrieval parameter sets, ordered [A, B, C]. Only the Lievens set is active;
# the two WUS sets are kept commented out for reference.
lievens_params = [2, 0.5, 0.44]
# wus_params = [2.5, 0.2, 0.55]
# wus_v2_params = [1.5, 0.1, 0.59]

# Directory globbed for the per-site '*.nc' datasets used in the comparisons below.
in_dir = Path('~/spicy-snow/SnowEx-Data/').expanduser().resolve()
# Second dataset directory on scratch; only referenced from disabled code in this notebook.
data_dir = Path('~/scratch/spicy/SnowEx-Data/').expanduser().resolve()

# Create parameter space
# Each stop value sits just past the last wanted grid point so that point is
# included; rounding to 2 decimals strips the floating-point noise that
# np.arange leaves behind when stepping by a float.
# NOTE(review): presumably these match the A/B/C coordinates of the saved
# parameter-sweep results -- confirm against the file that produced them.
A = np.round(np.arange(1, 3.1, 0.1), 2)
B = np.round(np.arange(0, 2.01, 0.1), 2)
C = np.round(np.arange(0, 1.001, 0.01), 2)

def bias(x, y):
    """Return the mean signed difference ``mean(x - y)``.

    A positive value means ``x`` is larger than ``y`` on average.
    """
    return np.mean(x - y)


def _as_flat_float(values):
    """Convert a DataArray, list or other array-like into a flat float ndarray."""
    return np.asarray(values, dtype = float).ravel()


def get_stats(x, y):
    """Compare two sets of paired values, ignoring pairs that contain NaN.

    Parameters
    ----------
    x, y : xarray.DataArray, list, numpy.ndarray or other array-like
        Paired values with the same number of elements. Inputs are flattened,
        so multi-dimensional arrays are compared element by element.

    Returns
    -------
    r : float
        Pearson correlation coefficient between ``x`` and ``y``.
    b : float
        Bias, ``mean(x - y)``.
    mae : float
        Mean absolute error.
    rmse : float
        Root-mean-square error.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in size, or if fewer than two pairs remain
        once NaNs are dropped (a correlation is undefined in that case).
    """
    x, y = _as_flat_float(x), _as_flat_float(y)
    if x.shape != y.shape:
        raise ValueError(f'x and y must have the same number of elements, got {x.size} and {y.size}')

    # Keep only the pairs where both members are valid numbers.
    valid = ~(np.isnan(x) | np.isnan(y))
    if valid.sum() < 2:
        raise ValueError('get_stats needs at least two pairs without NaN')
    x, y = x[valid], y[valid]

    r, _ = pearsonr(x, y)
    b = bias(x, y)
    # MAE and RMSE are computed directly with numpy: the ``squared = False``
    # keyword of sklearn's mean_squared_error was removed in scikit-learn 1.6.
    diff = x - y
    mae = np.mean(np.abs(diff))
    rmse = np.sqrt(np.mean(diff ** 2))
    return r, b, mae, rmse

In [None]:
# Load the saved parameter-sweep results.
npy_dirs = Path('/bsuhome/zacharykeskinen/scratch/spicy/param_npys')
all_res = xr.open_dataset(npy_dirs / 'grouped.nc')

# For each parameter, take the best score over the other two parameters and
# report the parameter value at which that best score peaks.
best_a = all_res['pearsonr'].max(['B', 'C']).idxmax('A')
best_b = all_res['pearsonr'].max(['C', 'A']).idxmax('B')
best_c = all_res['mae'].min(['A', 'B']).idxmin('C')
for best in (best_a, best_b, best_c):
    print(best)

<xarray.DataArray 'A' ()>
array(1.5)
<xarray.DataArray 'B' ()>
array(0.)
<xarray.DataArray 'C' ()>
array(0.27)


In [None]:
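# Disabled diagnostic: for each dataset, print the offset between the selected image date and the lidar flight time.
# for fp in in_dir.glob('*.nc'):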
#     ds = xr.open_dataset(fp)

#     if fp.stem == 'Frasier_2020-02-11':
#         r, b, mae, rmse = get_stats(ds['lidar-sd'], ds['snow_depth'].sel(time = '2020-02-16'))
#         im_date = pd.to_datetime(ds.sel(time = '2020-02-16', method = 'nearest').time.values.ravel()[0])
#     else:
#         r, b, mae, rmse = get_stats(ds['lidar-sd'], ds['snow_depth'].sel(time = ds.attrs['lidar-flight-time'], method = 'nearest'))
#         im_date = pd.to_datetime(ds.sel(time = ds.attrs['lidar-flight-time'], method = 'nearest').time.values.ravel()[0])
#     d_days = im_date - pd.to_datetime(ds.attrs['lidar-flight-time'])
#     print(fp.stem)
#     print(d_days)

In [None]:
# Build the accuracy table `res`: one row per dataset plus an 'All Sites' row,
# comparing retrieved snow depth ('snow_depth') against lidar snow depth
# ('lidar-sd') at native resolution, for dry pixels only, and after coarsening.

# Retrieval parameters whose pooled native-resolution scores are read from
# `all_res` for the 'All Sites' row.
best_params = {'A': 1.5, 'B': 0.1, 'C': 0.59}
# (pixels per side of the coarsening window, column-name suffix)
scales = [(3, ' @ 300m'), (6, ' @ 500m'), (11, ' @ 1km')]

# sorted() keeps the row order of the table reproducible between runs.
dss = {fp.stem: xr.open_dataset(fp) for fp in sorted(in_dir.glob('*.nc'))}
if not dss:
    raise FileNotFoundError(f'No .nc datasets found in {in_dir}')

rows = {}
# One (lidar values, retrieved values) pair of lists per scale. Keeping the
# scales apart matters: pooling 300 m pixels into the 500 m lists would
# contaminate the 'All Sites @ 500m' statistics.
pooled = {suffix: ([], []) for _, suffix in scales}

for stem, ds in dss.items():

    if stem == 'Frasier_2020-02-11':
        # This dataset is compared against the 2020-02-16 image rather than the
        # image nearest the flight (NOTE(review): reason not recorded here).
        im_date = pd.to_datetime('2020-02-16')
        retrieved = ds['snow_depth'].sel(time = '2020-02-16')
    else:
        retrieved = ds['snow_depth'].sel(time = ds.attrs['lidar-flight-time'], method = 'nearest')
        im_date = pd.to_datetime(retrieved.time.values.ravel()[0])

    site_name = stem.replace('_', ' ').split('-')[0]

    # native resolution
    r, b, mae, rmse = get_stats(ds['lidar-sd'], retrieved)
    row = {'RMSE': rmse, 'Pearson R': r}

    # Select the image once; coarsening a single time slice gives the same
    # result as coarsening every time step and selecting afterwards.
    image = ds[['lidar-sd', 'snow_depth']].sel(time = im_date, method = 'nearest')

    # for dry only
    dry = ds['wet_snow'].sel(time = im_date, method = 'nearest') == 0
    r, b, mae, rmse = get_stats(image['lidar-sd'].where(dry), image['snow_depth'].where(dry))
    row['RMSE (Dry)'] = rmse

    # coarsened comparisons
    for window, suffix in scales:
        coarse = image.coarsen(x = window, y = window, boundary = 'pad').mean()
        lidar_coarse, retrieved_coarse = coarse['lidar-sd'], coarse['snow_depth']

        r, b, mae, rmse = get_stats(lidar_coarse, retrieved_coarse)
        row['RMSE' + suffix] = rmse
        row['Pearson R' + suffix] = r

        pooled[suffix][0].append(lidar_coarse.values.ravel())
        pooled[suffix][1].append(retrieved_coarse.values.ravel())

    rows[site_name] = row

# 'All Sites' at native resolution comes from the saved parameter sweep;
# float() unwraps the 0-d DataArray so the table holds plain numbers.
all_sites = {
    name: float(all_res.sel(**best_params)[name.lower().replace(' ', '')])
    for name in ['RMSE', 'Pearson R']
}

# 'All Sites' at each coarse scale is computed from the pixels pooled over all datasets.
for _, suffix in scales:
    pooled_lidar, pooled_retrieved = pooled[suffix]
    r, b, mae, rmse = get_stats(np.concatenate(pooled_lidar), np.concatenate(pooled_retrieved))
    all_sites['RMSE' + suffix] = rmse
    all_sites['Pearson R' + suffix] = r

rows['All Sites'] = all_sites

# Build the table in one go; columns follow the insertion order of the row dicts.
res = pd.DataFrame(list(rows.values()), index = list(rows.keys()))

# Names kept for any later cell that still expects them.
coarse_x, coarse_y = pooled[' @ 500m']
coarse_x_1k, coarse_y_1k = pooled[' @ 1km']

In [None]:
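# Disabled: pooled 'All Sites' rows for the 'Dry', '<25% FCF' and combined masks at 1-, 6- and 11-pixel coarsening.
# dss = {fp.stem: xr.open_dataset(fp) for fp in in_dir.glob('*.nc')}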
# for cond in ['Dry', '<25% FCF', 'Dry, <25% FCF']:
#     for looks, spatial in zip([1, 6, 11], ['', ' @ 500m', ' @ 1km']):

#         xs= []
#         ys = []

#         for stem, ds in dss.items():
#             if stem == 'Frasier_2020-02-11':
#                 im_date = pd.to_datetime('2020-02-16')
#             else:
#                 im_date = pd.to_datetime(ds.sel(time = ds.attrs['lidar-flight-time'], method = 'nearest').time.values.ravel()[0])
#             site_name = stem.replace('_', ' ').split('-')[0].replace('Little Cottonwood', 'LCC')
#             ds = ds.sel(time = im_date, method = 'nearest')

#             ds_500 = ds.coarsen(x = looks, y = looks, boundary = 'pad').mean()
        
#             if cond == 'Dry':
#                 ds_500 = ds_500.where(ds_500['wet_snow'] == 0)
#             if cond == '<25% FCF':
#                 ds_500 = ds_500.where(ds_500['fcf'] < 0.25)
#             if cond == 'Dry, <25% FCF':
#                 ds_500 = ds_500.where((ds_500['wet_snow'] == 0) & (ds_500['fcf']< 0.25))
#             xs.extend(ds_500['lidar-sd'].values.ravel())
#             ys.extend(ds_500['snow_depth'].values.ravel())
        
#         # xs, ys = np.array(xs), np.array(ys)

#         if np.sum(~np.isnan(xs)) < 5:
#             continue
#         r, b, mae, rmse = get_stats(xs, ys)

#         for name, var in zip(['RMSE', 'Pearson R'], [rmse, r]):
#             res.loc[f'All Sites ({cond})', name+spatial] = float(var)


In [None]:
# res.to_csv('/bsuhome/zacharykeskinen/spicy-analysis/results/table1.csv')

In [None]:
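# Disabled earlier version of the table-building cell (500 m and 1 km columns only, with an optional Lievens (2022) comparison).
# res = pd.DataFrame()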
# coarse_x = []
# coarse_y = []

# coarse_x_1k = []
# coarse_y_1k = []
# for fp in in_dir.glob('*.nc'):
#     ds = xr.open_dataset(fp)

#     if fp.stem == 'Frasier_2020-02-11':
#         r, b, mae, rmse = get_stats(ds['lidar-sd'], ds['snow_depth'].sel(time = '2020-02-16'))
#         im_date = pd.to_datetime('2020-02-16')
#     else:
#         r, b, mae, rmse = get_stats(ds['lidar-sd'], ds['snow_depth'].sel(time = ds.attrs['lidar-flight-time'], method = 'nearest'))
#         im_date = pd.to_datetime(ds.sel(time = ds.attrs['lidar-flight-time'], method = 'nearest').time.values.ravel()[0])
#     d_days = im_date - pd.to_datetime(ds.attrs['lidar-flight-time'])

#     site_name = fp.stem.replace('_', ' ').split('-')[0]
#     for name, var in zip(['RMSE', 'Pearson R'], [rmse, r]):
#         res.loc[site_name, name] = var

#     ds_500 = ds.coarsen(x = 6, y = 6, boundary = 'pad').mean()
#     r, b, mae, rmse  = get_stats(ds_500['lidar-sd'], ds_500['snow_depth'].sel(time = im_date, method = 'nearest'))
#     coarse_x.append(ds_500['lidar-sd'].values.ravel())
#     coarse_y.append(ds_500['snow_depth'].sel(time = im_date, method = 'nearest').values.ravel())

#     for name, var in zip(['RMSE', 'Pearson R'], [rmse, r]):
#         res.loc[site_name, name+' @ 500m'] = var

#     ds_500 = ds.coarsen(x = 11, y = 11, boundary = 'pad').mean()
#     r, b, mae, rmse  = get_stats(ds_500['lidar-sd'], ds_500['snow_depth'].sel(time = im_date, method = 'nearest'))
#     coarse_x_1k.append(ds_500['lidar-sd'].values.ravel())
#     coarse_y_1k.append(ds_500['snow_depth'].sel(time = im_date, method = 'nearest').values.ravel())

#     for name, var in zip(['RMSE', 'Pearson R'], [rmse, r]):
#         res.loc[site_name, name+' @ 1km'] = var

#     # lievens_ds = retrieval_from_parameters(xr.open_dataset(data_dir.joinpath(fp.name)), A = lievens_params[0], B = lievens_params[1], C = lievens_params[2])

#     # o_r, o_b, o_mae, o_rmse = get_stats(lievens_ds['lidar-sd'], lievens_ds['snow_depth'].sel(time = im_date, method = 'nearest'))

#     # res.loc[site_name, 'Lievens (2022) rmse'] = o_rmse
#     # res.loc[site_name, 'Lievens (2022) Pearson R'] = o_r

# for name, var in zip(['RMSE', 'Pearson R'], [rmse, r]):
#     res.loc['All Sites', name] = all_res.sel(A = 1.5, B = 0.1, C = 0.59)[name.lower().replace(' ', '')]
# # res.loc['All Sites', 'Lievens (2022) rmse'] = all_res.sel(A = lievens_params[0], B = lievens_params[1], C = lievens_params[2])['rmse']
# # res.loc['All Sites', 'Lievens (2022) Pearson R'] = all_res.sel(A = lievens_params[0], B = lievens_params[1], C = lievens_params[2])['pearsonr']

# r, b, mae, rmse  = get_stats(np.concatenate(coarse_x).ravel(), np.concatenate(coarse_y).ravel())
# for name, var in zip(['RMSE', 'Pearson R'], [rmse, r]):
#     res.loc['All Sites', name+' @ 500m'] = var

# r, b, mae, rmse  = get_stats(np.concatenate(coarse_x_1k).ravel(), np.concatenate(coarse_y_1k).ravel())
# for name, var in zip(['RMSE', 'Pearson R'], [rmse, r]):
#     res.loc['All Sites', name+' @ 1km'] = var