In [1]:
from pathlib import Path
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Input directory with the parameter-sweep outputs, and the directory used for figures.
param_fp = Path('/bsuhome/zacharykeskinen/scratch/spicy/param_npys')
fig_dir = Path('/bsuhome/zacharykeskinen/spicy-analysis/figures/params')

# Open the sweep statistics and keep only the C coordinate labels from 0.01 to 1.
ds = xr.open_dataset(param_fp / 'param_stats.nc').sel(C = slice(0.01, 1))

In [3]:
# Empty per-location summary table: one row per entry of `ds.location`, one
# column per fitted parameter, per error statistic, and per rmse sensitivity.
# The LaTeX-style labels are raw strings so that `\d` is a literal backslash
# followed by 'd' rather than an invalid escape sequence; the resulting
# column names are unchanged.
res = pd.DataFrame(
    index = ds.location,
    columns = [
        'A', 'B', 'C',
        'rmse', 'bias', 'mae', 'pearsonr',
        'rmse improvement',
        r'$\delta rmse / \delta A$',
        r'$\delta rmse / \delta B$',
        r'$\delta rmse / \delta C$',
    ],
)

In [4]:
# Reference (A, B, C) values used as the baseline for the 'rmse improvement' column.
# NOTE(review): presumably the global/default parameter set — confirm.
gp = [2, 0.5, 0.44]

for loc, sub in ds.groupby('location'):
    # Average every statistic over the 'iteration' dimension.
    sub = sub.mean('iteration')

    # Staged choice of the "best" parameters for this location
    # (assumes the remaining dimensions are A, B and C — TODO confirm):
    #   1. along C, keep the value with the lowest mae,
    #   2. along A, keep the value with the highest pearsonr,
    #   3. along B, keep the value with the highest pearsonr.
    c_best = sub.sel(C = sub['mae'].idxmin('C'))
    a_c_best = c_best.sel(A = c_best['pearsonr'].idxmax('A'))
    best = a_c_best.sel(B = a_c_best['pearsonr'].idxmax('B'))

    for stat in ['A', 'B', 'C']:
        # Record the selected value of this parameter.
        res.loc[loc, stat] = best[stat].values.ravel()[0]

        # Build a slice that still varies along `stat` while the other two
        # parameters are fixed by the same mae / pearsonr criteria as above.
        # `c_best` is computed once before this loop; it does not depend on `stat`.
        if stat == 'A':
            sub_best = c_best.sel(B = c_best['pearsonr'].idxmax('B'))
        elif stat == 'B':
            sub_best = c_best.sel(A = c_best['pearsonr'].idxmax('A'))
        elif stat == 'C':
            # C must stay free here, so A is looked up on the full `sub`
            # using the A choice derived from the best-C slice.
            a_best = sub.sel(A = c_best['pearsonr'].idxmax('A'))
            sub_best = a_best.sel(B = a_best['pearsonr'].idxmax('B'))

        # Mean derivative of rmse with respect to this parameter.
        d_stat = sub_best['rmse'].differentiate(stat).mean()
        # Raw f-string: `\d` stays a literal backslash + 'd' (no invalid-escape
        # warning) and the key is identical to the column created for `res`.
        res.loc[loc, rf'$\delta rmse / \delta {stat}$'] = d_stat.values.ravel()[0]

    # Copy every data variable of the selected point into the summary row.
    for var in best.data_vars:
        res.loc[loc, var] = best[var].values.ravel()[0]

    # Selected-point rmse minus the rmse at the reference parameters `gp`;
    # a negative value means the selected point has the lower rmse.
    # NOTE(review): this lookup has no tolerance, so the gp values presumably
    # must exist exactly as A/B/C coordinate labels — confirm.
    rmse_improve = best['rmse'] - sub.sel(A = gp[0], B = gp[1], C = gp[2])['rmse']
    res.loc[loc, 'rmse improvement'] = rmse_improve.values.ravel()[0]

In [5]:
# Turn the row index into an ordinary column called 'location'.
res = res.rename_axis('location').reset_index()

In [6]:
# Render every column except the A / B / C parameter columns as a string with
# three decimals. A column whose values do not accept the float format spec
# (for example a column of str) is left exactly as it was.
for col in res.columns:
    if col in ['A', 'B', 'C']:
        continue
    try:
        res[col] = res[col].apply(lambda r: f'{r:.3f}')
    except (TypeError, ValueError):
        # Only formatting failures are skipped; unrelated errors (and
        # KeyboardInterrupt) are no longer swallowed by a bare `except`.
        pass

In [7]:
# Shorten each location label to the text before the first '-' and show
# underscores as spaces.
res['location'] = res['location'].apply(
    lambda raw_name: ' '.join(raw_name.split('-')[0].split('_'))
)

In [11]:
# Cast every column that can be converted to float; columns whose values
# cannot be converted (for example text labels) are left unchanged.
for col in res.columns:
    try:
        res[col] = res[col].astype(float)
    except (TypeError, ValueError):
        # Only conversion failures are skipped; unrelated errors (and
        # KeyboardInterrupt) are no longer swallowed by a bare `except`.
        pass

In [14]:
# Column-wise mean over all rows. `numeric_only = True` skips non-numeric
# columns such as the text 'location' column explicitly, instead of relying
# on implicit dropping, which newer pandas versions reject with a TypeError.
res.mean(axis = 0, numeric_only = True)

  res.mean(axis = 0)


A                           1.255556
B                           0.233333
C                           0.815556
rmse                        0.995889
bias                       -0.512333
mae                         0.836333
pearsonr                    0.246222
rmse improvement            0.064667
$\delta rmse / \delta A$   -0.040222
$\delta rmse / \delta B$   -0.075778
$\delta rmse / \delta C$   -0.170333
dtype: float64

In [88]:
# Persist the per-location summary table as CSV.
res.to_csv(
    path_or_buf = '/bsuhome/zacharykeskinen/spicy-analysis/results/params/param_site_summary.csv'
)