In [1]:
import numpy as np
import pandas as pd
import xarray as xr

from pathlib import Path

from itertools import product
from tqdm.contrib.itertools import product

import warnings

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error, mean_absolute_error

def get_bias(x, y):
    """Return the mean of the element-wise difference ``x - y``."""
    difference = x - y
    return np.mean(difference)

def get_stats(x, y):
    """Compare two equally sized arrays and return ``(r, bias, mae, rmse)``.

    Both inputs are flattened, and every position where either input is NaN
    is dropped before any statistic is computed.

    Parameters
    ----------
    x, y : array-like
        Anything ``numpy.asarray`` accepts (``numpy.ndarray``,
        ``xarray.DataArray``, nested lists, ...). They must hold the same
        number of elements.

    Returns
    -------
    r : float
        Pearson correlation coefficient from ``scipy.stats.pearsonr``.
    b : float
        Bias, i.e. the mean of ``x - y`` (the quantity ``get_bias`` returns).
    mae : float
        Mean absolute error, ``mean(|x - y|)``.
    rmse : float
        Root-mean-square error, ``sqrt(mean((x - y) ** 2))``.

    Raises
    ------
    ValueError
        If the flattened inputs differ in size. ``pearsonr`` may also raise
        when too few valid pairs remain after NaN removal.
    """
    # np.asarray handles ndarray, DataArray and plain sequences uniformly,
    # so no exact-type check against xr.DataArray is needed.
    x = np.asarray(x).ravel()
    y = np.asarray(y).ravel()
    if x.shape != y.shape:
        raise ValueError(
            f"x and y must have the same number of elements, got {x.size} and {y.size}"
        )

    # Keep only the positions that are valid in both arrays.
    valid = ~(np.isnan(x) | np.isnan(y))
    x, y = x[valid], y[valid]

    with warnings.catch_warnings():
        # scipy warns when an input is constant; that warning is silenced here.
        warnings.filterwarnings("ignore", message="An input array is constant")
        r, _ = pearsonr(x, y)

    # Derive all error metrics from one residual vector. This avoids the
    # `squared=False` keyword of sklearn's mean_squared_error, which newer
    # scikit-learn releases no longer accept.
    residual = x - y
    b = np.mean(residual)
    mae = np.mean(np.abs(residual))
    rmse = np.sqrt(np.mean(np.square(residual)))
    return r, b, mae, rmse

In [3]:
# Root folder that is globbed for sub-directories matching '*_*-*-*'.
# NOTE(review): hard-coded absolute path on one user's scratch space; the
# notebook will not run elsewhere without editing this line.
npy_dirs = Path('/bsuhome/zacharykeskinen/scratch/spicy/param_npys')

In [4]:
# Load the 'lidar.npy' array found under every entry matching the pattern and
# join them along the first axis into a single reference array.
lidar_files = (Path(loc_dir) / 'lidar.npy' for loc_dir in npy_dirs.glob('*_*-*-*'))
sds = np.concatenate(list(map(np.load, lidar_files)))

In [5]:
# Parameter grid (values rounded to two decimals so they match the file names):
#   A: 1.00 .. 3.00 in steps of 0.1   (21 values)
#   B: 0.00 .. 2.00 in steps of 0.1   (21 values)
#   C: 0.00 .. 1.00 in steps of 0.01  (101 values)
A = np.round(np.linspace(1, 3, 21), 2)
B = np.round(np.linspace(0, 2, 21), 2)
C = np.round(np.linspace(0, 1, 101), 2)
# Names of the directories (plain files are skipped) that match the pattern.
locs = [entry.stem for entry in npy_dirs.glob('*_*-*-*') if entry.is_dir()]

In [6]:
# One (A, B, C) grid per metric. Cells start as NaN rather than 0 so that a
# parameter combination that was never evaluated (e.g. an interrupted run)
# cannot be mistaken for a genuine score of zero.
grid_shape = (len(A), len(B), len(C))
da = xr.DataArray(np.full(grid_shape, np.nan), coords = [A, B, C], dims = ['A', 'B', 'C'], name = 'pearsonr')
res = xr.merge([da] + [da.copy().rename(metric) for metric in ('mae', 'rmse', 'bias')])

In [73]:
# Resolve the matching directories once, in a fixed (sorted) order, skipping
# anything that is not a directory.
loc_dirs = sorted(fp for fp in npy_dirs.glob('*_*-*-*') if fp.is_dir())

# Rebuild the lidar reference from that same ordered list. get_stats pairs the
# reference and candidate arrays element by element, so both concatenations
# must visit the directories in the same order; separate glob calls do not
# guarantee that.
sds = np.concatenate([np.load(fp.joinpath('lidar.npy')) for fp in loc_dirs])

for a, b, c in product(A, B, C):
    # Candidate array for this parameter set, concatenated across directories.
    spicy = np.concatenate([np.load(fp.joinpath(f'{a}_{b}_{c}.npy')) for fp in loc_dirs])
    r, bias, mae, rmse = get_stats(sds, spicy)
    # Store each metric at the grid cell addressed by the (A, B, C) labels.
    for name, var in zip(['pearsonr', 'mae', 'rmse', 'bias'], [r, mae, rmse, bias]):
        res[name].loc[dict(A = a, B = b, C = c)] = var

  1%|          | 264/44541 [00:05<56:42, 13.01it/s] 

In [8]:
# Write the metric grids to 'grouped.nc' inside the data folder.
out_path = npy_dirs / 'grouped.nc'
res.to_netcdf(out_path)