# Test methods for adjusting p-values

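This notebook compares methods for adjusting the per-pixel trend p-values to account for the false discovery rate (FDR) that arises when many pixels are tested at once.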

In [1]:
import sys
import warnings
import numpy as np
import xarray as xr
import pandas as pd
import contextily as ctx
import matplotlib.pyplot as plt

from multipy.fdr import lsu, qvalue, abh, tst, orc
from multipy.fwer import sidak, hochberg
from multipy.rft import rft_2d
from multipy.ihw import ihw_naive

sys.path.append('/g/data/os22/chad_tmp/dea-notebooks/Tools/')
from dea_tools.classification import sklearn_flatten, sklearn_unflatten
from scipy import stats



## Open per-pixel phenology data

In [2]:
# Tag interpolated into both result file names below.
lin_or_circ = 'circular'

# Build the two NetCDF paths first, then open them; `p_average` receives the
# "mean_phenology" file and `p_trends` the "trends_phenology" file.
mean_phenology_path = f'/g/data/os22/chad_tmp/Aus_phenology/results/mean_phenology_perpixel_{lin_or_circ}_final.nc'
trends_phenology_path = f'/g/data/os22/chad_tmp/Aus_phenology/results/trends_phenology_perpixel_{lin_or_circ}_final.nc'

p_average = xr.open_dataset(mean_phenology_path)
p_trends = xr.open_dataset(trends_phenology_path)

## Define mask

In [3]:
def _flag_unexplained_nan(slope, mean, seasonal_flag):
    """Return 1 where `slope` is NaN although `mean` is not, outside `seasonal_flag`.

    The NaN test on `slope` is only kept where `mean` is non-NaN (elsewhere it
    becomes NaN and therefore never equals 1), and a pixel is flagged only if
    `seasonal_flag` is 0 there. All other pixels get 0.
    """
    slope_is_nan = np.isnan(slope)
    mean_is_valid = ~np.isnan(mean)
    candidate = slope_is_nan.where(mean_is_valid)
    return xr.where((candidate == 1) & (seasonal_flag == 0), 1, 0)


# Ratio of detected seasons to years of record.
season_per_year = p_average['n_seasons'] / p_average['n_years']

# 1/0 flags for pixels whose ratio is at or below 0.90, or at or above 1.1.
non_seasonal = xr.where(season_per_year <= 0.90, 1, 0)
extra_seasonal = xr.where(season_per_year >= 1.1, 1, 0)
seasonality_mask = non_seasonal | extra_seasonal

# Per-metric flags for slopes that are NaN even though the mean exists and the
# pixel is not already covered by the seasonality mask.
sos_nan = _flag_unexplained_nan(p_trends['SOS_slope'], p_average['SOS'], seasonality_mask)
eos_nan = _flag_unexplained_nan(p_trends['EOS_slope'], p_average['EOS'], seasonality_mask)
pos_nan = _flag_unexplained_nan(p_trends['POS_slope'], p_average['POS'], seasonality_mask)

# A pixel is excluded (value 1) if any of the individual flags is set.
extra_mask = seasonality_mask | sos_nan | eos_nan | pos_nan

## Adjust p-values

In [4]:
# Phenology metric to analyse; it prefixes the '<var>_slope' and
# '<var>_p_value' variable names read from `p_trends`.
var = 'POS'
# p_trends[f'{var}_p_value'].where(extra_mask!=1).plot(robust=True)

# Keep values only where extra_mask is not 1; flagged pixels become NaN.
trends = p_trends[var+'_slope'].where(extra_mask!=1)
pval = p_trends[var+'_p_value'].where(extra_mask!=1)

# Reduce both counts to plain Python ints. Previously only `total_nonnan` was
# converted, which left the percentage as a 0-d DataArray and made the ':.2f'
# format below depend on xarray's scalar `__format__` support.
total_nonnan = trends.notnull().sum().item()
total_significant = trends.where(pval <= 0.05).notnull().sum().item()

# With no valid pixels the percentage is undefined; report NaN instead of
# raising ZeroDivisionError on the Python ints.
total_percent_changed = (
    total_significant / total_nonnan * 100 if total_nonnan else float('nan')
)
print(f'{total_percent_changed:.2f} % of valid pixels in Australia has seen significant change in {var} (uncorrected p-values)')

19.23 % of valid pixels in Australia has seen significant change in POS (uncorrected p-values)


In [None]:
# d = sklearn_flatten(p_trends[f'{var}_p_value'].where(extra_mask!=1).rename({'latitude':'y', 'longitude':'x'}))
# mask = ~np.isnan(d) 

# # p_adj = stats.false_discovery_control(d[mask], method='bh')
# # p_adj = lsu(d[mask], q=0.05)
# # p_adj = abh(d[mask])
# p_adj = tst(d[mask])
# # p_adj = orc(d[mask], m0 = np.sum(d[mask]>0.05))
# # p_adj = ihw_naive(d[mask], groups=100, method=lsu)
# # p_adj = hochberg(d[mask])
# # p_adj = rft_2d(p_trends[f'{var}_p_value'].where(extra_mask!=1).fillna(1).data,
# #                fwhm=3.0
# # )



In [None]:
# Masked p-values with dims renamed to the 'y'/'x' names passed to the dea_tools
# helpers. Built once so that flatten and unflatten are guaranteed to see the
# exact same array (it was previously constructed twice).
p_masked_xy = (
    p_trends[f'{var}_p_value']
    .where(extra_mask!=1)
    .rename({'latitude':'y', 'longitude':'x'})
)

d = sklearn_flatten(p_masked_xy)
# NOTE(review): sklearn_flatten's NaN handling is not visible here, so the
# explicit NaN filter is retained -- confirm whether it is still needed.
mask = ~np.isnan(d)

# Benjamini-Hochberg adjusted p-values for the non-NaN pixels. Kept under its
# own name so `p_adj` only ever refers to the gridded result.
p_adj_flat = stats.false_discovery_control(d[mask], method='bh')
# Alternatives that were tried:
# p_adj_flat = lsu(d[mask], q=0.1)
# p_adj_flat = tst(d[mask], q=0.1)

# Put the adjusted values back on the grid and restore the original dim names.
p_adj = sklearn_unflatten(p_adj_flat, p_masked_xy)
p_adj = p_adj.transpose().rename({'y':'latitude', 'x':'longitude'})#.plot(robust=True)

# sig_mask = xr.where(p_adj==1, 1, 0)
# p_adj = pval.where(sig_mask)

# Threshold applied to the adjusted p-values (i.e. q = 0.1 effectively).
fdr_threshold = 0.1

# Plain Python ints so the percentage is an ordinary float rather than a
# 0-d DataArray when it is formatted below.
total_nonnan = trends.notnull().sum().item()
total_significant = trends.where(p_adj <= fdr_threshold).notnull().sum().item()

# NaN (not ZeroDivisionError) when there are no valid pixels at all.
total_percent_changed = (
    total_significant / total_nonnan * 100 if total_nonnan else float('nan')
)
print(f'{total_percent_changed:.2f} % of valid pixels in Australia has seen significant change in {var} (corrected p-values)')

In [None]:
# Two panels: p-values passing the 0.05 cut before adjustment (left) and
# adjusted p-values passing the 0.1 cut (right).
fig, ax = plt.subplots(1, 2, figsize=(11, 5))
map_opts = dict(robust=True, add_labels=False, add_colorbar=False)

# Left panel: raw p-values, shown only where the masked p-value is <= 0.05.
raw_p = p_trends[f'{var}_p_value']
raw_p_unmasked = raw_p.where(extra_mask != 1)
raw_p.where(raw_p_unmasked <= 0.05).plot(ax=ax[0], **map_opts)

# Right panel: adjusted p-values, shown only where they are <= 0.1.
p_adj.where(p_adj <= 0.1).plot(ax=ax[1], **map_opts)

for panel, heading in zip(ax, ('Uncorrected p-values', 'Corrected p-values')):
    panel.set_title(heading)

## Compare p-values

In [None]:
# One row per pixel that has both an adjusted and an original p-value.
# The original p-values are taken as the explicitly named Series instead of
# assigning a whole DataFrame after drop('spatial_ref'): that form raises
# KeyError when the coordinate is absent and fails when any other coordinate
# column is present. The Series is aligned to the adjusted values on the
# shared index produced by to_dataframe().
df = (
    p_adj.to_dataframe(name='p-adjusted')
    .assign(**{'p-original': pval.to_dataframe(name='p-original')['p-original']})
    .dropna()
    .reset_index(drop=True)
)

In [None]:
# Adjusted vs. original p-values, drawn through the Axes returned by pandas
# rather than the pyplot "current axes" state.
scatter_ax = df.plot.scatter(x='p-original', y='p-adjusted')
scatter_ax.set_title(f'{var} p-value adjusted using FDR')
# Dashed 1:1 reference line from (0, 0) to (1, 1).
scatter_ax.plot([0, 1], [0, 1], ls="--", c=".3");