Skip to content

Commit

Permalink
refactor: move max_prob_diff to comparisons registry
Browse files Browse the repository at this point in the history
  • Loading branch information
sbrugman committed Dec 13, 2021
1 parent c6a1ca7 commit 47336bc
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 7 deletions.
4 changes: 1 addition & 3 deletions popmon/analysis/comparison/hist_comparer.py
Expand Up @@ -40,7 +40,7 @@
)
from ...base import Pipeline
from ...hist.hist_utils import COMMON_HIST_TYPES, is_numeric
-from ...stats.numpy import googl_test, ks_prob, ks_test, uu_chi2
+from ...stats.numpy import ks_prob, ks_test, uu_chi2


def hist_compare(row, hist_name1="", hist_name2="", max_res_bound=7.0):
Expand Down Expand Up @@ -70,7 +70,6 @@ def hist_compare(row, hist_name1="", hist_name2="", max_res_bound=7.0):
"chi2_pvalue": np.nan,
"chi2_max_residual": np.nan,
"chi2_spike_count": np.nan,
"max_prob_diff": np.nan,
"unknown_labels": np.nan,
}

Expand Down Expand Up @@ -137,7 +136,6 @@ def hist_compare(row, hist_name1="", hist_name2="", max_res_bound=7.0):
x["chi2_pvalue"] = pvalue
x["chi2_max_residual"] = max(list(map(abs, res)))
x["chi2_spike_count"] = sum(abs(r) > max_res_bound for r in res)
x["max_prob_diff"] = googl_test(*entries_list)
for key, func in Comparisons.get_comparisons().items():
x[key] = func(*entries_list)
return pd.Series(x)
Expand Down
3 changes: 0 additions & 3 deletions popmon/config.py
Expand Up @@ -52,8 +52,6 @@
+ "(one histogram in a time slot and one in {ref})",
"chi2_spike_count": "The number of normalized residuals of all bin pairs (one histogram in a time"
+ " slot and one in {ref}) with absolute value bigger than a given threshold (default: 7).",
"max_prob_diff": "The largest absolute difference between all bin pairs of two normalized "
+ "histograms (one histogram in a time slot and one in {ref})",
"unknown_labels": "Are categories observed in a given time slot that are not present in {ref}?",
}
comparisons.update(Comparisons.get_descriptions())
Expand Down Expand Up @@ -100,7 +98,6 @@
"*unknown_labels*",
"*chi2_norm*",
"*ks*",
"*max_prob_diff*",
"*zscore*",
"n_*",
"worst",
Expand Down
2 changes: 1 addition & 1 deletion popmon/pipeline/metrics_pipelines.py
Expand Up @@ -141,7 +141,7 @@ def __init__(
suffix_mean="_mean",
suffix_std="_std",
suffix_pull="_pull",
metrics=["ref_max_prob_diff"] + [f"ref_{key}" for key in Comparisons.get_comparisons().keys()],
metrics=[f"ref_{key}" for key in Comparisons.get_comparisons().keys()],
),
# 4. profiling of histograms, then pull calculation compared with reference mean and std,
# to obtain normalized residuals of profiles
Expand Down
4 changes: 4 additions & 0 deletions popmon/stats/numpy.py
Expand Up @@ -346,6 +346,10 @@ def ks_prob(testscore):
return pvalue


+@Comparisons.register(
+key="max_prob_diff",
+description="The largest absolute difference between all bin pairs of two normalized histograms (one histogram in a time slot and one in {ref})",
+)
def googl_test(bins_1, bins_2):
"""Google-paper test
Expand Down

0 comments on commit 47336bc

Please sign in to comment.