Skip to content

Commit

Permalink
refactor: simplification
Browse files Browse the repository at this point in the history
  • Loading branch information
sbrugman committed Nov 24, 2021
1 parent 66141b2 commit 98acf68
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 26 deletions.
48 changes: 25 additions & 23 deletions popmon/analysis/comparison/hist_comparer.py
Expand Up @@ -57,20 +57,20 @@ def hist_compare(row, hist_name1="", hist_name2="", max_res_bound=7.0):
Default is 7.0.
:return: pandas Series with popular comparison metrics.
"""
x = pd.Series()
x["ks"] = np.nan
x["ks_zscore"] = np.nan
x["ks_pvalue"] = np.nan
x["pearson"] = np.nan
x["chi2"] = np.nan
x["chi2_norm"] = np.nan
x["chi2_zscore"] = np.nan
x["chi2_pvalue"] = np.nan
x["chi2_max_residual"] = np.nan
x["chi2_spike_count"] = np.nan
x["max_prob_diff"] = np.nan
unknown_labels = np.nan
x["unknown_labels"] = unknown_labels
x = {
"ks": np.nan,
"ks_zscore": np.nan,
"ks_pvalue": np.nan,
"pearson": np.nan,
"chi2": np.nan,
"chi2_norm": np.nan,
"chi2_zscore": np.nan,
"chi2_pvalue": np.nan,
"chi2_max_residual": np.nan,
"chi2_spike_count": np.nan,
"max_prob_diff": np.nan,
"unknown_labels": np.nan,
}

# basic name checks
cols = row.index.to_list()
Expand All @@ -83,15 +83,14 @@ def hist_compare(row, hist_name1="", hist_name2="", max_res_bound=7.0):
# basic histogram checks
hist1 = row[hist_name1]
hist2 = row[hist_name2]
if not all([isinstance(hist, COMMON_HIST_TYPES) for hist in [hist1, hist2]]):
return x
if not check_similar_hists([hist1, hist2]):
return x
if not all(
[isinstance(hist, COMMON_HIST_TYPES) for hist in [hist1, hist2]]
) or not check_similar_hists([hist1, hist2]):
return pd.Series(x)

# compare
is_num = is_numeric(hist1)
if hist1.n_dim == 1:
if is_num:
if is_numeric(hist1):
numpy_1dhists = get_consistent_numpy_1dhists([hist1, hist2])
entries_list = [nphist[0] for nphist in numpy_1dhists]
# KS-test only properly defined for (ordered) 1D interval variables
Expand All @@ -106,10 +105,14 @@ def hist_compare(row, hist_name1="", hist_name2="", max_res_bound=7.0):
labels1 = hist1.bin_labels()
labels2 = hist2.bin_labels()
subset = set(labels1) <= set(labels2)
unknown_labels = int(not subset)
x["unknown_labels"] = int(not subset)
elif hist1.n_dim == 2:
numpy_2dgrids = get_consistent_numpy_2dgrids([hist1, hist2])
entries_list = [entry.flatten() for entry in numpy_2dgrids]
else:
raise NotImplementedError(
f"histogram with dimension {hist1.n_dim} is not supported"
)

# calculate pearson coefficient
pearson, pvalue = (np.nan, np.nan)
Expand All @@ -130,8 +133,7 @@ def hist_compare(row, hist_name1="", hist_name2="", max_res_bound=7.0):
x["chi2_max_residual"] = max(list(map(abs, res)))
x["chi2_spike_count"] = sum(abs(r) > max_res_bound for r in res)
x["max_prob_diff"] = googl_test(*entries_list)
x["unknown_labels"] = unknown_labels
return x
return pd.Series(x)


class HistComparer(Pipeline):
Expand Down
5 changes: 3 additions & 2 deletions popmon/analysis/profiling/hist_profiler.py
Expand Up @@ -148,7 +148,7 @@ def _profile_1d_histogram(self, name, hist):
for f_name, result in zip(name, results)
]

profile.update({k: v for k, v in zip(names, results)})
profile.update(dict(zip(names, results)))
elif not is_num:
profile["fraction_true"] = pm_np.fraction_of_true(bin_labels, bin_counts)

Expand Down Expand Up @@ -190,7 +190,6 @@ def _profile_hist(self, split, hist_name):
is_num = is_numeric(hist0)

# these are the profiled quantities we will monitor
fields = []
if dimension == 1:
fields = list(self.general_stats_1d)
fields += (
Expand All @@ -200,6 +199,8 @@ def _profile_hist(self, split, hist_name):
)
elif dimension == 2:
fields = list(self.general_stats_2d)
else:
fields = []

# now loop over split-axis, e.g. time index, and profile each sub-hist x:y
profile_list = []
Expand Down
2 changes: 1 addition & 1 deletion popmon/pipeline/metrics.py
Expand Up @@ -147,8 +147,8 @@ def stability_metrics(
"monitoring_rules": monitoring_rules,
"pull_rules": pull_rules,
"features": features,
**kwargs,
}
cfg.update(kwargs)

datastore = {"hists": hists}
if reference_type == "external":
Expand Down

0 comments on commit 98acf68

Please sign in to comment.