diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index 8b70652e..ce976624 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -12,7 +12,7 @@ Reference types When generating a report from a DataFrame, the reference type can be set with the option ``reference_type``, in four different ways: -1. Using the DataFrame on which the stability report is built as a self-reference. This reference method is static: each time slot is compared to all the previous slots in the DataFrame (all included in one distribution). This is the default reference setting. +1. Using the DataFrame on which the stability report is built as a self-reference. This reference method is static: each time slot is compared to all the slots in the DataFrame (all included in one distribution). This is the default reference setting. .. code-block:: python @@ -40,6 +40,10 @@ in four different ways: # generate stability report with specific monitoring rules report = df.pm_stability_report(reference_type="expanding", shift=1) +Note that, by default, popmon also performs a rolling comparison of the histograms in each time period with those in the +previous time period. The results of these comparisons contain the term "prev1", and are found in the comparisons section +of a report. + Binning specifications ---------------------- @@ -53,7 +57,8 @@ To specify the time-axis binning alone, do: report = df.pm_stability_report(time_axis='date', time_width='1w', time_offset='2020-1-6') -The default time width is 4 weeks ('4w'). All other features (except for 'date') are auto-binned in this example. +The default time width is 30 days ('30d'), with time offset 2010-1-4 (a Monday). +All other features (except for 'date') are auto-binned in this example. To specify your own binning specifications for individual features or combinations of features, do: diff --git a/popmon/hist/filling/spark_histogrammar.py b/popmon/hist/filling/spark_histogrammar.py index 560dc0ce..ad13add6 100644 --- a/popmon/hist/filling/spark_histogrammar.py +++ b/popmon/hist/filling/spark_histogrammar.py @@ -7,7 +7,6 @@ """ import histogrammar as hg -import histogrammar.sparksql import numpy as np from tqdm import tqdm @@ -189,8 +188,6 @@ def process_features(self, df, cols_by_type): to_ns = sparkcol(col).cast("timestamp").cast("float") * 1e9 idf = idf.withColumn(col, to_ns) - hg.sparksql.addMethods(idf) - return idf def construct_empty_hist(self, df, features): @@ -218,9 +215,6 @@ def construct_empty_hist(self, df, features): hist = self.get_hist_bin(hist, features, quant, col, dt) - # set data types in histogram - dta = [self.var_dtype[col] for col in features] - hist.datatype = dta[0] if len(features) == 1 else dta return hist def fill_histograms(self, idf):