Skip to content

Commit

Permalink
[PROD-311] Allow subsampling of columns for SQS.
Browse files Browse the repository at this point in the history
GitOrigin-RevId: d6a2b5daa1e4395c2b25586b7914c50771c06bbf
  • Loading branch information
tylersbray committed Feb 14, 2023
1 parent 44b1d77 commit ca7ad9a
Showing 1 changed file with 2 additions and 3 deletions.
5 changes: 2 additions & 3 deletions src/gretel_synthetics/utils/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,15 +129,15 @@ def get_numeric_distribution_bins(training: pd.Series, synthetic: pd.Series):
# We also bin across the training and synthetic Series combined, since we are binning across the combined range; otherwise we can see OOMs or SIGKILLs.
try:
bins = np.histogram_bin_edges(
training.append(synthetic), bins="doane", range=(min_value, max_value)
pd.concat([training, synthetic]), bins="doane", range=(min_value, max_value)
)
except Exception:
pass
# If 'doane' still doesn't do the trick just force 500 bins.
if len(bins) == 0 or len(bins) > 500:
try:
bins = np.histogram_bin_edges(
training, bins=500, range=(min_value, max_value)
pd.concat([training, synthetic]), bins=500, range=(min_value, max_value)
)
except Exception:
pass
Expand Down Expand Up @@ -301,7 +301,6 @@ def calculate_correlation(
A dataframe of correlation values.
"""

# If opt is True, take the faster (but not quite as accurate) route of globally replacing missing values with 0.
if opt:
with pd.option_context("mode.use_inf_as_na", True):
Expand Down

0 comments on commit ca7ad9a

Please sign in to comment.