Skip to content

Commit

Permalink
Bugfix: synthetic DF batches not re-assembled properly with custom delimiter (#31)
Browse files Browse the repository at this point in the history

* Bugfix: synthetic DF batches not re-assembled properly with custom delimiter

* remove print

Co-authored-by: John Myers <john@gretel.ai>
  • Loading branch information
johntmyers and John Myers committed Jun 18, 2020
1 parent f8b487f commit f4f6279
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 11 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.10.0
0.10.1
2 changes: 1 addition & 1 deletion src/gretel_synthetics/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def synthetic_df(self) -> pd.DataFrame:
if not self.gen_data_stream.getvalue(): # pragma: no cover
return pd.DataFrame()
self.gen_data_stream.seek(0)
return pd.read_csv(self.gen_data_stream)
return pd.read_csv(self.gen_data_stream, sep=self.config.field_delimiter)

def set_validator(self, fn: Callable, save=True):
"""Assign a validation callable to this batch. Optionally
Expand Down
12 changes: 3 additions & 9 deletions tests/test_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,19 +163,13 @@ def bad():

def test_batches_to_df(test_data):
batches = DataFrameBatch(df=pd.DataFrame([
{"foo": "bar", "foo1": "bar1", "foo2": "bar2", "foo3": 3}]), config=config_template, batch_size=1)
{"foo": "bar", "foo1": "bar1", "foo2": "bar2", "foo3": 3}]), config=config_template, batch_size=2)

batches.batches[0].add_valid_data(
gen_text(text="baz", valid=True, delimiter=",")
gen_text(text="baz|baz1", valid=True, delimiter="|")
)
batches.batches[1].add_valid_data(
gen_text(text="baz1", valid=True, delimiter=",")
)
batches.batches[2].add_valid_data(
gen_text(text="baz2", valid=True, delimiter=",")
)
batches.batches[3].add_valid_data(
gen_text(text="5", valid=True, delimiter=",")
gen_text(text="baz2|5", valid=True, delimiter="|")
)

check = batches.batches_to_df()
Expand Down

0 comments on commit f4f6279

Please sign in to comment.