Skip to content
This repository has been archived by the owner on Sep 9, 2020. It is now read-only.

Commit

Permalink
increase target auto-sample size to 2.5M
Browse files Browse the repository at this point in the history
  • Loading branch information
sevikkk committed Oct 25, 2016
1 parent 05937de commit c0312d1
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion datarobot_batch_scoring/utils.py
Original file line number Diff line number Diff line change
@@ -479,7 +479,7 @@ def auto_sampler(dataset, encoding, ui):

buf.close()
avg_line = int(size_bytes / csv_lines)
- chunk_size_goal = int(1.5 * 1024 ** 2) # size we want per batch
+ chunk_size_goal = int(2.5 * 1024 ** 2) # size we want per batch
lines_per_sample = int(chunk_size_goal / avg_line) + 1
ui.debug('auto_sampler: lines counted: {}, avgerage line size: {}, '
'recommended lines per sample: {}'.format(csv_lines, avg_line,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -235,7 +235,7 @@ def test_auto_sample():
with UI(None, logging.DEBUG, stdout=False) as ui:
data = 'tests/fixtures/criteo_top30_1m.csv.gz'
encoding = investigate_encoding_and_dialect(data, None, ui)
- assert auto_sampler(data, encoding, ui) == 8988
+ assert auto_sampler(data, encoding, ui) == 14980
ui.close()


Expand Down

0 comments on commit c0312d1

Please sign in to comment.