Skip to content

Commit

Permalink
v0.0.63 expose use_bloom_filter in dump_to_sql
Browse files Browse the repository at this point in the history
  • Loading branch information
akariv committed Oct 8, 2019
1 parent 8faa862 commit 6fb3e68
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 2 deletions.
3 changes: 2 additions & 1 deletion PROCESSORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,8 @@ def dump_to_sql(tables,
- `true` - row was updated
- `false` - row was inserted
- `updated_id_column` - Optional name of a column that will be added to the output data containing the id of the updated row in the DB.
- `batch_size` - Maximum amount of rows to write at the same time to the DB.
- `batch_size` - Maximum amount of rows to write at the same time to the DB (default: 1000)
- `use_bloom_filter` - Preprocess existing DB data to improve update performance (default: True)

#### checkpoint

Expand Down
2 changes: 1 addition & 1 deletion dataflows/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.62
0.0.63
2 changes: 2 additions & 0 deletions dataflows/processors/dumpers/to_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def __init__(self,
self.updated_column = updated_column
self.updated_id_column = updated_id_column
self.batch_size = options.get('batch_size', 1000)
self.use_bloom_filter = options.get('use_bloom_filter', True)

def normalize_for_engine(self, dialect, resource, schema_descriptor):
actions = {}
Expand Down Expand Up @@ -132,6 +133,7 @@ def process_resource(self, resource: ResourceWrapper):
keyed=True, as_generator=True,
update_keys=update_keys,
buffer_size=self.batch_size,
use_bloom_filter=self.use_bloom_filter,
))

def get_output_row(self, written):
Expand Down

0 comments on commit 6fb3e68

Please sign in to comment.