Skip to content

Commit

Permalink
v0.0.63 expose use_bloom_filter in dump_to_sql
Browse files Browse the repository at this point in the history
  • Loading branch information
akariv committed Oct 8, 2019
1 parent 8faa862 commit 6fb3e68
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 2 deletions.
3 changes: 2 additions & 1 deletion PROCESSORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,8 @@ def dump_to_sql(tables,
- `true` - row was updated
- `false` - row was inserted
- `updated_id_column` - Optional name of a column that will be added to the output data containing the id of the updated row in the DB.
- `batch_size` - Maximum amount of rows to write at the same time to the DB.
- `batch_size` - Maximum amount of rows to write at the same time to the DB (default: 1000)
- `use_bloom_filter` - Preprocess existing DB data to improve update performance (default: True)

#### checkpoint

Expand Down
2 changes: 1 addition & 1 deletion dataflows/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.62
0.0.63
2 changes: 2 additions & 0 deletions dataflows/processors/dumpers/to_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def __init__(self,
self.updated_column = updated_column
self.updated_id_column = updated_id_column
self.batch_size = options.get('batch_size', 1000)
self.use_bloom_filter = options.get('use_bloom_filter', True)

def normalize_for_engine(self, dialect, resource, schema_descriptor):
actions = {}
Expand Down Expand Up @@ -132,6 +133,7 @@ def process_resource(self, resource: ResourceWrapper):
keyed=True, as_generator=True,
update_keys=update_keys,
buffer_size=self.batch_size,
use_bloom_filter=self.use_bloom_filter,
))

def get_output_row(self, written):
Expand Down

0 comments on commit 6fb3e68

Please sign in to comment.