Skip to content

Commit

Permalink
speedup duplicate processor using kvfile insert generator (#28)
Browse files Browse the repository at this point in the history
* speedup duplicate processor using kvfile insert generator

* Update duplicate.py

* Update duplicate.py
  • Loading branch information
OriHoch authored and akariv committed Oct 15, 2018
1 parent ab2f52c commit ae5da8b
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
12 changes: 9 additions & 3 deletions dataflows/processors/duplicate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,14 @@


def saver(resource, db, batch_size):
- db.insert((("{:08x}".format(idx), row) for idx, row in enumerate(resource)), batch_size=batch_size)
+ gen = db.insert_generator(
+ (("{:08x}".format(idx), row)
+ for idx, row
+ in enumerate(resource)),
+ batch_size=batch_size
+ )
+ for _, row in gen:
+ yield row


def loader(db):
Expand Down Expand Up @@ -37,8 +44,7 @@ def traverse_resources(resources):
for resource in package:
if resource.res.name == source_:
db = KVFile()
- saver(resource, db, batch_size)
- yield loader(db)
+ yield saver(resource, db, batch_size)
yield loader(db)
else:
yield resource
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def read(*paths):
NAME = PACKAGE.replace('_', '-')
INSTALL_REQUIRES = [
'datapackage>=1.3.2',
- 'kvfile',
+ 'kvfile>=0.0.4',
'click',
'jinja2',
'awesome-slugify',
Expand Down

0 comments on commit ae5da8b

Please sign in to comment.