Fix 'make import_local_data' ETA computation (#67)
AloysAugustin authored and sylvinus committed Oct 14, 2016
1 parent fc56e8f commit aa80573
Showing 2 changed files with 4 additions and 5 deletions.
7 changes: 3 additions & 4 deletions cosrlib/dataproviders/__init__.py
@@ -62,7 +62,7 @@ def import_dump(self):
         db = Storage(read_only=False)

         write_batch = db.write_batch(None)
-        batch_time = time.time()
+        start_time = time.time()

         done = 0

@@ -97,17 +97,16 @@ def import_dump(self):
                 eta = float(
                     self.dump_count_estimate - done
                 ) / (
-                    3600.0 * done / (time.time() - batch_time)
+                    3600.0 * done / (time.time() - start_time)
                 )

                 print("Done %s (%s/s, ~%0.2f%%, ETA %0.2fh)" % (
                     done,
-                    int(done / (time.time() - batch_time)),
+                    int(done / (time.time() - start_time)),
                     (float(done * 100) / self.dump_count_estimate) if self.dump_count_estimate else 0,
                     eta
                 ))
             write_batch = db.write_batch(write_batch)
-            batch_time = time.time()

         print("Total rows: %s" % done)
         db.write_batch(write_batch)
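
Why this fixes the ETA: `batch_time` was reset after every write batch (the deleted line above), while `done` kept counting rows since the very start of the import. Dividing the cumulative `done` by the elapsed time of only the latest batch inflated the apparent throughput, so the reported rate and ETA drifted further from reality with every batch. Keeping a single `start_time` makes both numbers reflect overall progress. A minimal sketch of the corrected logic, with illustrative names (`progress_stats` is not part of the codebase):

    import time

    def progress_stats(done, count_estimate, start_time):
        # Overall throughput since the import began, in rows per second.
        rows_per_second = done / (time.time() - start_time)
        # Remaining rows divided by rows per hour gives the ETA in hours.
        eta_hours = (count_estimate - done) / (3600.0 * rows_per_second)
        return rows_per_second, eta_hours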
2 changes: 1 addition & 1 deletion cosrlib/dataproviders/dmoz.py
@@ -13,7 +13,7 @@ class DataProvider(BaseDataProvider):
     dump_compression = "gz"
     dump_format = "xml"
     dump_batch_size = 100000
-    dump_count_estimate = 3000000
+    dump_count_estimate = 3600000

     def import_row(self, i, row):
         """ Returns a (key, value) pair for this row from the dump file """
