Skip to content

Commit

Permalink
default builder to use sharded collection
Browse files Browse the repository at this point in the history
  • Loading branch information
sirloon committed Dec 3, 2019
1 parent 7b2a3d9 commit b1b4228
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions src/hub/databuild/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,21 @@
from biothings.utils.mongo import id_feeder
import biothings.utils.mongo as mongo
import biothings.hub.databuild.builder as builder
from biothings.hub.databuild.backend import ShardedTargetDocMongoBackend
import config

class MyVariantDataBuilder(builder.DataBuilder):

MAX_CHROM_EX = 100000 # if chrom discrepancies found, max # of examples we keep

def __init__(self, build_name, source_backend, target_backend, *args, **kwargs):
    """Initialise the builder so it always targets a sharded backend.

    A ``ShardedTargetDocMongoBackend`` factory (with ``target_db`` bound to
    a ``mongo.get_target_db`` factory) is handed to the parent constructor
    as ``target_backend``.

    NOTE(review): the ``target_backend`` argument received here is never
    used -- it is replaced unconditionally by the sharded factory. This
    looks intentional (making sharded collections the default), but
    confirm that no caller relies on supplying its own target backend.

    NOTE(review): ``build_name`` is forwarded by keyword, so any extra
    positional argument in ``*args`` would be bound to the parent's first
    positional parameter; presumably callers only pass extras by keyword
    -- verify against the call sites.
    """
    # Factory returning the target database; kept as a partial so the
    # backend receives a callable rather than an already-fetched database.
    target_db_factory = partial(mongo.get_target_db)
    sharded_backend_factory = partial(
        ShardedTargetDocMongoBackend,
        target_db=target_db_factory,
    )
    super().__init__(
        *args,
        build_name=build_name,
        source_backend=source_backend,
        target_backend=sharded_backend_factory,
        **kwargs
    )

def merge(self, sources=None, target_name=None, batch_size=50000, job_manager=None, **kwargs):
# just override default batch_size or it consumes too much mem
return super(MyVariantDataBuilder,self).merge(
Expand Down

0 comments on commit b1b4228

Please sign in to comment.