fix(indexer): add drop raw bytes option to leveldb
Larryjianfeng committed Jul 23, 2019
1 parent f84b5c7 commit a4b883acb312b5f47d34955d3ec2dccb4cd782c6
Showing with 12 additions and 1 deletion.
  1. +12 −1 gnes/indexer/fulltext/
@@ -26,10 +26,16 @@

class LVDBIndexer(BaseTextIndexer):

def __init__(self, data_path: str,
             keep_na_doc: bool = True,
             drop_raw_bytes: bool = False,
             drop_chunk_blob: bool = False,
             *args, **kwargs):
    """Record the indexer configuration; no storage is touched here.

    :param data_path: stored as ``self.data_path`` (presumably the
        location of the LevelDB store -- confirm against ``post_init``)
    :param keep_na_doc: stored as ``self.keep_na_doc``; not read in this
        method
    :param drop_raw_bytes: when true, ``add`` replaces each document's
        ``raw_bytes`` with ``b''`` before serializing it
    :param drop_chunk_blob: when true, ``add`` walks each document's
        chunks before serializing it (presumably to strip the chunk
        blobs -- confirm against ``add``)
    :param args: forwarded unchanged to the parent initializer
    :param kwargs: forwarded unchanged to the parent initializer
    """
    super().__init__(*args, **kwargs)
    self.data_path = data_path
    self.keep_na_doc = keep_na_doc
    self.drop_raw_bytes = drop_raw_bytes
    self.drop_chunk_blob = drop_chunk_blob
    # Sentinel value; how it is used is not visible in this section.
    self._NOT_FOUND = None

def post_init(self):
@@ -40,6 +46,11 @@ def add(self, keys: List[int], docs: List['gnes_pb2.Document'], *args, **kwargs)
# Write every (key, document) pair through one batch on self._db.
with self._db.write_batch() as wb:
    for k, d in zip(keys, docs):
        doc_id = pickle.dumps(k)
        # Both options below mutate the caller's document objects in place.
        if self.drop_raw_bytes:
            d.raw_bytes = b''
        if self.drop_chunk_blob:
            # NOTE(review): the loop body was missing from the visible
            # source; the field name 'blob' is inferred from the option
            # name -- confirm against the chunk message definition.
            for chunk in d.chunks:
                chunk.ClearField('blob')
        doc = d.SerializeToString()
        wb.put(doc_id, doc)

