Skip to content

Commit

Permalink
create utils.es.ESIndex, successor to ESIndexer
Browse files Browse the repository at this point in the history
  • Loading branch information
Jerry committed Sep 21, 2021
1 parent 748baa1 commit 88db59e
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 18 deletions.
29 changes: 11 additions & 18 deletions biothings/hub/dataindex/indexer_task.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from collections import namedtuple
import functools
import logging
from enum import Enum
from functools import partial
from types import SimpleNamespace

from biothings.utils.loggers import get_logger
from biothings.utils.es import ESIndex as BaseESIndex
from elasticsearch import Elasticsearch, helpers
from pymongo import MongoClient

Expand All @@ -20,27 +20,12 @@

_IDExists = namedtuple("IDExists", ("id", "exists"))

class ESIndex():
""" An Elasticsearch Index Wrapping A Client.
Counterpart for pymongo.collection.Collection """

# previously using biothings.utils.es.ESIndexer
# recently reimplemented here for better clarity.
class ESIndex(BaseESIndex):

def __init__(self, client, index_name, **bulk_index_args):
self.client = client
self.index_name = index_name # MUST exist
super().__init__(client, index_name)
self.bulk_index_args = bulk_index_args

@property
@functools.lru_cache()
def doc_type(self):
if int(self.client.info()['version']['number'].split('.')[0]) < 7:
mappings = self.client.indices.get_mapping(self.index_name)
mappings = mappings[self.index_name]["mappings"]
return next(iter(mappings.keys()))
return None

# --------------------
# bulk operations (m*)
# --------------------
Expand Down Expand Up @@ -100,6 +85,14 @@ def _action(doc):
self.client, map(_action, docs),
**self.bulk_index_args)[0]

# NOTE
# Why don't "mget", "mexists", and "mindex" belong to the base class?
# At this moment, their interfaces are too opinionated/customized
# for usage in this module. Unless we find them directly useful in
# another module in the future, proving their genericity, they should
# stay close in this module only.


# Data Collection Client

def _get_es_client(es_client_args, es_blk_args, es_idx_name):
Expand Down
31 changes: 31 additions & 0 deletions biothings/utils/es.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import re
import time
import functools

from elasticsearch import (Elasticsearch, NotFoundError, RequestError,
TransportError, ElasticsearchException)
Expand Down Expand Up @@ -73,6 +74,36 @@ def outter_fn(*args, **kwargs):
class IndexerException(Exception):
    """Module-specific exception type; adds no behavior to ``Exception``.

    NOTE(review): the raise sites are not visible in this view --
    presumably raised by the indexer classes in this module; confirm.
    """
    pass

class ESIndex():
    """An Elasticsearch index bound to a client.

    Counterpart for pymongo.collection.Collection.

    A new approach to biothings.utils.es.ESIndexer, but not intended to
    be a feature-for-feature replacement.

    Args:
        client: a synchronous Elasticsearch client.
        index_name: name of the index to operate on; it MUST already exist.
    """

    def __init__(self, client, index_name):
        self.client = client  # synchronous
        self.index_name = index_name  # MUST exist

    @property
    def doc_type(self):
        """Return the mapping type name of this index, or None on ES >= 7.

        For servers whose major version is below 7, the first mapping type
        found in the index mappings is returned. For major version 7 and
        above, None is returned.

        The value is resolved on first access and then memoized on the
        instance. It is deliberately NOT cached with ``functools.lru_cache``:
        an lru_cache on a method keys on ``self`` in a class-level cache,
        which keeps every instance (and its client) alive, requires
        instances to be hashable, and evicts entries once the cache is full.

        Raises:
            StopIteration: if the server is older than 7 and the index has
                no mapping type at all.
        """
        try:
            return self._doc_type
        except AttributeError:
            pass  # not resolved yet for this instance

        major = int(self.client.info()['version']['number'].split('.')[0])
        if major < 7:
            # Pass the index as a keyword: newer clients reject positional
            # API arguments, while older ones accept both forms.
            mappings = self.client.indices.get_mapping(index=self.index_name)
            mappings = mappings[self.index_name]["mappings"]
            resolved = next(iter(mappings))
        else:
            resolved = None

        # Store only after a successful lookup so failures are retried.
        self._doc_type = resolved
        return resolved

# SUBCLASS NOTE &&
# BEFORE YOU ADD A METHOD UNDER THIS CLASS:

# An object of this class refers to an existing ES index. All operations
# should target this index. Do not put uncommon methods here. They belong
# to the subclasses. This class is to provide a common framework to support
# Index-specific ES operations, a concept that does not exist in the low-level
# ES library, thus only providing low-level common operations, like doc_type
# parsing across ES versions for biothings usecases. (single type per index)

class ESIndexer():
# RETIRING THIS CLASS

Expand Down

0 comments on commit 88db59e

Please sign in to comment.