Skip to content
Permalink
Browse files

feat(score_fn): make score_fn dumpable

  • Loading branch information...
hanxiao committed Sep 3, 2019
1 parent 8322f15 commit 14c7e52261b80c8ddde6f760fbe6857ca2dc6c55
Showing with 21 additions and 17 deletions.
  1. +7 −6 gnes/indexer/chunk/annoy.py
  2. +11 −6 gnes/score_fn/base.py
  3. +0 −1 gnes/score_fn/chunk.py
  4. +0 −1 gnes/score_fn/doc.py
  5. +3 −3 gnes/score_fn/normalize.py
@@ -33,6 +33,13 @@ def __init__(self, num_dim: int, data_path: str, metric: str = 'angular', n_tree
self.metric = metric
self.n_trees = n_trees
self._key_info_indexer = ListKeyIndexer()
if self.metric in {'angular', 'hamming'}:
self.normalize_fn = ScoreOps.reciprocal1p
elif self.metric == 'euclidean':
self.normalize_fn = Normalizer3(self.num_dim)
elif self.metric == 'manhattan':
self.normalize_fn = Normalizer2(self.num_dim)


def post_init(self):
from annoy import AnnoyIndex
@@ -46,12 +53,6 @@ def post_init(self):
except:
self.logger.warning('fail to load model from %s, will create an empty one' % self.data_path)

if self.metric in {'angular', 'hamming'}:
self.normalize_fn = ScoreOps.reciprocal1p
elif self.metric == 'euclidean':
self.normalize_fn = Normalizer3(self.num_dim)
elif self.metric == 'manhattan':
self.normalize_fn = Normalizer2(self.num_dim)

def add(self, keys: List[Tuple[int, Any]], vectors: np.ndarray, weights: List[float], *args, **kwargs):
last_idx = self._key_info_indexer.size
@@ -66,12 +66,17 @@ class ModifierFn(BaseScoreFn):
score = modifier(factor * value)
"""

def __init__(self, modifier: str = 'none', factor: float = 1.0):
def __init__(self, modifier: str = 'none', factor: float = 1.0, factor_name: str = 'GivenConstant'):
if modifier not in {'none', 'log', 'log1p', 'log2p', 'ln', 'ln1p', 'ln2p', 'square', 'sqrt', 'reciprocal',
'reciprocal1p', 'abs'}:
raise AttributeError('modifier=%s is not supported!' % modifier)
self.modifier = modifier
self.factor = get_unary_score(factor)
self._modifier = modifier
self._factor = factor
self._factor_name = factor_name

@property
def factor(self):
return get_unary_score(value=self._factor, name=self._factor_name)

def op(self, *args, **kwargs) -> float:
return {
@@ -89,19 +94,19 @@ def op(self, *args, **kwargs) -> float:
'abs': abs,
'invert': lambda x: - x,
'invert1p': lambda x: 1 - x
}[self.modifier](*args, **kwargs)
}[self._modifier](*args, **kwargs)

def __call__(self,
last_score: 'gnes_pb2.Response.QueryResponse.ScoredResult.Score',
*args, **kwargs) -> \
'gnes_pb2.Response.QueryResponse.ScoredResult.Score':
if self.modifier == 'none' and self.factor.value == 1.0:
if self._modifier == 'none' and self._factor == 1.0:
return last_score
else:
return self.new_score(
value=self.op(self.factor.value * last_score.value),
operands=[last_score],
modifier=self.modifier,
modifier=self._modifier,
factor=json.loads(self.factor.explained))


@@ -1,5 +1,4 @@
from .base import get_unary_score, ScoreCombinedFn
from ..proto import gnes_pb2


class WeightedChunkScoreFn(ScoreCombinedFn):
@@ -1,5 +1,4 @@
from .base import get_unary_score, ScoreCombinedFn
from ..proto import gnes_pb2


class WeightedDocScoreFn(ScoreCombinedFn):
@@ -1,4 +1,4 @@
from .base import ModifierFn, ScoreOps as so, get_unary_score
from .base import ModifierFn, ScoreOps as so


class Normalizer1(ModifierFn):
@@ -18,7 +18,7 @@ class Normalizer2(ModifierFn):
def __init__(self, num_dim: int):
super().__init__()
self.modifier = 'reciprocal1p'
self.factor = so.reciprocal(get_unary_score(value=num_dim, name='GivenConstant'))
self._factor = 1.0 / num_dim


class Normalizer3(Normalizer2):
@@ -34,7 +34,7 @@ class Normalizer4(ModifierFn):
def __init__(self, num_bytes: int):
super().__init__()
self.modifier = 'invert1p'
self.factor = so.reciprocal(get_unary_score(value=num_bytes, name='GivenConstant'))
self._factor = 1.0 / num_bytes


class Normalizer5(ModifierFn):

0 comments on commit 14c7e52

Please sign in to comment.
You can’t perform that action at this time.