Skip to content

Commit

Permalink
Renaming Angular to Cosine in Python client (#295)
Browse files Browse the repository at this point in the history
Same as #294, but for the Python client.
  • Loading branch information
alexklibisz committed Jul 25, 2021
1 parent 6798e3b commit dcabb8c
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 28 deletions.
16 changes: 8 additions & 8 deletions client-python/elastiknn/api.py
Expand Up @@ -16,7 +16,7 @@ class Similarity(Enum):
Hamming = 2
L1 = 3
L2 = 4
Angular = 5
Cosine = 5


class Vec:
Expand Down Expand Up @@ -141,7 +141,7 @@ def to_dict(self):
}

@dataclass(frozen=True)
class AngularLsh(Base):
class CosineLsh(Base):
dims: int
L: int
k: int
Expand All @@ -151,7 +151,7 @@ def to_dict(self):
"type": "elastiknn_dense_float_vector",
"elastiknn": {
"model": "lsh",
"similarity": "angular",
"similarity": "cosine",
"dims": self.dims,
"L": self.L,
"k": self.k
Expand Down Expand Up @@ -282,10 +282,10 @@ def with_vec(self, vec: Vec.Base):
candidates=self.candidates)

@dataclass(frozen=True)
class AngularLsh(Base):
class CosineLsh(Base):
field: str
vec: Vec.Base
similarity: Similarity = Similarity.Angular
similarity: Similarity = Similarity.Cosine
candidates: int = 1000

def to_dict(self):
Expand All @@ -298,8 +298,8 @@ def to_dict(self):
}

def with_vec(self, vec: Vec.Base):
return NearestNeighborsQuery.AngularLsh(field=self.field, vec=vec, similarity=self.similarity,
candidates=self.candidates)
return NearestNeighborsQuery.CosineLsh(field=self.field, vec=vec, similarity=self.similarity,
candidates=self.candidates)

@dataclass(frozen=True)
class L2Lsh(Base):
Expand Down Expand Up @@ -327,7 +327,7 @@ def with_vec(self, vec: Vec.Base):
class PermutationLsh(Base):
field: str
vec: Vec.Base
similarity: Similarity = Similarity.Angular
similarity: Similarity = Similarity.Cosine
candidates: int = 1000

def to_dict(self):
Expand Down
18 changes: 9 additions & 9 deletions client-python/elastiknn/models.py
Expand Up @@ -90,8 +90,8 @@ def _mk_mapping_query(self, query_params: dict()) -> (Mapping.Base, NearestNeigh
return Mapping.DenseFloat(self._dims), NearestNeighborsQuery.Exact(field, dummy, Similarity.L1)
elif self._metric == 'l2':
return Mapping.DenseFloat(self._dims), NearestNeighborsQuery.Exact(field, dummy, Similarity.L2)
elif self._metric == 'angular':
return Mapping.DenseFloat(self._dims), NearestNeighborsQuery.Exact(field, dummy, Similarity.Angular)
elif self._metric == 'cosine':
return Mapping.DenseFloat(self._dims), NearestNeighborsQuery.Exact(field, dummy, Similarity.Cosine)
elif self._metric == 'jaccard':
return Mapping.SparseBool(self._dims), NearestNeighborsQuery.Exact(field, dummy, Similarity.Jaccard)
elif self._metric == 'hamming':
Expand All @@ -108,18 +108,18 @@ def _mk_mapping_query(self, query_params: dict()) -> (Mapping.Base, NearestNeigh
m, q = Mapping.L2Lsh(self._dims, **self._mapping_params), \
NearestNeighborsQuery.L2Lsh(field, dummy, **query_params)
return m, q
elif self._metric == 'angular':
return Mapping.AngularLsh(self._dims, **self._mapping_params), \
NearestNeighborsQuery.AngularLsh(field, dummy, **query_params)
elif self._metric == 'cosine':
return Mapping.CosineLsh(self._dims, **self._mapping_params), \
NearestNeighborsQuery.CosineLsh(field, dummy, **query_params)
elif self._metric == 'hamming':
return Mapping.AngularLsh(self._dims, **self._mapping_params), \
return Mapping.CosineLsh(self._dims, **self._mapping_params), \
NearestNeighborsQuery.HammingLsh(field, dummy, **query_params)
elif self._metric == 'jaccard':
return Mapping.JaccardLsh(self._dims, **self._mapping_params), \
NearestNeighborsQuery.JaccardLsh(field, dummy, **query_params)
elif self._algorithm == 'permutation_lsh':
if self._metric == 'angular':
if self._metric == 'cosine':
return Mapping.PermutationLsh(self._dims, **self._mapping_params), \
NearestNeighborsQuery.PermutationLsh(field, dummy, Similarity.Angular, **query_params)
NearestNeighborsQuery.PermutationLsh(field, dummy, Similarity.Cosine, **query_params)

raise NameError
raise NameError
8 changes: 3 additions & 5 deletions client-python/elastiknn/utils.py
Expand Up @@ -12,25 +12,23 @@
valid_metrics_algos = [
('exact', 'l1'),
('exact', 'l2'),
('exact', 'angular'),
('exact', 'cosine'),
('exact', 'hamming'),
('exact', 'jaccard'),
('sparse_indexed', 'jaccard'),
('sparse_indexed', 'hamming'),
('lsh', 'l2'),
('lsh', 'angular'),
('lsh', 'cosine'),
('lsh', 'jaccard'),
('lsh', 'hamming'),
('permutation_lsh', 'angular'),
('permutation_lsh', 'cosine'),
('permutation_lsh', 'l2')
]

def dealias_metric(metric: str) -> str:
mlower = metric.lower()
if mlower == 'euclidean':
return 'l2'
elif mlower == 'cosine':
return 'angular'
else:
return mlower

Expand Down
12 changes: 6 additions & 6 deletions docs/pages/index.md
Expand Up @@ -14,12 +14,12 @@ This enables users to combine traditional queries (e.g., "some product") with ve

## Features

- Datatypes to efficiently store dense and sparse numerical vectors in Elasticsearch documents.
- Exact nearest neighbor queries for five similarity functions: [L1](https://en.wikipedia.org/wiki/Taxicab_geometry), [L2](https://en.wikipedia.org/wiki/Euclidean_distance), [Angular](https://en.wikipedia.org/wiki/Cosine_similarity), [Jaccard](https://en.wikipedia.org/wiki/Jaccard_index), and [Hamming](https://en.wikipedia.org/wiki/Hamming_distance).
- Approximate queries using [Locality Sensitive Hashing](https://en.wikipedia.org/wiki/Locality-sensitive_hashing) and related algorithms for all similarities.
- Compose nearest neighbor queries with standard Elasticsearch queries.
- Incrementally build and update your index. Elastiknn doesn't perform any sort of model fitting, and a vector is just a field in a document. So you can start with 1 vector or 1 million and then create/update/delete documents and vectors without ever re-building the entire index.
- Implemented with standard Elasticsearch and Lucene primitives. Executes entirely in the Elasticsearch JVM. This means deployment is a simple plugin installation, and both indexing and querying scale horizontally with Elasticsearch.
- Datatypes to efficiently store dense and sparse numerical vectors in Elasticsearch documents, including multiple vectors per document.
- Exact nearest neighbor queries for five similarity functions: [L1](https://en.wikipedia.org/wiki/Taxicab_geometry), [L2](https://en.wikipedia.org/wiki/Euclidean_distance), [Cosine](https://en.wikipedia.org/wiki/Cosine_similarity), [Jaccard](https://en.wikipedia.org/wiki/Jaccard_index), and [Hamming](https://en.wikipedia.org/wiki/Hamming_distance).
- Approximate queries using [Locality Sensitive Hashing](https://en.wikipedia.org/wiki/Locality-sensitive_hashing) and related algorithms for L2, Cosine, Jaccard, and Hamming similarity.
- Integration of nearest neighbor queries with standard Elasticsearch queries.
- Incremental index updates. Start with 1 vector or 1 million vectors and then create/update/delete documents and vectors without ever re-building the entire index.
- Implementation based on standard Elasticsearch and Lucene primitives, entirely in the JVM. This means deployment is a simple plugin installation, and both indexing and querying scale horizontally with Elasticsearch.

_Non-Features_

Expand Down

0 comments on commit dcabb8c

Please sign in to comment.