refactor(embedding): level up embed method to top API add docs (#178)
hanxiao committed Oct 27, 2021
1 parent bf07ab1 commit 1ae201a
Showing 10 changed files with 67 additions and 25 deletions.
Binary file added docs/basics/embed.png
47 changes: 44 additions & 3 deletions docs/basics/fit.md
@@ -33,6 +33,25 @@ Depending on your framework, `display` may require different arguments for rendering

More information can be {ref}`found here<display-method>`.

## Embed documents

You can use the `finetuner.embed()` method to compute the embeddings of a `DocumentArray` or `DocumentArrayMemmap`.

```python
import finetuner
from jina import DocumentArray

docs = DocumentArray(...)  # your documents

finetuner.embed(docs, model)  # computes and fills `docs.embeddings` in place

print(docs.embeddings)
```

Note that `model` above must be an {term}`Embedding model`.
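
For instance, a minimal PyTorch embedding model could look like the sketch below, modeled on the one in this commit's unit tests. Only the final `Linear(128, 32)` layer is visible in the diff; the earlier layers and the `28 * 28` input size are assumptions.

```python
import torch.nn as nn

# A hedged sketch of an embedding model: any network that maps each
# document to a fixed-length vector (here 32-dimensional) qualifies.
embed_model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=28 * 28, out_features=128),  # assumption: 28x28 inputs
    nn.ReLU(),
    nn.Linear(in_features=128, out_features=32),
)
```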



## Example

```python
@@ -59,9 +78,6 @@ model, summary = finetuner.fit(
)

finetuner.display(model, input_size=(100,), input_dtype='long')

finetuner.save(model, './saved-model')
summary.plot('fit.png')
```

```console
@@ -81,7 +97,32 @@ Green layers can be used as embedding layers, whose name can be used as
layer_name in to_embedding_model(...).
```
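
As a hedged illustration of that note, not a verbatim part of these docs: assuming `to_embedding_model` is importable from `finetuner.tailor` and accepts `layer_name`, `input_size` and `input_dtype` (names inferred from the surrounding docs and the `display` call above), cutting a general model at a green layer could look like:

```python
from finetuner.tailor import to_embedding_model  # assumption: exposed here

# `general_model` and the layer name 'linear_4' are hypothetical; pick any
# green layer name reported by finetuner.display(...) above.
embed_model = to_embedding_model(
    general_model,
    layer_name='linear_4',
    input_size=(100,),
    input_dtype='long',
)
```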

```python
finetuner.save(model, './saved-model')
summary.plot('fit.png')
```

```{figure} fit-plot.png
:align: center
:width: 80%
```

```python
from jina import DocumentArray

from finetuner.toydata import generate_qa_match

all_q = DocumentArray(generate_qa_match())
finetuner.embed(all_q, model)  # embed all questions with the tuned model
print(all_q.embeddings.shape)
```

```console
(481, 32)
```

```python
all_q.visualize('embed.png', method='tsne')
```

```{figure} embed.png
:align: center
:width: 80%
```
8 changes: 4 additions & 4 deletions docs/index.md
@@ -136,7 +136,7 @@ Perfect! Now `embed_model` and `train_data` are already provided by you, simply
```python
import finetuner

tuned_model, _ = finetuner.fit(
tuned_model, summary = finetuner.fit(
embed_model,
train_data=train_data
)
@@ -159,7 +159,7 @@ emphasize-lines: 6
---
import finetuner
tuned_model, _ = finetuner.fit(
tuned_model, summary = finetuner.fit(
embed_model,
train_data=unlabeled_data,
interactive=True
@@ -183,7 +183,7 @@ emphasize-lines: 6, 7
---
import finetuner
tuned_model, _ = finetuner.fit(
tuned_model, summary = finetuner.fit(
general_model,
train_data=labeled_data,
to_embedding_model=True,
@@ -208,7 +208,7 @@ emphasize-lines: 6, 7
---
import finetuner
tuned_model, _ = finetuner.fit(
tuned_model, summary = finetuner.fit(
general_model,
train_data=labeled_data,
interactive=True,
5 changes: 3 additions & 2 deletions finetuner/__init__.py
@@ -67,7 +67,7 @@ def fit(
optimizer: str = 'adam',
optimizer_kwargs: Optional[Dict] = None,
device: str = 'cpu',
) -> Tuple['AnyDNN', 'Summary']:
) -> Tuple['AnyDNN', None]:
...


@@ -91,7 +91,7 @@ def fit(
output_dim: Optional[int] = None,
freeze: bool = False,
device: str = 'cpu',
) -> Tuple['AnyDNN', 'Summary']:
) -> Tuple['AnyDNN', None]:
...


@@ -116,3 +116,4 @@
# level them up to the top-level
from .tuner import save
from .tailor import display
from .embedding import embed
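
With this line, `embed` joins `fit`, `save` and `display` at the package root. A minimal before/after sketch of the change, grounded in the renames below:

```python
# before this commit: a submodule-level helper
from finetuner.embedding import set_embeddings
set_embeddings(docs, embed_model)

# after this commit: part of the top-level API
import finetuner
finetuner.embed(docs, embed_model)
```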
2 changes: 1 addition & 1 deletion finetuner/embedding.py
@@ -5,7 +5,7 @@
from .helper import AnyDNN, get_framework


def set_embeddings(
def embed(
docs: Union[DocumentArray, DocumentArrayMemmap],
embed_model: AnyDNN,
device: str = 'cpu',
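
Only the name changes here; the optional `device` argument stays, as the GPU tests below show. A minimal usage sketch, assuming a CUDA-capable runtime for the second call:

```python
import finetuner

finetuner.embed(docs, embed_model)          # default: compute embeddings on CPU
finetuner.embed(docs, embed_model, 'cuda')  # same call, on GPU
```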
6 changes: 3 additions & 3 deletions finetuner/labeler/executor.py
@@ -4,7 +4,7 @@
from jina import Executor, DocumentArray, requests, DocumentArrayMemmap
from jina.helper import cached_property

from ..embedding import set_embeddings
from ..embedding import embed
from ..tuner import fit, save


@@ -42,8 +42,8 @@ def embed(self, docs: DocumentArray, parameters: Dict, **kwargs):
min(len(self._all_data), int(parameters.get('sample_size', 1000)))
)

set_embeddings(docs, self._embed_model)
set_embeddings(_catalog, self._embed_model)
embed(docs, self._embed_model)
embed(_catalog, self._embed_model)

docs.match(
_catalog,
6 changes: 3 additions & 3 deletions tests/unit/test_embedding.py
@@ -4,7 +4,7 @@
import torch
from jina import DocumentArray, DocumentArrayMemmap

from finetuner.embedding import set_embeddings
from finetuner.embedding import embed
from finetuner.toydata import generate_fashion_match

embed_models = {
@@ -41,11 +41,11 @@ def test_set_embeddings(framework, tmpdir):
# works for DA
embed_model = embed_models[framework]()
docs = DocumentArray(generate_fashion_match(num_total=100))
set_embeddings(docs, embed_model)
embed(docs, embed_model)
assert docs.embeddings.shape == (100, 32)

# works for DAM
dam = DocumentArrayMemmap(tmpdir)
dam.extend(generate_fashion_match(num_total=42))
set_embeddings(dam, embed_model)
embed(dam, embed_model)
assert dam.embeddings.shape == (42, 32)
6 changes: 3 additions & 3 deletions tests/unit/tuner/keras/test_gpu.py
@@ -3,7 +3,7 @@
from jina import DocumentArray, DocumentArrayMemmap

from finetuner.tuner.keras import KerasTuner
from finetuner.embedding import set_embeddings
from finetuner.embedding import embed
from finetuner.toydata import generate_fashion_match

all_test_losses = [
@@ -47,11 +47,11 @@ def test_set_embeddings_gpu(tmpdir):
]
)
docs = DocumentArray(generate_fashion_match(num_total=100))
set_embeddings(docs, embed_model, 'cuda')
embed(docs, embed_model, 'cuda')
assert docs.embeddings.shape == (100, 32)

# works for DAM
dam = DocumentArrayMemmap(tmpdir)
dam.extend(generate_fashion_match(num_total=42))
set_embeddings(dam, embed_model, 'cuda')
embed(dam, embed_model, 'cuda')
assert dam.embeddings.shape == (42, 32)
6 changes: 3 additions & 3 deletions tests/unit/tuner/paddle/test_gpu.py
@@ -2,7 +2,7 @@
import paddle.nn as nn
from jina import DocumentArray, DocumentArrayMemmap

from finetuner.embedding import set_embeddings
from finetuner.embedding import embed
from finetuner.toydata import generate_fashion_match
from finetuner.tuner.paddle import PaddleTuner

@@ -45,11 +45,11 @@ def test_set_embeddings_gpu(tmpdir):
nn.Linear(in_features=128, out_features=32),
)
docs = DocumentArray(generate_fashion_match(num_total=100))
set_embeddings(docs, embed_model, 'cuda')
embed(docs, embed_model, 'cuda')
assert docs.embeddings.shape == (100, 32)

# works for DAM
dam = DocumentArrayMemmap(tmpdir)
dam.extend(generate_fashion_match(num_total=42))
set_embeddings(dam, embed_model, 'cuda')
embed(dam, embed_model, 'cuda')
assert dam.embeddings.shape == (42, 32)
6 changes: 3 additions & 3 deletions tests/unit/tuner/torch/test_gpu.py
@@ -3,7 +3,7 @@
import torch.nn as nn
from jina import DocumentArray, DocumentArrayMemmap

from finetuner.embedding import set_embeddings
from finetuner.embedding import embed
from finetuner.toydata import generate_fashion_match
from finetuner.tuner.pytorch import PytorchTuner

@@ -49,11 +49,11 @@ def test_set_embeddings_gpu(tmpdir):
nn.Linear(in_features=128, out_features=32),
)
docs = DocumentArray(generate_fashion_match(num_total=100))
set_embeddings(docs, embed_model, 'cuda')
embed(docs, embed_model, 'cuda')
assert docs.embeddings.shape == (100, 32)

# works for DAM
dam = DocumentArrayMemmap(tmpdir)
dam.extend(generate_fashion_match(num_total=42))
set_embeddings(dam, embed_model, 'cuda')
embed(dam, embed_model, 'cuda')
assert dam.embeddings.shape == (42, 32)
