Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add artifact id and token endpoint #485

Merged
merged 8 commits into from
Jul 13, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- Add artifact id and token interface to improve usability. ([#485](https://github.com/jina-ai/finetuner/pull/485))

### Removed

### Changed
Expand Down
9 changes: 9 additions & 0 deletions finetuner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,12 @@ def delete_experiments() -> List[Experiment]:
:return: List of deleted experiments.
"""
return ft.delete_experiments()


def get_token() -> str:
"""Get user token.

This is a helper function to get token and pull artifact from cloud storage.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the docstring is confusing. This function does not download the artifact

:return: user token as string object.
"""
return ft.get_token()
2 changes: 1 addition & 1 deletion finetuner/client/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import requests

import hubble
from finetuner.client.exception import FinetunerServerError
from finetuner.constants import (
AUTHORIZATION,
CHARSET,
Expand All @@ -15,6 +14,7 @@
TOKEN_PREFIX,
UTF_8,
)
from finetuner.exception import FinetunerServerError


class _BaseClient:
Expand Down
2 changes: 1 addition & 1 deletion finetuner/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
ARTIFACTS_DIR = 'artifacts/'
MODEL = 'model'
MODEL_OPTIONS = 'model_options'
MODEL_IDS = 'model_ids'
ARTIFACT_ID = 'artifact_id'
# Run status
CREATED = 'CREATED'
STARTED = 'STARTED'
Expand Down
12 changes: 12 additions & 0 deletions finetuner/client/exception.py → finetuner/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,15 @@ def __init__(

def __str__(self):
return f'{self.message} ({self.code}): {self.details}'


class RunInProgressError(Exception):
...


class RunFailedError(Exception):
...


class UserNotLoginError(Exception):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

UserNotLoggedIn

...
6 changes: 6 additions & 0 deletions finetuner/finetuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import hubble
from finetuner.client import FinetunerV1Client
from finetuner.constants import CREATED_AT, DESCRIPTION, NAME, STATUS
from finetuner.exception import UserNotLoginError
from finetuner.experiment import Experiment
from finetuner.run import Run

Expand Down Expand Up @@ -236,3 +237,8 @@ def delete_runs(self, experiment_name: Optional[str] = None):
experiments = [self.get_experiment(name=experiment_name)]
for experiment in experiments:
experiment.delete_runs()

def get_token(self) -> str:
if not self._client:
raise UserNotLoginError('Please call `login` before getting token.')
return hubble.Auth.get_auth_token()
13 changes: 5 additions & 8 deletions finetuner/hubble.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,22 +64,19 @@ def _push_docarray(


def download_artifact(
client, experiment_name: str, run_name: str, directory: str = ARTIFACTS_DIR
client, artifact_id: str, run_name: str, directory: str = ARTIFACTS_DIR
) -> str:
"""Download artifact from Hubble by its ID.

:param client: The Finetuner API client.
:param experiment_name: The name of the experiment.
:param run_name: The name of the run.
:param client: Hubble client instance.
:param artifact_id: The artifact id stored in the Hubble.
:param run_name: The name of the run as artifact name to store locally.
:param directory: Directory where the artifact will be stored.
:returns: A string that indicates the download path.
"""
os.makedirs(directory, exist_ok=True)

run = client.get_run(experiment_name=experiment_name, run_name=run_name)

artifact_id = run['artifact_id']
path = os.path.join(directory, f'{run["name"]}.zip')
path = os.path.join(directory, f'{run_name}.zip')

return client.hubble_client.download_artifact(
id=artifact_id, f=path, show_progress=True
Expand Down
47 changes: 42 additions & 5 deletions finetuner/run.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
from finetuner.client import FinetunerV1Client
from finetuner.constants import ARTIFACTS_DIR, FINISHED, STATUS
from finetuner.constants import (
ARTIFACT_ID,
ARTIFACTS_DIR,
CREATED,
FAILED,
STARTED,
STATUS,
)
from finetuner.exception import RunFailedError, RunInProgressError
from finetuner.hubble import download_artifact


Expand Down Expand Up @@ -29,6 +37,7 @@ def __init__(
self._config = config
self._created_at = created_at
self._description = description
self._run = self._get_run()

@property
def name(self) -> str:
Expand All @@ -38,6 +47,12 @@ def name(self) -> str:
def config(self) -> dict:
return self._config

def _get_run(self) -> dict:
"""Get Run object as dict."""
return self._client.get_run(
experiment_name=self._experiment_name, run_name=self._name
)

def status(self) -> dict:
"""Run status.

Expand All @@ -56,18 +71,40 @@ def logs(self) -> str:
experiment_name=self._experiment_name, run_name=self._name
)

def _check_run_status(self):
status = self.status()[STATUS]
if status in [CREATED, STARTED]:
raise RunInProgressError(
'The run needs to be finished in order to save the model.'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

save the artifact

)
if status == FAILED:
raise RunFailedError(
'The run failed, please check the `logs` for detailed information.'
)

def save_artifact(self, directory: str = ARTIFACTS_DIR) -> str:
"""Save artifact if the run is finished.

:param directory: Directory where the artifact will be stored.
:returns: A string object that indicates the download path.
"""
if self.status()[STATUS] != FINISHED:
raise Exception('The run needs to be finished in order to save the model.')

self._check_run_status()
return download_artifact(
client=self._client,
experiment_name=self._experiment_name,
artifact_id=self._run[ARTIFACT_ID],
run_name=self._name,
directory=directory,
)

def artifact_id(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should be a property. Otherwise rename to get_artifact_id

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how about staus()?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well I think all should be properties 😛

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i believe status() and logs() should be get_, the rest could be property

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

having static ones as properties and non-statics with get_ makes sense to me, but I'm slightly leaning towards what George suggested because it looks simpler and more elegant

"""Get artifact id from the run.

An artifact in finetuner contains fine-tuned model and its metadata.
Such as preprocessing function, collate function. This id could be useful
if you want to directly pull the artifact from the cloud storage, such as
using `FinetunerExecutor`.

:return: Artifact id as string object.
"""
self._check_run_status()
return self._run[ARTIFACT_ID]
6 changes: 6 additions & 0 deletions tests/integration/test_runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,12 @@ def test_create_run_and_save_model(finetuner_mocker, get_feature_data, tmp_path)
status = run.status()[STATUS]

assert status == FINISHED

artifact_id = run.artifact_id()
assert isinstance(artifact_id, str)
# the artifact id is a 24 character hex string defined in mongo db.
assert len(artifact_id) == 24

run.save_artifact(directory=tmp_path / 'finetuned_model')
assert os.path.exists(tmp_path / 'finetuned_model')

Expand Down