Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Refactoring] resolved some TODOs items from the source code #1496

Merged
merged 13 commits into from May 26, 2022
90 changes: 44 additions & 46 deletions deepchecks/core/check_result.py
Expand Up @@ -34,7 +34,7 @@
from deepchecks.core.serialization.check_result.widget import CheckResultSerializer as CheckResultWidgetSerializer
from deepchecks.utils.ipython import is_colab_env, is_kaggle_env, is_notebook, is_widgets_enabled
from deepchecks.utils.strings import create_new_file_name, widget_to_html, widget_to_html_string
from deepchecks.utils.wandb_utils import set_wandb_run_state
from deepchecks.utils.wandb_utils import wandb_run

from .serialization.check_failure.ipython import CheckFailureSerializer as CheckFailureIPythonSerializer
from .serialization.check_result.ipython import CheckResultSerializer as CheckResultIPythonSerializer
Expand Down Expand Up @@ -377,41 +377,37 @@ def to_widget(

def to_wandb(
self,
dedicated_run: bool = True,
dedicated_run: Optional[bool] = None,
**kwargs: Any
):
"""Export check result to wandb.

Parameters
----------
dedicated_run : bool , default: None
If to initiate and finish a new wandb run.
If None it will be dedicated if wandb.run is None.
dedicated_run : bool, default True
whether to create a separate wandb run or not
(deprecated parameter, does not have any effect anymore)
kwargs: Keyword arguments to pass to wandb.init.
Default project name is deepchecks.
Default config is the check metadata (params, train/test/ name etc.).
"""
# NOTE: Wandb is not a default dependency
# user should install it manually therefore we are
# doing import within method to prevent premature ImportError
try:
import wandb

from .serialization.check_result.wandb import CheckResultSerializer as WandbSerializer
except ImportError as error:
raise ImportError(
'Wandb serializer requires the wandb python package. '
'To get it, run "pip install wandb".'
) from error
else:
dedicated_run = set_wandb_run_state(
dedicated_run,
{'header': self.get_header(), **self.check.metadata()},
**kwargs
assert self.check is not None
from .serialization.check_result.wandb import CheckResultSerializer as WandbSerializer

if dedicated_run is not None:
warnings.warn(
'"dedicated_run" parameter is deprecated and does not have effect anymore. '
'It will be remove in next versions.'
)
wandb.log(WandbSerializer(self).serialize())
if dedicated_run: # TODO: create context manager for this
wandb.finish()

wandb_kwargs = {'config': {'header': self.get_header(), **self.check.metadata()}}
wandb_kwargs.update(**kwargs)

with wandb_run(**wandb_kwargs) as run:
run.log(WandbSerializer(self).serialize())

def to_json(self, with_display: bool = True) -> str:
"""Return check result as json.
Expand Down Expand Up @@ -612,7 +608,7 @@ def to_widget(self) -> Widget:
"""Return CheckFailure as a ipywidgets.Widget instance."""
return CheckFailureWidgetSerializer(self).serialize()

def to_json(self, with_display: bool = True):
def to_json(self, with_display: Optional[bool] = None):
"""Return check failure as json.

Returned JSON string will have next structure:
Expand All @@ -632,51 +628,53 @@ def to_json(self, with_display: bool = True):
----------
with_display : bool
controls if to serialize display or not
(the parameter is deprecated and does not have any effect since version 0.6.4,
it will be removed in future versions)

Returns
-------
str
"""
# TODO: not sure if the `with_display` parameter is needed
# add deprecation warning if it is not needed
if with_display is not None:
warnings.warn(
'"with_display" parameter is deprecated and does not have any effect '
JKL98ISR marked this conversation as resolved.
Show resolved Hide resolved
'since version 0.6.4, it will be removed in future versions',
DeprecationWarning
)
return jsonpickle.dumps(
CheckFailureJsonSerializer(self).serialize(),
unpicklable=False
)

def to_wandb(self, dedicated_run: bool = True, **kwargs: Any):
def to_wandb(self, dedicated_run: Optional[bool] = None, **kwargs: Any):
"""Export check result to wandb.

Parameters
----------
dedicated_run : bool , default: None
If to initiate and finish a new wandb run.
If None it will be dedicated if wandb.run is None.
dedicated_run : bool, default True
whether to create a separate wandb run or not
(deprecated parameter, does not have any effect anymore)
kwargs: Keyword arguments to pass to wandb.init.
Default project name is deepchecks.
Default config is the check metadata (params, train/test/ name etc.).
"""
# NOTE: Wandb is not a default dependency
# user should install it manually therefore we are
# doing import within method to prevent premature ImportError
try:
import wandb

from .serialization.check_failure.wandb import CheckFailureSerializer as WandbSerializer
except ImportError as error:
raise ImportError(
'Wandb serializer requires the wandb python package. '
'To get it, run "pip install wandb".'
) from error
else:
dedicated_run = set_wandb_run_state(
dedicated_run,
{'header': self.header, **self.check.metadata()},
**kwargs
assert self.check is not None
from .serialization.check_failure.wandb import CheckFailureSerializer as WandbSerializer

if dedicated_run is not None:
warnings.warn(
'"dedicated_run" parameter is deprecated and does not have effect anymore. '
'It will be remove in next versions.'
)
wandb.log(WandbSerializer(self).serialize())
if dedicated_run:
wandb.finish()

wandb_kwargs = {'config': {'header': self.get_header(), **self.check.metadata()}}
wandb_kwargs.update(**kwargs)

with wandb_run(**wandb_kwargs) as run:
run.log(WandbSerializer(self).serialize())

def __repr__(self):
"""Return string representation."""
Expand Down
3 changes: 2 additions & 1 deletion deepchecks/core/serialization/check_failure/wandb.py
Expand Up @@ -14,13 +14,14 @@
from deepchecks.core import check_result as check_types
from deepchecks.core.serialization.abc import WandbSerializer
from deepchecks.core.serialization.common import prettify
from deepchecks.utils.wandb_utils import WANDB_INSTALLATION_CMD

try:
import wandb
except ImportError as e:
raise ImportError(
'Wandb serializer requires the wandb python package. '
'To get it, run "pip install wandb".'
f'To get it, run - {WANDB_INSTALLATION_CMD}.'
) from e


Expand Down
3 changes: 2 additions & 1 deletion deepchecks/core/serialization/check_result/wandb.py
Expand Up @@ -20,13 +20,14 @@
from deepchecks.core import check_result as check_types
from deepchecks.core.serialization.abc import ABCDisplayItemsHandler, WandbSerializer
from deepchecks.core.serialization.common import aggregate_conditions, concatv_images, normalize_value, prettify
from deepchecks.utils.wandb_utils import WANDB_INSTALLATION_CMD

try:
import wandb
except ImportError as e:
raise ImportError(
'Wandb serializer requires the wandb python package. '
'To get it, run "pip install wandb".'
f'To get it, run - {WANDB_INSTALLATION_CMD}.'
) from e


Expand Down
14 changes: 12 additions & 2 deletions deepchecks/core/serialization/suite_result/json.py
Expand Up @@ -36,9 +36,19 @@ def __init__(self, value: 'suite.SuiteResult', **kwargs):
)
self.value = value

def serialize(self, **kwargs) -> t.Union[t.Dict[t.Any, t.Any], t.List[t.Any]]:
def serialize(
self,
with_display: bool = True,
**kwargs
) -> t.Union[t.Dict[t.Any, t.Any], t.List[t.Any]]:
"""Serialize a SuiteResult instance into JSON format.

Parameters
----------
with_display : bool, default True
whether to include serialized `CheckResult.display` items into
the output or not

Returns
-------
Union[Dict[Any, Any], List[Any]]
Expand All @@ -47,7 +57,7 @@ def serialize(self, **kwargs) -> t.Union[t.Dict[t.Any, t.Any], t.List[t.Any]]:

for it in self.value.results:
if isinstance(it, check_types.CheckResult):
results.append(CheckResultSerializer(it).serialize())
results.append(CheckResultSerializer(it).serialize(with_display=with_display))
elif isinstance(it, check_types.CheckFailure):
results.append(CheckFailureSerializer(it).serialize())
else:
Expand Down
55 changes: 28 additions & 27 deletions deepchecks/core/suite.py
Expand Up @@ -28,7 +28,7 @@
from deepchecks.core.serialization.suite_result.widget import SuiteResultSerializer as SuiteResultWidgetSerializer
from deepchecks.utils.ipython import is_colab_env, is_kaggle_env, is_notebook, is_widgets_enabled
from deepchecks.utils.strings import create_new_file_name, get_random_string, widget_to_html, widget_to_html_string
from deepchecks.utils.wandb_utils import set_wandb_run_state
from deepchecks.utils.wandb_utils import wandb_run

from .serialization.suite_result.ipython import SuiteResultSerializer as SuiteResultIPythonSerializer

Expand All @@ -54,7 +54,14 @@ def __init__(self, name: str, results, extra_info: Optional[List[str]] = None):
self.results = results
self.extra_info = extra_info or []

# TODO: add comment about code below
# NOTE:
# we collect results indexes in order to facilitate results
# filtering and selection via the `select_results` method
#
# Examples:
# >>
# >> sr.select_result(sr.results_with_conditions | sr.results_with_display)
# >> sr.select_results(sr.results_without_conditions & sr.results_with_display)

self.results_with_conditions: Set[int] = set()
self.results_without_conditions: Set[int] = set()
Expand Down Expand Up @@ -229,17 +236,16 @@ def to_json(self, with_display: bool = True):

Parameters
----------
with_display : bool
controls if to serialize display of checks or not
with_display : bool, default True
whether to include serialized `CheckResult.display` items into
the output or not

Returns
-------
str
"""
# TODO: not sure if the `with_display` parameter is needed
# add deprecation warning if it is not needed
return jsonpickle.dumps(
SuiteResultJsonSerializer(self).serialize(),
SuiteResultJsonSerializer(self).serialize(with_display=with_display),
unpicklable=False
)

Expand All @@ -252,9 +258,9 @@ def to_wandb(

Parameters
----------
dedicated_run : bool , default: None
If to initiate and finish a new wandb run.
If None it will be dedicated if wandb.run is None.
dedicated_run : bool
whether to create a separate wandb run or not
(deprecated parameter, does not have any effect anymore)
kwargs: Keyword arguments to pass to wandb.init.
Default project name is deepchecks.
Default config is the suite name.
Expand All @@ -265,24 +271,19 @@ def to_wandb(
# doing import within method to prevent premature ImportError
# TODO:
# Previous implementation used ProgressBar to show serialization progress
try:
import wandb

from deepchecks.core.serialization.suite_result.wandb import SuiteResultSerializer as WandbSerializer
except ImportError as error:
raise ImportError(
'Wandb serializer requires the wandb python package. '
'To get it, run "pip install wandb".'
) from error
else:
dedicated_run = set_wandb_run_state(
dedicated_run,
{'name': self.name},
**kwargs
from deepchecks.core.serialization.suite_result.wandb import SuiteResultSerializer as WandbSerializer

if dedicated_run is not None:
warnings.warn(
'"dedicated_run" parameter is deprecated and does not have effect anymore. '
'It will be remove in next versions.'
)
wandb.log(WandbSerializer(self).serialize())
if dedicated_run: # TODO: create context manager for this
wandb.finish()

wandb_kwargs = {'config': {'name': self.name}}
wandb_kwargs.update(**kwargs)

with wandb_run(**wandb_kwargs) as run:
run.log(WandbSerializer(self).serialize())

def get_failures(self) -> Dict[str, CheckFailure]:
"""Get all the failed checks.
Expand Down
1 change: 1 addition & 0 deletions deepchecks/utils/metrics.py
Expand Up @@ -27,6 +27,7 @@
from deepchecks.utils.strings import is_string_column
from deepchecks.utils.typing import BasicModel, ClassificationModel


__all__ = [
'ModelType',
'task_type_check',
Expand Down
50 changes: 26 additions & 24 deletions deepchecks/utils/wandb_utils.py
Expand Up @@ -10,43 +10,45 @@
#
# pylint: disable=import-outside-toplevel
"""Wandb utils module."""
from typing import Any
import contextlib
import typing as t

__all__ = ['set_wandb_run_state']
__all__ = ['wandb_run']


def set_wandb_run_state(dedicated_run: bool, default_config: dict, **kwargs: Any):
"""Set wandb run state.
WANDB_INSTALLATION_CMD = 'pip install wandb'


@contextlib.contextmanager
def wandb_run(
project: t.Optional[str] = None,
**kwargs
) -> t.Iterator[t.Any]:
"""Create new one or use existing wandb run instance.

Parameters
----------
dedicated_run : bool
If to initiate and finish a new wandb run.
If None it will be dedicated if wandb.run is None.
default_config : dict
Default config dict.
kwargs: Keyword arguments to pass to wandb.init - relevent if wandb_init is True.
Default project name is deepchecks.
Default config is the check metadata (params, train/test/ name etc.).
project : Optional[str], default None
project name
**kwargs :
additional parameters that will be passed to the 'wandb.init'

Returns
-------
bool
If deticated run
Iterator[wandb.sdk.wandb_run.Run]
"""
try:
import wandb
except ImportError as error:
raise ImportError(
'"set_wandb_run_state" requires the wandb python package. '
'To get it, run "pip install wandb".'
'"wandb_run" requires the wandb python package. '
f'To get it, run - {WANDB_INSTALLATION_CMD}.'
) from error
else:
if dedicated_run is None:
dedicated_run = wandb.run is None
if dedicated_run:
kwargs['project'] = kwargs.get('project', 'deepchecks')
kwargs['config'] = kwargs.get('config', default_config)
wandb.init(**kwargs)
wandb.run._label(repo='Deepchecks') # pylint: disable=protected-access
return dedicated_run
if wandb.run is not None:
yield wandb.run
else:
kwargs = {'project': project or 'deepchecks', **kwargs}
with t.cast(t.ContextManager, wandb.init(**kwargs)) as run:
wandb.run._label(repo='Deepchecks') # pylint: disable=protected-access
yield run
1 change: 1 addition & 0 deletions requirements/dev-requirements.txt
Expand Up @@ -37,4 +37,5 @@ scipy>=1.4.1
tqdm>=4.41.0
seaborn>=0.11.0
wandb>=0.12.15
protobuf>=3.12.0,<4.0dev # wandb does not work if version is higer that 4.0
beautifulsoup4>=4.11.1