Skip to content

Commit

Permalink
[Refactoring] Put all utility routines within one package. (#196)
Browse files Browse the repository at this point in the history
* utils refactoring;


Co-authored-by: Yuriy Romanyshyn <yuriy.romanyshyn.lv.ua@gmail.com>
  • Loading branch information
yromanyshyn and YuriyRomanyshynUA committed Nov 30, 2021
1 parent a6a9b59 commit c6f79e4
Show file tree
Hide file tree
Showing 78 changed files with 428 additions and 317 deletions.
3 changes: 2 additions & 1 deletion deepchecks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import matplotlib
import matplotlib.pyplot as plt

from .utils import is_notebook
from .utils.ipython import is_notebook

# Matplotlib has multiple backends. If we are in a context that does not support a GUI (for example, during unit tests),
# we can't use a GUI backend, so we must use a non-GUI backend.
if not is_notebook():
Expand Down
1 change: 1 addition & 0 deletions deepchecks/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
from .dataset import *
from .check import *
from .suite import *

4 changes: 2 additions & 2 deletions deepchecks/base/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
from pandas.io.formats.style import Styler

from deepchecks.base.display_pandas import display_dataframe
from deepchecks.string_utils import split_camel_case
from deepchecks.utils import DeepchecksValueError
from deepchecks.utils.strings import split_camel_case
from deepchecks.errors import DeepchecksValueError


class Condition:
Expand Down
8 changes: 5 additions & 3 deletions deepchecks/base/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
import pandas as pd
from pandas.core.dtypes.common import is_float_dtype

from deepchecks.base.dataframe_utils import filter_columns_with_validation
from deepchecks.utils import DeepchecksValueError
from deepchecks.string_utils import is_string_column
from deepchecks.utils.dataframes import filter_columns_with_validation
from deepchecks.errors import DeepchecksValueError
from deepchecks.utils.strings import is_string_column


__all__ = ['Dataset', 'ensure_dataframe_type']


logger = logging.getLogger('deepchecks.dataset')


Expand Down
2 changes: 1 addition & 1 deletion deepchecks/base/display_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from deepchecks.base.check import CheckResult, CheckFailure
from deepchecks.base.display_pandas import dataframe_to_html, display_dataframe
from deepchecks.utils import is_widgets_enabled
from deepchecks.utils.ipython import is_widgets_enabled
import pandas as pd

__all__ = ['display_suite_result', 'ProgressBar']
Expand Down
2 changes: 1 addition & 1 deletion deepchecks/base/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Union, List, Optional

from deepchecks.base.display_suite import display_suite_result, ProgressBar
from deepchecks.utils import DeepchecksValueError
from deepchecks.errors import DeepchecksValueError
from deepchecks.base import Dataset
from deepchecks.base.check import (
BaseCheck, CheckResult, TrainTestBaseCheck, CompareDatasetsBaseCheck,
Expand Down
4 changes: 2 additions & 2 deletions deepchecks/checks/distribution/train_test_drift.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@

from deepchecks import Dataset, CheckResult, TrainTestBaseCheck, ConditionResult
from deepchecks.checks.distribution.plot import plot_density
from deepchecks.feature_importance_utils import calculate_feature_importance_or_null
from deepchecks.utils import DeepchecksValueError
from deepchecks.utils.features import calculate_feature_importance_or_null
from deepchecks.errors import DeepchecksValueError
import matplotlib.pyplot as plt

__all__ = ['TrainTestDrift']
Expand Down
8 changes: 5 additions & 3 deletions deepchecks/checks/distribution/trust_score_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
from deepchecks.checks.distribution.trust_score import TrustScore
from deepchecks.checks.distribution.preprocessing import preprocess_dataset_to_scaled_numerics
from deepchecks.checks.distribution.plot import plot_density
from deepchecks.metric_utils import task_type_check, ModelType
from deepchecks.string_utils import format_percent
from deepchecks.utils import DeepchecksValueError, model_type_validation
from deepchecks.utils.metrics import task_type_check, ModelType
from deepchecks.utils.strings import format_percent
from deepchecks.utils.validation import model_type_validation
from deepchecks.errors import DeepchecksValueError


__all__ = ['TrustScoreComparison']

Expand Down
6 changes: 3 additions & 3 deletions deepchecks/checks/integrity/data_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

from deepchecks import Dataset, ensure_dataframe_type
from deepchecks.base.check import CheckResult, SingleDatasetBaseCheck, ConditionResult
from deepchecks.base.dataframe_utils import filter_columns_with_validation
from deepchecks.utils import DeepchecksValueError
from deepchecks.string_utils import format_percent
from deepchecks.utils.dataframes import filter_columns_with_validation
from deepchecks.utils.strings import format_percent
from deepchecks.errors import DeepchecksValueError


__all__ = ['DataDuplicates']
Expand Down
8 changes: 4 additions & 4 deletions deepchecks/checks/integrity/dominant_frequency_change.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@

from deepchecks import Dataset
from deepchecks.base.check import CheckResult, CompareDatasetsBaseCheck, ConditionResult
from deepchecks.feature_importance_utils import calculate_feature_importance_or_null, column_importance_sorter_df
from deepchecks.utils.features import calculate_feature_importance_or_null, column_importance_sorter_df
from deepchecks.utils.strings import format_percent
from deepchecks.errors import DeepchecksValueError

__all__ = ['DominantFrequencyChange']

from deepchecks.string_utils import format_percent
from deepchecks.utils import DeepchecksValueError
__all__ = ['DominantFrequencyChange']


class DominantFrequencyChange(CompareDatasetsBaseCheck):
Expand Down
5 changes: 3 additions & 2 deletions deepchecks/checks/integrity/is_single_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
from typing import Union, Iterable
import pandas as pd
from deepchecks import SingleDatasetBaseCheck, CheckResult, ensure_dataframe_type, Dataset, ConditionResult
from deepchecks.base.dataframe_utils import filter_columns_with_validation
from deepchecks.string_utils import format_columns_for_condition
from deepchecks.utils.dataframes import filter_columns_with_validation
from deepchecks.utils.strings import format_columns_for_condition


__all__ = ['IsSingleValue']

Expand Down
2 changes: 1 addition & 1 deletion deepchecks/checks/integrity/label_ambiguity.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from deepchecks import Dataset, ConditionResult
from deepchecks.base.check import CheckResult, SingleDatasetBaseCheck
from deepchecks.string_utils import format_percent
from deepchecks.utils.strings import format_percent


__all__ = ['LabelAmbiguity']
Expand Down
9 changes: 5 additions & 4 deletions deepchecks/checks/integrity/mixed_nulls.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@

from deepchecks import Dataset, CheckResult, ensure_dataframe_type
from deepchecks.base.check import SingleDatasetBaseCheck, ConditionResult
from deepchecks.base.dataframe_utils import filter_columns_with_validation
from deepchecks.utils.dataframes import filter_columns_with_validation
from deepchecks.utils.features import calculate_feature_importance_or_null, column_importance_sorter_df
from deepchecks.utils.strings import string_baseform, format_percent, format_columns_for_condition
from deepchecks.errors import DeepchecksValueError

from deepchecks.feature_importance_utils import calculate_feature_importance_or_null, column_importance_sorter_df
from deepchecks.string_utils import string_baseform, format_percent, format_columns_for_condition
from deepchecks.utils import DeepchecksValueError

__all__ = ['MixedNulls']


DEFAULT_NULL_VALUES = {'none', 'null', 'nan', 'na', '', '\x00', '\x00\x00'}


Expand Down
7 changes: 3 additions & 4 deletions deepchecks/checks/integrity/mixed_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@

from deepchecks import Dataset, ensure_dataframe_type
from deepchecks.base.check import CheckResult, SingleDatasetBaseCheck, ConditionResult
from deepchecks.feature_importance_utils import calculate_feature_importance_or_null, column_importance_sorter_df
from deepchecks.utils.dataframes import filter_columns_with_validation
from deepchecks.utils.features import calculate_feature_importance_or_null, column_importance_sorter_df
from deepchecks.utils.strings import is_string_column, format_percent, format_columns_for_condition


__all__ = ['MixedTypes']

from deepchecks.base.dataframe_utils import filter_columns_with_validation
from deepchecks.string_utils import is_string_column, format_percent, format_columns_for_condition


class MixedTypes(SingleDatasetBaseCheck):
"""Search for various types of data in (a) column[s], including hidden mixes in strings.
Expand Down
5 changes: 3 additions & 2 deletions deepchecks/checks/integrity/new_category.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
"""The data_sample_leakage_report check module."""
from typing import Union, Iterable, Dict
import pandas as pd

from deepchecks import Dataset
from deepchecks.base.check import CheckResult, TrainTestBaseCheck, ConditionResult
from deepchecks.string_utils import format_percent, format_columns_for_condition
from deepchecks.utils.strings import format_percent, format_columns_for_condition

import pandas as pd

__all__ = ['CategoryMismatchTrainTest']

Expand Down
3 changes: 2 additions & 1 deletion deepchecks/checks/integrity/new_label.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@

from deepchecks import Dataset
from deepchecks.base.check import CheckResult, TrainTestBaseCheck, ConditionResult
from deepchecks.string_utils import format_percent
from deepchecks.utils.strings import format_percent

import pandas as pd

pd.options.mode.chained_assignment = None


__all__ = ['NewLabelTrainTest']


Expand Down
8 changes: 4 additions & 4 deletions deepchecks/checks/integrity/rare_format_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
import pandas as pd

from deepchecks import CheckResult, Dataset, SingleDatasetBaseCheck, ConditionResult
from deepchecks.base.dataframe_utils import filter_columns_with_validation
from deepchecks.base.dataset import ensure_dataframe_type
from deepchecks.feature_importance_utils import calculate_feature_importance_or_null, column_importance_sorter_dict
from deepchecks.string_utils import split_and_keep, split_by_order, format_percent
from deepchecks.utils import DeepchecksValueError
from deepchecks.utils.dataframes import filter_columns_with_validation
from deepchecks.utils.features import calculate_feature_importance_or_null, column_importance_sorter_dict
from deepchecks.utils.strings import split_and_keep, split_by_order, format_percent
from deepchecks.errors import DeepchecksValueError


__all__ = ['RareFormatDetection', 'Pattern']
Expand Down
7 changes: 4 additions & 3 deletions deepchecks/checks/integrity/special_chars.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@

from deepchecks import Dataset, ensure_dataframe_type
from deepchecks.base.check import CheckResult, SingleDatasetBaseCheck, ConditionResult
from deepchecks.base.dataframe_utils import filter_columns_with_validation
from deepchecks.feature_importance_utils import calculate_feature_importance_or_null, column_importance_sorter_df
from deepchecks.string_utils import string_baseform, format_percent, format_columns_for_condition
from deepchecks.utils.dataframes import filter_columns_with_validation
from deepchecks.utils.features import calculate_feature_importance_or_null, column_importance_sorter_df
from deepchecks.utils.strings import string_baseform, format_percent, format_columns_for_condition


__all__ = ['SpecialCharacters']

Expand Down
7 changes: 4 additions & 3 deletions deepchecks/checks/integrity/string_length_out_of_bounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
from scipy import stats

from deepchecks import CheckResult, SingleDatasetBaseCheck, Dataset, ensure_dataframe_type, ConditionResult
from deepchecks.feature_importance_utils import calculate_feature_importance_or_null, column_importance_sorter_df
from deepchecks.string_utils import is_string_column, format_number, format_columns_for_condition, format_percent
from deepchecks.base.dataframe_utils import filter_columns_with_validation
from deepchecks.utils.features import calculate_feature_importance_or_null, column_importance_sorter_df
from deepchecks.utils.strings import is_string_column, format_number, format_columns_for_condition, format_percent
from deepchecks.utils.dataframes import filter_columns_with_validation


__all__ = ['StringLengthOutOfBounds']

Expand Down
18 changes: 14 additions & 4 deletions deepchecks/checks/integrity/string_mismatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,22 @@

import pandas as pd

from deepchecks import CheckResult, SingleDatasetBaseCheck, Dataset, ensure_dataframe_type, ConditionResult, \
from deepchecks import (
CheckResult,
SingleDatasetBaseCheck,
Dataset,
ensure_dataframe_type,
ConditionResult,
ConditionCategory
from deepchecks.base.dataframe_utils import filter_columns_with_validation
from deepchecks.string_utils import get_base_form_to_variants_dict, is_string_column, format_percent, \
)
from deepchecks.utils.dataframes import filter_columns_with_validation
from deepchecks.utils.features import calculate_feature_importance_or_null, column_importance_sorter_df
from deepchecks.utils.strings import (
get_base_form_to_variants_dict,
is_string_column,
format_percent,
format_columns_for_condition
from deepchecks.feature_importance_utils import calculate_feature_importance_or_null, column_importance_sorter_df
)


__all__ = ['StringMismatch']
Expand Down
7 changes: 4 additions & 3 deletions deepchecks/checks/integrity/string_mismatch_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
import pandas as pd

from deepchecks import CheckResult, Dataset, ensure_dataframe_type, CompareDatasetsBaseCheck, ConditionResult
from deepchecks.base.dataframe_utils import filter_columns_with_validation
from deepchecks.string_utils import get_base_form_to_variants_dict, is_string_column, format_percent, \
from deepchecks.utils.dataframes import filter_columns_with_validation
from deepchecks.utils.strings import get_base_form_to_variants_dict, is_string_column, format_percent, \
format_columns_for_condition
from deepchecks.feature_importance_utils import calculate_feature_importance_or_null, column_importance_sorter_df
from deepchecks.utils.features import calculate_feature_importance_or_null, column_importance_sorter_df


__all__ = ['StringMismatchComparison']

Expand Down
7 changes: 4 additions & 3 deletions deepchecks/checks/methodology/boosting_overfit.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
import numpy as np

from deepchecks import Dataset, CheckResult, TrainTestBaseCheck, ConditionResult
from deepchecks.metric_utils import task_type_check, DEFAULT_METRICS_DICT, validate_scorer, DEFAULT_SINGLE_METRIC
from deepchecks.string_utils import format_percent
from deepchecks.utils import DeepchecksValueError
from deepchecks.utils.metrics import task_type_check, DEFAULT_METRICS_DICT, validate_scorer, DEFAULT_SINGLE_METRIC
from deepchecks.utils.strings import format_percent
from deepchecks.errors import DeepchecksValueError


__all__ = ['BoostingOverfit']

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
import pandas as pd

from deepchecks import CheckResult, Dataset, TrainTestBaseCheck, ConditionResult
from deepchecks.string_utils import format_percent
from deepchecks.utils.strings import format_percent


__all__ = ['DateTrainTestLeakageDuplicates']

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""The date_leakage check module."""
from deepchecks import CheckResult, Dataset, TrainTestBaseCheck, ConditionResult
from deepchecks.string_utils import format_percent
from deepchecks.utils.strings import format_percent


__all__ = ['DateTrainTestLeakageOverlap']

Expand Down
8 changes: 4 additions & 4 deletions deepchecks/checks/methodology/identifier_leakage.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

import pandas as pd

import deepchecks.ppscore as pps
from deepchecks import Dataset
from deepchecks.base.check import CheckResult, SingleDatasetBaseCheck, ConditionResult
from deepchecks.plot_utils import create_colorbar_barchart_for_check
from deepchecks.string_utils import format_percent
from deepchecks.utils import DeepchecksValueError
import deepchecks.ppscore as pps
from deepchecks.utils.plot import create_colorbar_barchart_for_check
from deepchecks.utils.strings import format_percent
from deepchecks.errors import DeepchecksValueError


__all__ = ['IdentifierLeakage']
Expand Down
3 changes: 2 additions & 1 deletion deepchecks/checks/methodology/index_leakage.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

from deepchecks import CheckResult, Dataset, TrainTestBaseCheck
from deepchecks.base.check import ConditionResult
from deepchecks.string_utils import format_percent
from deepchecks.utils.strings import format_percent


__all__ = ['IndexTrainTestLeakage']

Expand Down
6 changes: 3 additions & 3 deletions deepchecks/checks/methodology/performance_overfit.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
import pandas as pd
import numpy as np

from deepchecks.string_utils import format_percent
from deepchecks.utils import model_type_validation
from deepchecks.metric_utils import get_metrics_list
from deepchecks.utils.strings import format_percent
from deepchecks.utils.validation import model_type_validation
from deepchecks.utils.metrics import get_metrics_list
from deepchecks import (
Dataset,
CheckResult,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import typing as t

import deepchecks.ppscore as pps
from deepchecks.plot_utils import create_colorbar_barchart_for_check
from deepchecks.utils.plot import create_colorbar_barchart_for_check
from deepchecks import CheckResult, Dataset, SingleDatasetBaseCheck, ConditionResult


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import deepchecks.ppscore as pps
from deepchecks import CheckResult, Dataset, TrainTestBaseCheck, ConditionResult
from deepchecks.plot_utils import create_colorbar_barchart_for_check
from deepchecks.utils.plot import create_colorbar_barchart_for_check



Expand Down

0 comments on commit c6f79e4

Please sign in to comment.