From eef382e93dc66a228cce92bd667a2f4a90a3eafd Mon Sep 17 00:00:00 2001
From: Nadav-Barak <67195469+Nadav-Barak@users.noreply.github.com>
Date: Thu, 7 Jul 2022 09:34:42 +0300
Subject: [PATCH] Quick start model evaluation + new dataset (#1726)

* Part 1 - converting tree leaves into filters
* Part 1 - improvements
* Apply suggestions from code review

Co-authored-by: Nir Hutnik <92314933+nirhutnik@users.noreply.github.com>

* Part 1 - pr comments
* Apply suggestions from code review

Co-authored-by: Nir Hutnik <92314933+nirhutnik@users.noreply.github.com>

* Part 1 - pr comments v2
* Weak segment performance check - without display and docs
* merge with master
* Improve run by removing unnecessary operations
* Edge cases - small or empty datasets
* Display for check
* Pr comments
* display categorical features
* Messages + import fix
* import fix ver 2
* example page
* example page v2
* pr comments
* fixed CheckResultJson
* pylint
* Nir comments
* plot name
* docstring
* bla
* pr comments
* Apply suggestions from code review

Co-authored-by: shir22 <33841818+shir22@users.noreply.github.com>

* more pr comments
* isort
* fix tutorial bug
* added a dataset
* link to API reference
* model eval quick start
* shir comments
* quick start
* changes
* comments fixed
* text
* condition to check
* bressler's pr comments
* Apply suggestions from code review

Co-authored-by: shir22 <33841818+shir22@users.noreply.github.com>

Co-authored-by: Nir Hutnik <92314933+nirhutnik@users.noreply.github.com>
Co-authored-by: Itay Gabbay
Co-authored-by: Yurii Romanyshyn
Co-authored-by: shir22 <33841818+shir22@users.noreply.github.com>
---
 .../model_evaluation/model_error_analysis.py  |   3 +-
 .../train_test_prediction_drift.py            |   3 +-
 .../weak_segments_performance.py              |   5 +-
 deepchecks/tabular/context.py                 |   3 +-
 .../tabular/datasets/regression/__init__.py   |   4 +-
 .../datasets/regression/wine_quality.py       | 126 ++++++++++++++
 deepchecks/utils/single_sample_metrics.py     |   2 +-
 .../plot_weak_segments_performance.py         |  28 +--
 docs/source/getting-started/welcome.rst       |   6 +-
 .../tutorials/plot_quick_data_integrity.py    |  32 ++--
 .../tutorials/plot_quick_model_evaluation.py  | 164 ++++++++++++++++++
 .../plot_quick_train_test_validation.py       |  34 ++--
 .../tutorials/plot_quickstart_in_5_minutes.py |   4 +-
 tests/tabular/test_datasets.py                |   6 +-
 14 files changed, 355 insertions(+), 65 deletions(-)
 create mode 100644 deepchecks/tabular/datasets/regression/wine_quality.py
 create mode 100644 docs/source/user-guide/tabular/tutorials/plot_quick_model_evaluation.py

diff --git a/deepchecks/tabular/checks/model_evaluation/model_error_analysis.py b/deepchecks/tabular/checks/model_evaluation/model_error_analysis.py
index 8a78af4530..7bc37c16d5 100644
--- a/deepchecks/tabular/checks/model_evaluation/model_error_analysis.py
+++ b/deepchecks/tabular/checks/model_evaluation/model_error_analysis.py
@@ -14,8 +14,7 @@
 
 from sklearn import preprocessing
 
-from deepchecks import CheckFailure
-from deepchecks.core import CheckResult, ConditionCategory, ConditionResult
+from deepchecks.core import CheckFailure, CheckResult, ConditionCategory, ConditionResult
 from deepchecks.core.errors import DeepchecksProcessError
 from deepchecks.tabular import Context, Dataset, TrainTestCheck
 from deepchecks.tabular.utils.task_type import TaskType
diff --git a/deepchecks/tabular/checks/model_evaluation/train_test_prediction_drift.py b/deepchecks/tabular/checks/model_evaluation/train_test_prediction_drift.py
index 5ad6997e8b..a9a3449ec5 100644
--- a/deepchecks/tabular/checks/model_evaluation/train_test_prediction_drift.py
+++ b/deepchecks/tabular/checks/model_evaluation/train_test_prediction_drift.py
@@ -16,8 +16,7 @@
 import numpy as np
 import pandas as pd
 
-from deepchecks import ConditionCategory
-from deepchecks.core import CheckResult, ConditionResult
+from deepchecks.core import CheckResult, ConditionCategory, ConditionResult
 from deepchecks.tabular import Context, TrainTestCheck
 from deepchecks.tabular.utils.task_type import TaskType
 from deepchecks.utils.distribution.drift import (SUPPORTED_CATEGORICAL_METHODS, SUPPORTED_NUMERIC_METHODS,
diff --git a/deepchecks/tabular/checks/model_evaluation/weak_segments_performance.py b/deepchecks/tabular/checks/model_evaluation/weak_segments_performance.py
index e536d49098..125b99228c 100644
--- a/deepchecks/tabular/checks/model_evaluation/weak_segments_performance.py
+++ b/deepchecks/tabular/checks/model_evaluation/weak_segments_performance.py
@@ -21,11 +21,10 @@
 from sklearn.model_selection import GridSearchCV
 from sklearn.tree import DecisionTreeRegressor
 
-from deepchecks import ConditionCategory, ConditionResult, Dataset
-from deepchecks.core import CheckResult
+from deepchecks.core import CheckResult, ConditionCategory, ConditionResult
 from deepchecks.core.check_result import DisplayMap
 from deepchecks.core.errors import DeepchecksNotSupportedError, DeepchecksProcessError
-from deepchecks.tabular import Context, SingleDatasetCheck
+from deepchecks.tabular import Context, Dataset, SingleDatasetCheck
 from deepchecks.tabular.context import _DummyModel
 from deepchecks.tabular.utils.task_type import TaskType
 from deepchecks.utils.dataframes import default_fill_na_per_column_type
diff --git a/deepchecks/tabular/context.py b/deepchecks/tabular/context.py
index b0da0e4c82..7cef31e27f 100644
--- a/deepchecks/tabular/context.py
+++ b/deepchecks/tabular/context.py
@@ -14,8 +14,7 @@
 import numpy as np
 import pandas as pd
 
-from deepchecks import CheckFailure, CheckResult
-from deepchecks.core import DatasetKind
+from deepchecks.core import CheckFailure, CheckResult, DatasetKind
 from deepchecks.core.errors import (DatasetValidationError, DeepchecksNotSupportedError, DeepchecksValueError,
                                     ModelValidationError)
 from deepchecks.tabular._shared_docs import docstrings
diff --git a/deepchecks/tabular/datasets/regression/__init__.py b/deepchecks/tabular/datasets/regression/__init__.py
index 915d33c4bb..1befe2755e 100644
--- a/deepchecks/tabular/datasets/regression/__init__.py
+++ b/deepchecks/tabular/datasets/regression/__init__.py
@@ -9,6 +9,6 @@
 # ----------------------------------------------------------------------------
 #
 """Module for working with pre-built regression datasets."""
-from . import avocado
+from . import avocado, wine_quality
 
-__all__ = ['avocado']
+__all__ = ['avocado', 'wine_quality']
diff --git a/deepchecks/tabular/datasets/regression/wine_quality.py b/deepchecks/tabular/datasets/regression/wine_quality.py
new file mode 100644
index 0000000000..689bedd354
--- /dev/null
+++ b/deepchecks/tabular/datasets/regression/wine_quality.py
@@ -0,0 +1,126 @@
+# ----------------------------------------------------------------------------
+# Copyright (C) 2021-2022 Deepchecks (https://www.deepchecks.com)
+#
+# This file is part of Deepchecks.
+# Deepchecks is distributed under the terms of the GNU Affero General
+# Public License (version 3 or later).
+# You should have received a copy of the GNU Affero General Public License
+# along with Deepchecks. If not, see <http://www.gnu.org/licenses/>.
+# ----------------------------------------------------------------------------
+#
+"""The wine quality dataset contains data on different wines and their overall quality."""
+import typing as t
+from urllib.request import urlopen
+
+import joblib
+import pandas as pd
+import sklearn
+from category_encoders import OneHotEncoder
+from sklearn.compose import ColumnTransformer
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.impute import SimpleImputer
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
+
+from deepchecks.tabular.dataset import Dataset
+
+__all__ = ['load_data', 'load_fitted_model']
+_MODEL_URL = 'https://ndownloader.figshare.com/files/36146916'
+_FULL_DATA_URL = 'https://ndownloader.figshare.com/files/36146853'
+_TRAIN_DATA_URL = 'https://ndownloader.figshare.com/files/36146856'
+_TEST_DATA_URL = 'https://ndownloader.figshare.com/files/36146859'
+_MODEL_VERSION = '1.0.2'
+_target = 'quality'
+_CAT_FEATURES = []
+_NUM_FEATURES = ['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
+                 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
+                 'pH', 'sulphates', 'alcohol']
+
+
+def load_data(data_format: str = 'Dataset', as_train_test: bool = True) -> \
+        t.Union[t.Tuple, t.Union[Dataset, pd.DataFrame]]:
+    """Load and return the Wine Quality dataset (regression).
+
+    The data has 1599 records with 11 features and one ordinal target column, referring to the overall quality
+    of a specific wine. See https://www.kaggle.com/datasets/uciml/red-wine-quality-cortez-et-al-2009
+    for additional information.
+
+    The typical ML task in this dataset is to build a model that predicts the overall quality of wine.
+
+    This dataset is licensed under the Open Data Commons Open Database License (ODbL) v1.0
+    (https://opendatacommons.org/licenses/odbl/1-0/).
+    Rights reserved to P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.
+    Modeling wine preferences by data mining from physicochemical properties.
+    In Decision Support Systems, Elsevier, 47(4):547-553, 2009.
+
+    Parameters
+    ----------
+    data_format : str , default: Dataset
+        Represents the format of the returned value. Can be 'Dataset'|'Dataframe'.
+        'Dataset' will return the data as a Dataset object.
+        'Dataframe' will return the data as a pandas Dataframe object.
+    as_train_test : bool , default: True
+        If True, the returned data is split into train and test exactly like the toy model
+        was trained. The first return value is the train data and the second is the test data.
+        In order to get this model, call the load_fitted_model() function.
+        Otherwise, returns a single object.
+
+    Returns
+    -------
+    dataset : Union[deepchecks.Dataset, pd.DataFrame]
+        The data object, corresponding to the data_format attribute.
+    train_data, test_data : Tuple[Union[deepchecks.Dataset, pd.DataFrame], Union[deepchecks.Dataset, pd.DataFrame]]
+        Returned if as_train_test is True: a tuple of two objects representing the dataset split into train and test sets.
+ """ + if not as_train_test: + dataset = pd.read_csv(_FULL_DATA_URL) + + if data_format == 'Dataset': + dataset = Dataset(dataset, label=_target, cat_features=_CAT_FEATURES) + + return dataset + else: + train = pd.read_csv(_TRAIN_DATA_URL) + test = pd.read_csv(_TEST_DATA_URL) + + if data_format == 'Dataset': + train = Dataset(train, label=_target, cat_features=_CAT_FEATURES) + test = Dataset(test, label=_target, cat_features=_CAT_FEATURES) + + return train, test + + +def load_fitted_model(pretrained=True): + """Load and return a fitted regression model to predict the quality in the Wine Quality dataset. + + Returns + ------- + model : Joblib + the model/pipeline that was trained on the Wine Quality dataset. + + """ + if sklearn.__version__ == _MODEL_VERSION and pretrained: + with urlopen(_MODEL_URL) as f: + model = joblib.load(f) + else: + model = _build_model() + train, _ = load_data() + model.fit(train.data[train.features], train.data[train.label_name]) + joblib.dump(model, 'wine_quality_model.sav') + return model + + +def _build_model(): + """Build the model to fit.""" + return Pipeline(steps=[ + ('preprocessor', + ColumnTransformer(transformers=[('num', + Pipeline(steps=[('imputer', + SimpleImputer(strategy='median')), + ('scaler', + StandardScaler())]), + _NUM_FEATURES), + ('cat', OneHotEncoder(), + _CAT_FEATURES)])), + ('classifier', RandomForestRegressor(random_state=0, max_depth=7, n_estimators=30)) + ]) diff --git a/deepchecks/utils/single_sample_metrics.py b/deepchecks/utils/single_sample_metrics.py index d9d75246a2..ee555d57e9 100644 --- a/deepchecks/utils/single_sample_metrics.py +++ b/deepchecks/utils/single_sample_metrics.py @@ -16,8 +16,8 @@ from sklearn import metrics from sklearn.preprocessing import LabelBinarizer -from deepchecks import Dataset from deepchecks.core.errors import DeepchecksNotImplementedError +from deepchecks.tabular import Dataset from deepchecks.tabular.utils.task_type import TaskType diff --git a/docs/source/checks/tabular/model_evaluation/plot_weak_segments_performance.py b/docs/source/checks/tabular/model_evaluation/plot_weak_segments_performance.py index 4e25675fbb..0f0c0ddb5f 100644 --- a/docs/source/checks/tabular/model_evaluation/plot_weak_segments_performance.py +++ b/docs/source/checks/tabular/model_evaluation/plot_weak_segments_performance.py @@ -1,38 +1,41 @@ # -*- coding: utf-8 -*- """ Weak Segments Performance -******************************** +************************* This notebooks provides an overview for using and understanding the weak segment performance check. **Structure:** * `What is the purpose of the check? <#what-is-the-purpose-of-the-check>`__ +* `Automatically detecting weak segments <#automatically-detecting-weak-segments>`__ * `Generate data & model <#generate-data-model>`__ * `Run the check <#run-the-check>`__ * `Define a condition <#define-a-condition>`__ What is the purpose of the check? -================================= +================================== The check is designed to help you easily identify the model's weakest segments in the data provided. In addition, it enables to provide a sublist of the Dataset's features, thus limiting the check to search in interesting subspaces. -How Deepchecks automatically detects weak segments ------------------------------------- +Automatically detecting weak segments +===================================== The check contains several steps: -#. We calculate loss for each sample in the dataset using the provided model via either log-loss or MSE. +#. 
+#. We calculate loss for each sample in the dataset using the provided model via either log-loss or MSE according
+   to the task type.
 
-#. Select a subset of features for the the weak segment search. This is done by selecting the features with the highest feature importance to the model provided (within the features selected for check, if limited).
+#. Select a subset of features for the weak segment search. This is done by selecting the features with the
+   highest feature importance to the model provided (within the features selected for check, if limited).
 
-#. We train multiple simple tree based models, each one is trained using exactly two features (out of the ones selected above) to predict the per sample error calculated before.
+#. We train multiple simple tree based models, each one trained using exactly two
+   features (out of the ones selected above) to predict the per-sample error calculated before.
 
-#. We convert each of the leafs in each of the trees into a segment and calculate the segment's performance.
-
-#. For the model's weakest segments detected we calculate bins for the remaining of the data and calculate the model's
+#. We convert each of the leaves in each of the trees into a segment and calculate the segment's performance. For the
+   weakest segments detected we also calculate the model's performance on data segments surrounding them.
 """
 #%%
 # Generate data & model
@@ -65,8 +68,7 @@
 # ``categorical_aggregation_threshold``: By default the check will combine rare categories into a single category called
 # "Other". This parameter determines the frequency threshold for categories to be mapped into the "Other" category.
 #
-# for additional information on the check's parameters, please refer to the API reference of the check
-# :class:`deepchecks.tabular.checks.model_evaluation.WeakSegmentsPerformance`.
+# See :class:`deepchecks.tabular.checks.model_evaluation.WeakSegmentsPerformance` for more details.
 
 from deepchecks.tabular.datasets.classification import phishing
 from deepchecks.tabular.checks import WeakSegmentsPerformance
 
 scorer = {'f1': make_scorer(f1_score, average='micro')}
 _, test_ds = phishing.load_data()
 model = phishing.load_fitted_model()
-check = WeakSegmentsPerformance(columns= ['urlLength', 'numTitles', 'ext', 'entropy'],
+check = WeakSegmentsPerformance(columns=['urlLength', 'numTitles', 'ext', 'entropy'],
                                 alternative_scorer=scorer,
                                 segment_minimum_size_ratio=0.03,
                                 categorical_aggregation_threshold=0.05)
diff --git a/docs/source/getting-started/welcome.rst b/docs/source/getting-started/welcome.rst
index f1efd287eb..2e77e79866 100644
--- a/docs/source/getting-started/welcome.rst
+++ b/docs/source/getting-started/welcome.rst
@@ -52,11 +52,11 @@
 Head over to one of our following quickstart tutorials, and have deepchecks running on your environment in less than 5 min:
 
-- :doc:`Train-Test Validation Quickstart (loans data) `
+- :doc:`Data Integrity Quickstart `
 
-- :doc:`Data Integrity Quickstart (avocado sales data) `
+- :doc:`Train-Test Validation Quickstart `
 
-- :doc:`Full Suite (many checks) Quickstart (iris data) `
+- :doc:`Model Evaluation Quickstart `
 
 **Recommended - download the code and run it locally** on the built-in dataset and (optional) model, or **replace them with your own**.
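The weak-segment search steps listed in the plot_weak_segments_performance.py docstring above can be made concrete with a small sketch. The code below is hypothetical illustration only, not part of this patch and not deepchecks' actual implementation: it assumes squared error as the per-sample loss, uses scikit-learn's toy diabetes data as a stand-in, and skips the feature-importance selection and surrounding-bin steps.

.. code-block:: python

    from itertools import combinations

    import numpy as np
    from sklearn.datasets import load_diabetes
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.tree import DecisionTreeRegressor

    X, y = load_diabetes(return_X_y=True, as_frame=True)
    model = RandomForestRegressor(random_state=0).fit(X, y)

    # Step 1: per-sample loss (squared error, since this is a regression task)
    per_sample_loss = (model.predict(X) - y) ** 2

    # Steps 2-4: for each pair of candidate features, fit a shallow tree on the
    # per-sample loss and treat every leaf as a candidate segment.
    worst_segment = None
    for f1, f2 in combinations(list(X.columns)[:4], 2):
        tree = DecisionTreeRegressor(max_depth=2, min_samples_leaf=50, random_state=0)
        tree.fit(X[[f1, f2]], per_sample_loss)
        leaf_ids = tree.apply(X[[f1, f2]])
        for leaf in np.unique(leaf_ids):
            mean_loss = per_sample_loss[leaf_ids == leaf].mean()
            if worst_segment is None or mean_loss > worst_segment[0]:
                worst_segment = (mean_loss, f1, f2)

    print('Weakest segment found on features:', worst_segment[1:],
          'mean loss:', worst_segment[0])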
diff --git a/docs/source/user-guide/tabular/tutorials/plot_quick_data_integrity.py b/docs/source/user-guide/tabular/tutorials/plot_quick_data_integrity.py
index 60cdc384ae..e6e2ea99e2 100644
--- a/docs/source/user-guide/tabular/tutorials/plot_quick_data_integrity.py
+++ b/docs/source/user-guide/tabular/tutorials/plot_quick_data_integrity.py
@@ -2,8 +2,8 @@
 """
 .. _quick_data_integrity:
 
-Quickstart - Data Integrity Suite (Avocado Sales Data)
-*******************************************************
+Quickstart - Data Integrity Suite
+*********************************
 
 The deepchecks integrity suite is relevant any time you have data that you wish to validate:
 whether it's on a fresh batch of data, or right before splitting it or using it for training.
@@ -13,9 +13,11 @@
 
 .. code-block:: bash
 
-    # Before we start, if you don't have deepchecks installed yet,
-    # make sure to run:
-    pip install deepchecks -U --quiet #--user
+    # Before we start, if you don't have deepchecks installed yet, run:
+    import sys
+    !{sys.executable} -m pip install deepchecks -U --quiet
+
+    # or install using pip from your python environment
 """
 
 #%%
@@ -49,7 +51,7 @@ def add_dirty_data(df):
 # Run Deepchecks for Data Integrity
 # ====================================
 #
-# Define a Dataset Object
+# Create a Dataset Object
 # ------------------------
 #
 # Create a deepchecks Dataset, including the relevant metadata (label, date, index, etc.).
@@ -58,12 +60,12 @@ def add_dirty_data(df):
 
 from deepchecks.tabular import Dataset
 
-# We state the categorical features, otherwise they will be automatically inferred,
-# which may be less accurate, therefore stating them explicitly is recommended.
+# Categorical features can be heuristically inferred; however, we
+# recommend stating them explicitly to avoid misclassification.
 
-# The label can be passed as a column name or a separate pd.Series / pd.DataFrame
+# Metadata attributes are optional. Some checks will run only if specific attributes are declared.
 
-ds = Dataset(dirty_df, cat_features = ['type'], datetime_name='Date', label = 'AveragePrice')
+ds = Dataset(dirty_df, cat_features=['type'], datetime_name='Date', label='AveragePrice')
 
 #%%
 # Run the Deepchecks Suite
@@ -83,7 +85,7 @@ def add_dirty_data(df):
 suite_result = integ_suite.run(ds)
 # Note: the result can be saved as html using suite_result.save_as_html()
 # or exported to json using suite_result.to_json()
-suite_result
+suite_result.show()
 
 #%%
 # We can inspect the suite outputs and see that there are a few problems we'd like to fix.
@@ -105,7 +107,7 @@ def add_dirty_data(df):
 # we can also add a condition:
 single_value_with_condition = IsSingleValue().add_condition_not_single_value()
 result = single_value_with_condition.run(ds)
-result
+result.show()
 
 #%%
 
@@ -118,7 +120,7 @@ def add_dirty_data(df):
 ds.data.drop('Is Ripe', axis=1, inplace=True)
 result = single_value_with_condition.run(ds)
-result
+result.show()
 
 #%%
 
@@ -128,7 +130,7 @@ def add_dirty_data(df):
 dirty_df.drop('Is Ripe', axis=1, inplace=True)
 ds = Dataset(dirty_df, cat_features=['type'], datetime_name='Date', label='AveragePrice')
 result = DataDuplicates().add_condition_ratio_less_or_equal(0).run(ds)
-result
+result.show()
 
 #%%
 # Rerun Suite on the Fixed Dataset
@@ -159,4 +161,4 @@ def add_dirty_data(df):
 # Additional Outputs section*
 #
 # For more info about working with conditions, see the detailed
-# :doc:`/user-guide/general/customizations/examples/plot_configure_checks_conditions` guide.
+# :doc:`/user-guide/general/customizations/examples/plot_configure_check_conditions` guide.
diff --git a/docs/source/user-guide/tabular/tutorials/plot_quick_model_evaluation.py b/docs/source/user-guide/tabular/tutorials/plot_quick_model_evaluation.py
new file mode 100644
index 0000000000..0cf5fe820b
--- /dev/null
+++ b/docs/source/user-guide/tabular/tutorials/plot_quick_model_evaluation.py
@@ -0,0 +1,164 @@
+# -*- coding: utf-8 -*-
+"""
+.. _quick_model_evaluation:
+
+Quickstart - Model Evaluation Suite
+***********************************
+
+The deepchecks model evaluation suite is relevant any time you wish to
+evaluate your model. For example:
+
+- Thorough analysis of the model's performance before deploying it.
+- Evaluation of a proposed model during the model selection and optimization stage.
+- Checking the model's performance on a new batch of data (with or without comparison to previous data batches).
+
+Here we'll build a regression model using the wine quality dataset
+(:mod:`deepchecks.tabular.datasets.regression.wine_quality`),
+to demonstrate how you can run the suite with only a few simple lines of code,
+and see what kind of insights it can find.
+
+.. code-block:: bash
+
+    # Before we start, if you don't have deepchecks installed yet, run:
+    import sys
+    !{sys.executable} -m pip install deepchecks -U --quiet
+
+    # or install using pip from your python environment
+"""
+
+#%%
+# Prepare Data and Model
+# ======================
+#
+# Load Data
+# -----------
+
+from deepchecks.tabular.datasets.regression import wine_quality
+
+data = wine_quality.load_data(data_format='Dataframe', as_train_test=False)
+data.head(2)
+
+#%%
+# Split Data and Train a Simple Model
+# -----------------------------------
+#
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import GradientBoostingRegressor
+
+X_train, X_test, y_train, y_test = train_test_split(data.iloc[:, :-1], data['quality'], test_size=0.2)
+gbr = GradientBoostingRegressor()
+gbr.fit(X_train, y_train)
+
+#%%
+# Run Deepchecks for Model Evaluation
+# ===========================================
+#
+# Create a Dataset Object
+# -------------------------
+#
+# Create a deepchecks Dataset, including the relevant metadata (label, date, index, etc.).
+# Check out :class:`deepchecks.tabular.Dataset` to see all the column types and attributes
+# that can be declared.
+
+from deepchecks.tabular import Dataset
+
+# Categorical features can be heuristically inferred; however, we
+# recommend stating them explicitly to avoid misclassification.
+
+# Metadata attributes are optional. Some checks will run only if specific attributes are declared.
+
+train_ds = Dataset(X_train, label=y_train, cat_features=[])
+test_ds = Dataset(X_test, label=y_test, cat_features=[])
+
+#%%
+# Run the Deepchecks Suite
+# --------------------------
+#
+# Validate your data with the :class:`deepchecks.tabular.suites.model_evaluation` suite.
+# It runs on two datasets and a model, so you can use it to compare the performance of the model between
+# any two batches of data (e.g. train data, test data, a new batch of data
+# that recently arrived).
+#
+# Check out the :doc:`"when should you use deepchecks guide" `
+# for some more info about the existing suites and when to use them.
+
+from deepchecks.tabular.suites import model_evaluation
+
+evaluation_suite = model_evaluation()
+suite_result = evaluation_suite.run(train_ds, test_ds, gbr)
+# Note: the result can be saved as html using suite_result.save_as_html()
+# or exported to json using suite_result.to_json()
+suite_result.show()
+
+#%%
+# Analyzing the results
+# --------------------------
+#
+# The result showcases a number of interesting insights. First, let's inspect the "Didn't Pass" section.
+#
+# * :doc:`/checks_gallery/tabular/model_evaluation/plot_performance_report`
+#   check result implies that the model overfitted the training data.
+# * :doc:`/checks_gallery/tabular/model_evaluation/plot_regression_systematic_error`
+#   (test set) check result demonstrates the model's small positive bias.
+# * :doc:`/checks_gallery/tabular/model_evaluation/plot_weak_segments_performance`
+#   (test set) check result visualizes some specific sub-spaces on which the
+#   model performs poorly. Examples of those sub-spaces are
+#   wines with low total sulfur dioxide and wines with a high alcohol percentage.
+#
+# Next, let's examine the "Passed" section.
+#
+# * :doc:`/checks_gallery/tabular/model_evaluation/plot_simple_model_comparison` check result states that the model
+#   performs better than naive baseline models; an opposite result could indicate a problem with the model
+#   or the data it was trained on.
+# * :doc:`/checks_gallery/tabular/model_evaluation/plot_boosting_overfit` check
+#   and the :doc:`/checks_gallery/tabular/model_evaluation/plot_unused_features` check results imply that the
+#   model has a well-calibrated boosting stopping rule and that it makes good use of the different data features.
+#
+# Let's try and fix the overfitting issue found in the model.
+#
+# Fix the Model and Re-run a Single Check
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+from deepchecks.tabular.checks import PerformanceReport
+
+gbr = GradientBoostingRegressor(n_estimators=20)
+gbr.fit(X_train, y_train)
+# Initialize the check and add an optional condition
+check = PerformanceReport().add_condition_train_test_relative_degradation_less_than(0.3)
+result = check.run(train_ds, test_ds, gbr)
+result.show()
+
+#%%
+#
+# We mitigated the overfitting to some extent. Additional model tuning is required to overcome
+# other issues discussed above. For now, we will update and remove the relevant conditions from the suite.
+#
+# Updating an Existing Suite
+# --------------------------
+#
+# To create our own suite, we can start with an empty suite and add checks and conditions to it
+# (see :doc:`/user-guide/general/customizations/examples/plot_create_a_custom_suite`), or we can start with
+# one of the default suites and update it as demonstrated in this section.
+#
+# Let's inspect our model evaluation suite's structure:
+evaluation_suite
+
+#%%
+#
+# Next, we will update the Performance Report condition and remove the Regression Systematic Error check:
+
+evaluation_suite[0].clean_conditions()
+evaluation_suite[0].add_condition_train_test_relative_degradation_less_than(0.3)
+evaluation_suite = evaluation_suite.remove(7)
+
+#%%
+#
+# Re-run the suite using:
+
+result = evaluation_suite.run(train_ds, test_ds, gbr)
+result.passed(fail_if_warning=False)
+
+#%%
+#
+# For more info about working with conditions, see the detailed
+# :doc:`/user-guide/general/customizations/examples/plot_configure_check_conditions` guide.
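For reference, here is a minimal usage sketch of the wine_quality module introduced by this patch. It relies only on the load_data and load_fitted_model signatures and the Dataset attributes (data, features) shown in the diffs above; it is an illustration, not part of the patch.

.. code-block:: python

    from deepchecks.tabular.datasets.regression import wine_quality

    # as_train_test=True returns the same train/test split the bundled toy model was fitted on
    train_ds, test_ds = wine_quality.load_data(data_format='Dataset', as_train_test=True)

    # Downloads the pretrained pipeline when the local sklearn version matches
    # _MODEL_VERSION; otherwise rebuilds and refits it locally
    model = wine_quality.load_fitted_model()

    predictions = model.predict(test_ds.data[test_ds.features])
    print(predictions[:5])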
diff --git a/docs/source/user-guide/tabular/tutorials/plot_quick_train_test_validation.py b/docs/source/user-guide/tabular/tutorials/plot_quick_train_test_validation.py
index 9d51e6b8e7..3cb219bc40 100644
--- a/docs/source/user-guide/tabular/tutorials/plot_quick_train_test_validation.py
+++ b/docs/source/user-guide/tabular/tutorials/plot_quick_train_test_validation.py
@@ -2,8 +2,8 @@
 """
 .. _quick_train_test_validation:
 
-Quickstart - Train-Test Validation Suite (Loans Data)
-************************************************************
+Quickstart - Train-Test Validation Suite
+****************************************
 
 The deepchecks train-test validation suite is relevant any time you wish to
 validate two data subsets. For example:
@@ -12,16 +12,18 @@
    training a model or when splitting data for cross-validation)
 - Comparing a new data batch to previous data batches
 
-Here we'll use a loans dataset
+Here we'll use a loans' dataset
 (:mod:`deepchecks.tabular.datasets.classification.lending_club`),
 to demonstrate how you can run the suite with only a few simple lines of code,
 and see what kind of insights it can find.
 
 .. code-block:: bash
 
-    # Before we start, if you don't have deepchecks installed yet,
-    # make sure to run:
-    pip install deepchecks -U --quiet #--user
+    # Before we start, if you don't have deepchecks installed yet, run:
+    import sys
+    !{sys.executable} -m pip install deepchecks -U --quiet
+
+    # or install using pip from your python environment
 """
 
 #%%
@@ -79,19 +81,12 @@
 # Create Dataset
 # ^^^^^^^^^^^^^^^^^^^^^^^^
 
 from deepchecks.tabular import Dataset
 
-# We state the categorical features,
-# otherwise they will be automatically inferred,
-# which may be less accurate, therefore stating
-# them explicitly is recommended.
+# Categorical features can be heuristically inferred; however, we
+# recommend stating them explicitly to avoid misclassification.
 
-# The label can be passed as a column name or
-# as a separate pd.Series / pd.DataFrame
-
-# all metadata attributes are optional.
-# Some checks require specific attributes and otherwise will not run.
+# Metadata attributes are optional. Some checks will run only if specific attributes are declared.
 
 train_ds = Dataset(train_df, label=label, cat_features=categorical_features,
                    index_name=index_name, datetime_name=datetime_name)
@@ -108,7 +103,7 @@
 # Run the Deepchecks Suite
 # --------------------------
 #
-# Validate your data with the :func:`deepchecks.tabular.suites.train_test_validation` suite.
+# Validate your data with the :class:`deepchecks.tabular.suites.train_test_validation` suite.
 # It runs on two datasets, so you can use it to compare any two batches of data (e.g. train data, test data, a new batch of data
 # that recently arrived).
 #
@@ -125,7 +120,7 @@
 #%%
 # As you can see in the suite's results: the Date Train-Test Leakage check failed,
-# indicating # that we may have a problem in the way we've split our data!
+# indicating that we may have a problem in the way we've split our data!
 # We've mixed up data from two years, causing a leakage of future data
 # in the training dataset.
 # Let's fix this.
@@ -152,7 +147,7 @@
 #
 suite_result = validation_suite.run(train_ds, test_ds)
-suite_result
+suite_result.show()
 
 #%%
 #
@@ -224,3 +219,4 @@
 # we can run our new suite using:
 result = drift_suite.run(train_ds, test_ds)
+result.show()
diff --git a/docs/source/user-guide/tabular/tutorials/plot_quickstart_in_5_minutes.py b/docs/source/user-guide/tabular/tutorials/plot_quickstart_in_5_minutes.py
index d5b2b7f4df..a0bc84d891 100644
--- a/docs/source/user-guide/tabular/tutorials/plot_quickstart_in_5_minutes.py
+++ b/docs/source/user-guide/tabular/tutorials/plot_quickstart_in_5_minutes.py
@@ -2,8 +2,8 @@
 """
 .. _quick_full_suite:
 
-Quickstart in 5 minutes
-***********************
+Quickstart - Full Suite in 5 Minutes
+************************************
 
 In order to run your first Deepchecks Suite, all you need to have is the data
 and model that you wish to validate. More specifically, you need:
diff --git a/tests/tabular/test_datasets.py b/tests/tabular/test_datasets.py
index 632f594bda..2522a7ceeb 100644
--- a/tests/tabular/test_datasets.py
+++ b/tests/tabular/test_datasets.py
@@ -16,7 +16,7 @@
 from hamcrest import assert_that, instance_of
 
 from deepchecks.tabular.datasets.classification import adult, breast_cancer, iris, lending_club, phishing
-from deepchecks.tabular.datasets.regression import avocado
+from deepchecks.tabular.datasets.regression import avocado, wine_quality
 from deepchecks.utils.model import get_model_of_pipeline
 
@@ -68,3 +68,7 @@
 
 def test_model_predict_on_lending_club():
     assert_dataset_module(lending_club)
+
+
+def test_model_predict_on_wine_quality():
+    assert_dataset_module(wine_quality)
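The internals of the assert_dataset_module helper used in the test above are not shown in this diff. As a rough, hypothetical sketch of what such a helper might verify for the new module (assumptions only, not the actual test code):

.. code-block:: python

    from hamcrest import assert_that, instance_of

    from deepchecks.tabular import Dataset
    from deepchecks.tabular.datasets.regression import wine_quality

    train, test = wine_quality.load_data()
    model = wine_quality.load_fitted_model()

    assert_that(train, instance_of(Dataset))
    assert_that(test, instance_of(Dataset))
    # The fitted pipeline should yield one prediction per test-set row
    assert len(model.predict(test.data[test.features])) == test.n_samples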