# Regression Error Distribution 

## Imports

In [1]:
from deepchecks.tabular import Dataset
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from deepchecks.tabular.checks.performance import RegressionErrorDistribution

In [2]:
from deepchecks.tabular.suites import full_suite
import jsonpickle

## Generating data:

In [3]:
# Load the diabetes data as one DataFrame (features plus the 'target' column)
# and hold out a third of the rows for testing; the split is seeded.
diabetes_df = load_diabetes(return_X_y=False, as_frame=True).frame
train_df, test_df = train_test_split(
    diabetes_df,
    test_size=0.33,
    random_state=42,
)

# Wrap both splits with identical Dataset settings: 'target' is the label and
# 'sex' is declared as the only categorical feature.
train, test = (
    Dataset(frame, label='target', cat_features=['sex'])
    for frame in (train_df, test_df)
)

# Fit a seeded gradient-boosting regressor on the training split. The result
# of fit() is bound to `_` so the estimator repr is not echoed by the cell.
clf = GradientBoostingRegressor(random_state=0)
_ = clf.fit(
    train.data[train.features],
    train.data[train.label_name],
)

In [4]:
# Placeholders for one serialized check result that fails to decode (err_s)
# and one that decodes successfully (ok_s); both start out empty.
err_s = ok_s = ''

In [5]:
# Build the full suite, run it against both datasets with the fitted model,
# and keep the JSON serialization of the whole result in `stri`.
suite = full_suite()
result = suite.run(
    train_dataset=train,
    test_dataset=test,
    model=clf,
)
stri = result.to_json()

Full Suite:   0%|          | 0/36 [00:00<?, ? Check/s]

In [6]:
# Decode the suite-level JSON, then try to decode each per-check payload.
# err_s ends up holding the last payload whose decoding raised TypeError;
# ok_s is set to the second payload, provided that one decoded cleanly.
jsonloaded = jsonpickle.loads(stri)
i = 0  # 1-based position of the current payload; stays 0 if there are none
for i, s in enumerate(jsonloaded['results'], start=1):
    try:
        jsonpickle.loads(s)
    except TypeError:
        err_s = s
    else:
        if i == 2:
            ok_s = s

In [7]:
# Show the payload captured by the loop above: the second check result,
# recorded only if it decoded without error (otherwise still the empty string).
ok_s

'{"name": "Columns Info", "params": {"n_top_columns": 10}, "header": "Columns Info - Train Dataset", "summary": "Return the role and logical type of each column.", "value": {"target": "label", "bmi": "numerical feature", "s5": "numerical feature", "bp": "numerical feature", "s2": "numerical feature", "age": "numerical feature", "s3": "numerical feature", "s1": "numerical feature", "s6": "numerical feature", "s4": "numerical feature"}, "display": [{"py/tuple": ["html", "* showing only the top 10 columns, you can change it using n_top_columns param"]}, {"py/tuple": ["dataframe", "[{\\"target\\":\\"label\\",\\"bmi\\":\\"numerical feature\\",\\"s5\\":\\"numerical feature\\",\\"bp\\":\\"numerical feature\\",\\"s2\\":\\"numerical feature\\",\\"age\\":\\"numerical feature\\",\\"s3\\":\\"numerical feature\\",\\"s1\\":\\"numerical feature\\",\\"s6\\":\\"numerical feature\\",\\"s4\\":\\"numerical feature\\"}]"]}]}'

In [8]:
# Show the last check-result payload whose decoding raised TypeError in the
# loop above (still the empty string if none failed).
err_s

'{"name": "Train Test Samples Mix", "params": {}, "header": "Train Test Samples Mix", "summary": "Detect samples in the test data that appear also in training data.", "conditions_table": "[{\\"Status\\":\\"V\\",\\"Condition\\":\\"Percentage of test data samples that appear in train data not greater than 10%\\",\\"More Info\\":\\"\\"}]", "value": {"ratio": 0.0, "data": {"py/object": "pandas.core.frame.DataFrame", "py/state": {"_mgr": {"py/reduce": [{"py/type": "pandas.core.internals.managers.BlockManager"}, {"py/tuple": [{"py/tuple": [{"py/reduce": [{"py/function": "pandas._libs.internals._unpickle_block"}, {"py/tuple": [{"py/reduce": [{"py/function": "numpy.core.multiarray._reconstruct"}, {"py/tuple": [{"py/type": "numpy.ndarray"}, {"py/tuple": [0]}, {"py/b64": "Yg=="}]}, {"py/tuple": [1, {"py/tuple": [11, 0]}, {"py/reduce": [{"py/type": "numpy.dtype"}, {"py/tuple": ["O8", false, true]}, {"py/tuple": [3, "|", null, null, null, -1, -1, 63]}]}, false, []]}]}, {"py/reduce": [{"py/type": "

In [9]:
# Check the Python type of the failing payload itself (before any decoding).
type(err_s)

str

In [10]:
# Reproduce the decoding failure on the captured payload. The TypeError is the
# point of this cell, so it is caught and reported as "<ExceptionName>: <message>"
# instead of being left to propagate: an uncaught exception stops
# "Restart & Run All" here and the cells below would never execute.
try:
    jsonpickle.loads(err_s)
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: all inputs must be Index

## Running RegressionErrorDistribution check (normal distribution):

In [None]:
# Instantiate the check; no arguments are passed, so its defaults apply.
check = RegressionErrorDistribution()

In [None]:
# Run the check on the test dataset with the fitted model; as the last
# expression in the cell, the returned result is what gets displayed.
check.run(test, clf)

### Skewing the data:

In [None]:
# Overwrite the label column of the test dataset with the constant 150.
# This writes into `test` itself, so every cell executed after this one sees
# the modified labels; re-create `test` to get the original values back.
# NOTE(review): relies on `test.data` exposing the underlying frame rather
# than a copy — confirm against the installed deepchecks version.
test.data[test.label_name] = 150

## Running RegressionErrorDistribution check (abnormal distribution):

In [None]:
# Create a fresh check instance (defaults only) for the second run.
check = RegressionErrorDistribution()

In [None]:
# Run the check again with the same model; by this point the label column of
# `test` has been overwritten with 150 by the earlier cell.
check.run(test, clf)