# Getting Started Tutorial

To install Evidently using the pip package manager, run:

```$ pip install evidently```


If you want to see reports inside a Jupyter notebook, you also need to install the Jupyter nbextension. After installing Evidently, run the following two commands in the terminal from the Evidently directory.

To install the Jupyter nbextension, run:

```$ jupyter nbextension install --sys-prefix --symlink --overwrite --py evidently```

To enable it, run:

```$ jupyter nbextension enable evidently --py --sys-prefix```

That's it!

In [1]:
try:
    import evidently
except:
    !npm install -g yarn
    !pip install git+https://github.com/evidentlyai/evidently.git

[K[?25h
> yarn@1.22.19 preinstall /tools/node/lib/node_modules/yarn
> :; (node ./preinstall.js > /dev/null 2>&1 || true)

/tools/node/bin/yarn -> /tools/node/lib/node_modules/yarn/bin/yarn.js
/tools/node/bin/yarnpkg -> /tools/node/lib/node_modules/yarn/bin/yarn.js
+ yarn@1.22.19
added 1 package in 1.237s
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/evidentlyai/evidently.git
  Cloning https://github.com/evidentlyai/evidently.git to /tmp/pip-req-build-yp3popjj
  Running command git clone -q https://github.com/evidentlyai/evidently.git /tmp/pip-req-build-yp3popjj
Collecting dataclasses>=0.6
  Downloading dataclasses-0.6-py3-none-any.whl (14 kB)
Collecting PyYAML~=5.1
  Downloading PyYAML-5.4.1-cp38-cp38-manylinux1_x86_64.whl (662 kB)
[K     |████████████████████████████████| 662 kB 4.0 MB/s 
Building wheels for collected packages: evidently
  Building wheel for evidently (setup.py) ... [?25l[?25hd

In [2]:
import pandas as pd
import numpy as np

from sklearn.datasets import fetch_california_housing

from evidently import ColumnMapping

from evidently.report import Report
from evidently.metrics.base_metric import generate_column_metrics
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset, DataQualityPreset, RegressionPreset
from evidently.metrics import *

from evidently.test_suite import TestSuite
from evidently.tests.base_test import generate_column_tests
from evidently.test_preset import DataStabilityTestPreset, NoTargetPerformanceTestPreset
from evidently.tests import *

In [3]:
import warnings

# Silence every warning for the rest of the kernel session: both calls
# register a blanket 'ignore' action in the warnings filter list.
warnings.simplefilter(action='ignore')
warnings.filterwarnings(action='ignore')

## Load Data

In [4]:
# Fetch the California housing dataset via scikit-learn; as_frame=True asks
# for pandas objects, and `.frame` is taken as the working DataFrame.
data = fetch_california_housing(as_frame=True)
housing_data = data.frame

In [5]:
# Rename the label column to 'target' and add a synthetic 'prediction' column:
# the target plus Gaussian noise (mean 0, std 5), one draw per row.
# rename() returns a new frame, so the object held in `data.frame` is not
# mutated; the seeded generator makes the noise identical on every run.
housing_data = housing_data.rename(columns={'MedHouseVal': 'target'})
housing_data['prediction'] = (
    housing_data['target'].values
    + np.random.default_rng(42).normal(0, 5, housing_data.shape[0])
)

In [6]:
# Draw two 5000-row samples (without replacement) from the same frame to act
# as the reference and current datasets. The fixed seeds make re-runs
# reproducible; they must differ, otherwise both samples would be identical.
reference = housing_data.sample(n=5000, replace=False, random_state=0)
current = housing_data.sample(n=5000, replace=False, random_state=1)

## Report

In [7]:
# Build a report from the DataDriftPreset, run it on the reference and current
# samples, and leave `report` as the last expression so the notebook renders it.
report = Report(metrics=[DataDriftPreset()])
report.run(reference_data=reference, current_data=current)
report

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Report made of individual column-level metrics, all for the 'AveRooms'
# column: ColumnSummaryMetric, ColumnQuantileMetric at quantile 0.25, and
# ColumnDriftMetric.
report = Report(
    metrics=[
        ColumnSummaryMetric(column_name='AveRooms'),
        ColumnQuantileMetric(column_name='AveRooms', quantile=0.25),
        ColumnDriftMetric(column_name='AveRooms'),
    ]
)
report.run(reference_data=reference, current_data=current)
report

In [None]:
# Use generate_column_metrics to apply ColumnQuantileMetric (quantile 0.25)
# to each of the listed columns instead of spelling the metric out per column.
report = Report(
    metrics=[
        generate_column_metrics(
            ColumnQuantileMetric,
            parameters={'quantile':0.25},
            columns=['AveRooms', 'AveBedrms'],
        ),
    ]
)
report.run(reference_data=reference, current_data=current)
report

In [None]:
# Combine a single-column metric, generated column metrics and a preset in
# one report.
# NOTE(review): columns='num' presumably selects every numerical column —
# confirm against the Evidently documentation.
report = Report(
    metrics=[
        ColumnSummaryMetric(column_name='AveRooms'),
        generate_column_metrics(
            ColumnQuantileMetric,
            parameters={'quantile':0.25},
            columns='num',
        ),
        DataDriftPreset(),
    ]
)
report.run(reference_data=reference, current_data=current)
report

In [None]:
# Export the results of the most recently run report as a Python dictionary.
report.as_dict()

{'metrics': [{'metric': 'ColumnSummaryMetric',
   'result': {'column_name': 'AveRooms',
    'column_type': 'num',
    'reference_characteristics': {'number_of_rows': 5000,
     'count': 5000,
     'mean': 5.41,
     'std': 2.62,
     'min': 1.13,
     'p25': 4.46,
     'p50': 5.22,
     'p75': 6.01,
     'max': 132.53,
     'unique': 4902,
     'unique_percentage': 98.04,
     'missing': 0,
     'missing_percentage': 0.0,
     'infinite_count': 0,
     'infinite_percentage': 0.0,
     'most_common': 6.0,
     'most_common_percentage': 0.18},
    'current_characteristics': {'number_of_rows': 5000,
     'count': 5000,
     'mean': 5.46,
     'std': 2.76,
     'min': 0.85,
     'p25': 4.44,
     'p50': 5.22,
     'p75': 6.08,
     'max': 132.53,
     'unique': 4904,
     'unique_percentage': 98.08,
     'missing': 0,
     'missing_percentage': 0.0,
     'infinite_count': 0,
     'infinite_percentage': 0.0,
     'most_common': 4.4,
     'most_common_percentage': 0.08}}},
  {'metric': 'Colu

In [None]:
# Export the results of the most recently run report as a JSON string.
report.json()

'{"version": "0.2.0", "timestamp": "2022-12-07 21:51:33.765568", "metrics": [{"metric": "ColumnSummaryMetric", "result": {"column_name": "AveRooms", "column_type": "num", "reference_characteristics": {"number_of_rows": 5000, "count": 5000, "mean": 5.41, "std": 2.62, "min": 1.13, "p25": 4.46, "p50": 5.22, "p75": 6.01, "max": 132.53, "unique": 4902, "unique_percentage": 98.04, "missing": 0, "missing_percentage": 0.0, "infinite_count": 0, "infinite_percentage": 0.0, "most_common": 6.0, "most_common_percentage": 0.18}, "current_characteristics": {"number_of_rows": 5000, "count": 5000, "mean": 5.46, "std": 2.76, "min": 0.85, "p25": 4.44, "p50": 5.22, "p75": 6.08, "max": 132.53, "unique": 4904, "unique_percentage": 98.08, "missing": 0, "missing_percentage": 0.0, "infinite_count": 0, "infinite_percentage": 0.0, "most_common": 4.4, "most_common_percentage": 0.08}}}, {"metric": "ColumnQuantileMetric", "result": {"column_name": "AveBedrms", "quantile": 0.25, "current": 1.0070422535211268, "refer

In [None]:
# Optional: uncomment the next line to save the report as 'report.html'.
#report.save_html('report.html')

In [None]:
# Optional: uncomment the next line to save the report as 'report.json'.
#report.save_json('report.json')

## Test Suite 

In [None]:
# Assemble a test suite from individual tests, each created with its default
# arguments, run it on the reference and current samples, and display it.
tests = TestSuite(tests=[
    TestNumberOfColumnsWithMissingValues(),
    TestNumberOfRowsWithMissingValues(),
    TestNumberOfConstantColumns(),
    TestNumberOfDuplicatedRows(),
    TestNumberOfDuplicatedColumns(),
    TestColumnsType(),
    TestNumberOfDriftedColumns(),
])

tests.run(reference_data=reference, current_data=current)
tests

In [None]:
# Build a suite from the NoTargetPerformanceTestPreset alone, run it on the
# reference and current samples, and display it as the cell output.
suite = TestSuite(tests=[NoTargetPerformanceTestPreset()])
suite.run(reference_data=reference, current_data=current)
suite

In [None]:
# Mix individual column tests with a preset in one suite.
# NOTE(review): the `columns` argument presumably restricts the preset to the
# listed columns — confirm against the Evidently documentation.
suite = TestSuite(tests=[
    TestColumnDrift('Population'),
    TestMeanInNSigmas('HouseAge'),
    NoTargetPerformanceTestPreset(columns=['AveRooms', 'AveBedrms', 'AveOccup'])
])

suite.run(reference_data=reference, current_data=current)
suite

In [None]:
# Two tests on 'Population' plus TestMeanInNSigmas generated per column via
# generate_column_tests.
# NOTE(review): columns='num' presumably selects every numerical column —
# confirm against the Evidently documentation.
suite = TestSuite(
    tests=[
        TestColumnDrift('Population'),
        TestShareOfOutRangeValues('Population'),
        generate_column_tests(TestMeanInNSigmas, columns='num'),
    ]
)
suite.run(reference_data=reference, current_data=current)
suite

In [None]:
# Export the results of the most recently run test suite as a Python dictionary.
suite.as_dict()

{'tests': [{'name': 'Drift per Column',
   'description': 'The drift score for the feature **Population** is 0.024. The drift detection method is Wasserstein distance (normed). The drift detection threshold is 0.1.',
   'status': 'SUCCESS',
   'group': 'data_drift',
   'parameters': {'features': {'Population': {'stattest_name': 'Wasserstein distance (normed)',
      'score': 0.024,
      'stattest_threshold': 0.1,
      'data_drift': False}}}},
  {'name': 'Share of Out-of-Range Values',
   'description': 'The share of values out of range in the column **Population** is 0.0006 (3 out of 5000).  The test threshold is eq=0 ± 1e-12.',
   'status': 'FAIL',
   'group': 'data_quality',
   'parameters': {'condition': {'eq': 0 ± 1e-12},
    'left': None,
    'right': None,
    'share_not_in_range': 0.0006}},
  {'name': 'Mean Value Stability',
   'description': 'The mean value of the column **AveBedrms** is 1.1. The expected range is from -0.05 to 2.23',
   'status': 'SUCCESS',
   'group': 'data

In [None]:
# Export the results of the most recently run test suite as a JSON string.
suite.json()

'{"version": "0.2.0", "timestamp": "2022-12-07 21:51:56.737031", "tests": [{"name": "Drift per Column", "description": "The drift score for the feature **Population** is 0.024. The drift detection method is Wasserstein distance (normed). The drift detection threshold is 0.1.", "status": "SUCCESS", "group": "data_drift", "parameters": {"features": {"Population": {"stattest_name": "Wasserstein distance (normed)", "score": 0.024, "stattest_threshold": 0.1, "data_drift": false}}}}, {"name": "Share of Out-of-Range Values", "description": "The share of values out of range in the column **Population** is 0.0006 (3 out of 5000).  The test threshold is eq=0 \\u00b1 1e-12.", "status": "FAIL", "group": "data_quality", "parameters": {"condition": {"eq": {"value": 0, "relative": 1e-06, "absolute": 1e-12}}, "left": null, "right": null, "share_not_in_range": 0.0006}}, {"name": "Mean Value Stability", "description": "The mean value of the column **AveBedrms** is 1.1. The expected range is from -0.05 t

In [None]:
# Optional: uncomment the next line to save the test suite as 'test_suite.html'.
#suite.save_html('test_suite.html')

In [None]:
# Optional: uncomment the next line to save the test suite as 'test_suite.json'.
#suite.save_json('test_suite.json')