In [1]:
import pandas as pd

%load_ext autoreload
%autoreload 2

In [2]:
from mlchecks import Suite, ConditionResult, ConditionCategory, Dataset
from mlchecks.checks import *

def condition_exc(r):
    """Condition callback that always fails by raising.

    The argument ``r`` is accepted but never inspected; the function
    unconditionally raises a plain ``Exception`` with a fixed message.

    Raises:
        Exception: always.
    """
    failure_message = 'Failed because I need an example'
    raise Exception(failure_message)

# Small hand-written table: five rows, four columns.
data = dict(
    col1=['', '#@$', 'Nan!', '#nan', '<NaN>'],        # empty / null-looking strings
    col2=['gabbay', 'GABBAY!!!', 'is', '...', '?Gabbay?'],  # spelling variants of one word
    col3=[1, 's', 'a', 'b', 'c'],                     # one int among strings
    col4=['a', 'a', 'a', 'a', 'a'],                   # a single repeated value
)

# Wrap the frame so the checks can consume it.
dataset = Dataset(pd.DataFrame(data))

# Checks are listed in the order they are handed to the suite. Two checks
# appear twice, each time with a different condition attached.
checks = [
    IsSingleValue(),
    MixedNulls().add_condition_different_nulls_not_more_than(3),
    StringMismatch().add_condition_no_variants(),
    StringMismatch().add_condition_ratio_variants_not_more_than(0.35),
    MixedTypes().add_condition_rare_type_ratio_not_less_than(0.4),
    MixedTypes().add_condition_rare_type_ratio_not_less_than(0.1),
    # Custom condition whose callback raises on purpose.
    RareFormatDetection().add_condition('fail example', condition_exc),
]
suite = Suite('My Single Suite', *checks)

# Bare last expression so the notebook shows the suite's repr.
suite

My Single Suite: [
	IsSingleValue
	MixedNulls(check_nan=True)
		Conditions:
			0: Not more than 3 different null types for all columns
	StringMismatch
		Conditions:
			0: No string variants for all columns
	StringMismatch_2
		Conditions:
			0: Not more than 35.00% variants for all columns
	MixedTypes
		Conditions:
			0: Rare type ratio is not less than 40.00% of samples in all columns
	MixedTypes_2
		Conditions:
			0: Rare type ratio is not less than 10.00% of samples in all columns
	RareFormatDetection(patterns=[<mlchecks.checks.integrity.rare_format_detection.Pattern object at 0x7fa9f1683910>, <mlchecks.checks.integrity.rare_format_detection.Pattern object at 0x7fa9f16839d0>, <mlchecks.checks.integrity.rare_format_detection.Pattern object at 0x7fa9f1683a30>, <mlchecks.checks.integrity.rare_format_detection.Pattern object at 0x7fa9f1683a90>, <mlchecks.checks.integrity.rare_format_detection.Pattern object at 0x7fa9f1683b20>, <mlchecks.checks.integrity.rare_format_detection.Pattern obje

In [3]:
# Run the suite, passing `dataset` as the validation dataset.
result = suite.run(validation_dataset=dataset)
# Bare last expression so the notebook displays the returned result.
result


VBox(children=(HTML(value=''), IntProgress(value=0, bar_style='info', max=7, style=ProgressStyle(bar_color='#9…

Status,Check,Condition,More Info
✖,Mixed Nulls - Validation Dataset,Not more than 3 different null types for all columns,Found columns with more than 3 null types: col1
✖,String Mismatch - Validation Dataset,Not more than 35.00% variants for all columns,"Found columns with variants ratio: {'col1': '100%', 'col2': '60.00%'}"
✖,Mixed Types - Validation Dataset,Rare type ratio is not less than 40.00% of samples in all columns,Found columns with low type ratio: col3
!,String Mismatch - Validation Dataset,No string variants for all columns,"Found columns with variants: {'col1': ['', 'nan'], 'col2': ['gabbay']}"
✓,Mixed Types - Validation Dataset,Rare type ratio is not less than 10.00% of samples in all columns,


Check
Single Value in Column - Validation Dataset


Check,Error
RareFormatDetection,Failed because I need an example


Unnamed: 0_level_0,Unnamed: 1_level_0,Count,Percent of data
Column Name,Value,Unnamed: 2_level_1,Unnamed: 3_level_1
col1,,1,20.00%
col1,#@$,1,20.00%
col1,Nan!,1,20.00%
col1,#nan,1,20.00%
col1,,1,20.00%


Unnamed: 0_level_0,Unnamed: 1_level_0,Value,Count,% In data
Column Name,Base form,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
col1,,,1,20.00%
col1,,#@$,1,20.00%
col1,,#nan,1,20.00%
col1,,,1,20.00%
col1,,Nan!,1,20.00%
col2,gabbay,gabbay,1,20.00%
col2,gabbay,GABBAY!!!,1,20.00%
col2,gabbay,?Gabbay?,1,20.00%


Unnamed: 0,col3
numbers,20.00%
strings,80.00%


Unnamed: 0_level_0,Unnamed: 1_level_0,Value,Count,% In data
Column Name,Base form,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
col1,,,1,20.00%
col1,,#@$,1,20.00%
col1,,#nan,1,20.00%
col1,,,1,20.00%
col1,,Nan!,1,20.00%
col2,gabbay,gabbay,1,20.00%
col2,gabbay,GABBAY!!!,1,20.00%
col2,gabbay,?Gabbay?,1,20.00%


Unnamed: 0,col4
Single unique value,a


Unnamed: 0,col3
numbers,20.00%
strings,80.00%
