Commit

Merge branch 'main' of https://github.com/deepchecks/MLChecks into 0.6.x

ItayGabbay committed Apr 28, 2022
2 parents 86b67f8 + e679b25 commit df80eb1
Showing 189 changed files with 2,516 additions and 954 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/docs.yaml
@@ -45,7 +45,8 @@ jobs:
sudo apt-get install pandoc dvisvgm texlive texlive-latex-extra
- name: Build documentation
run: make docs

- name: Validate examples
run: make validate-examples
- name: Upload documentation
if: github.event_name != 'pull_request'
uses: actions/upload-artifact@v2
19 changes: 3 additions & 16 deletions .gitignore
@@ -96,24 +96,11 @@ docs.error.log
*MNIST*

# build folders of sphinx gallery
docs/source/examples/vision/guides/examples/
docs/source/examples/general/examples/
docs/source/examples/vision/checks/distribution/examples/
docs/source/examples/vision/checks/performance/examples/
docs/source/examples/tabular/guides/examples/
docs/source/examples/tabular/checks/distribution/examples/
docs/source/examples/tabular/checks/overview/examples/
docs/source/examples/tabular/checks/integrity/examples/
docs/source/examples/tabular/checks/methodology/examples/
docs/source/examples/tabular/checks/performance/examples/
docs/source/examples/tabular/use-cases/examples/
docs/source/examples/tabular/checks/methodology/examples/
docs/source/tutorials/tabular/examples
docs/source/tutorials/vision/examples
docs/source/examples/vision/checks/methodology/examples/
docs/source/user-guide/general/customizations/examples/
docs/source/user-guide/general/exporting_results/examples/
docs/source/tutorials/tabular/examples/
docs/source/checks_gallery/
docs/source/auto_tutorials/tabular
docs/source/auto_tutorials/vision

# build artifacts from running docs (vision and wandb export)
docs/source/tutorials/vision/*.html
1 change: 1 addition & 0 deletions deepchecks/checks.py
@@ -39,6 +39,7 @@
'CategoryMismatchTrainTest',
'NewLabelTrainTest',
'LabelAmbiguity',
'OutlierSampleDetection',

# methodology checks
'BoostingOverfit',
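For orientation, the newly listed check becomes importable from the top-level checks module. A minimal usage sketch, assuming an existing deepchecks tabular Dataset named train_dataset (the dataset and call below are illustrative, not part of this commit):

from deepchecks.checks import OutlierSampleDetection

result = OutlierSampleDetection().run(train_dataset)  # single-dataset integrity check
result.show()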
75 changes: 58 additions & 17 deletions deepchecks/core/check_result.py
@@ -23,9 +23,9 @@
import numpy as np
import ipywidgets as widgets
import plotly.graph_objects as go
import plotly
from ipywidgets.embed import embed_minimal_html, dependency_state
from plotly.basedatatypes import BaseFigure
import plotly.io as pio
import plotly
from matplotlib import pyplot as plt
from IPython.display import display_html
from pandas.io.formats.style import Styler
@@ -34,7 +34,7 @@
from deepchecks.core.display_pandas import dataframe_to_html, get_conditions_table
from deepchecks.core.errors import DeepchecksValueError
from deepchecks.utils.dataframes import un_numpy
from deepchecks.utils.strings import get_docs_summary
from deepchecks.utils.strings import create_new_file_name, get_docs_summary, widget_to_html
from deepchecks.utils.ipython import is_notebook
from deepchecks.utils.wandb_utils import set_wandb_run_state

@@ -130,6 +130,7 @@ def display_check(self, unique_id: str = None, as_widget: bool = False,
"""
if as_widget:
box = widgets.VBox()
box.add_class('rendered_html')
box_children = []
check_html = ''
if unique_id:
@@ -185,14 +186,34 @@ def display_check(self, unique_id: str = None, as_widget: bool = False,
return box
display_html(check_html, raw=True)

def _repr_html_(self):
def _repr_html_(self, unique_id=None,
show_additional_outputs=True, requirejs: bool = False):
"""Return html representation of check result."""
html_out = io.StringIO()
widgeted_output = self.display_check(as_widget=True)
embed_minimal_html(html_out, views=[widgeted_output], requirejs=False,
embed_url=None, state=dependency_state(widgeted_output))
self.save_as_html(html_out, unique_id=unique_id,
show_additional_outputs=show_additional_outputs, requirejs=requirejs)
return html_out.getvalue()

def save_as_html(self, file=None, unique_id=None,
show_additional_outputs=True, requirejs: bool = True):
"""Save output as html file.
Parameters
----------
file : filename or file-like object
The file to write the HTML output to. If None writes to output.html
requirejs: bool , default: True
If to save with all javascript dependencies
"""
if file is None:
file = 'output.html'
widgeted_output = self.display_check(unique_id=unique_id,
show_additional_outputs=show_additional_outputs,
as_widget=True)
if isinstance(file, str):
file = create_new_file_name(file, 'html')
widget_to_html(widgeted_output, html_out=file, title=self.get_header(), requirejs=requirejs)

def _display_to_json(self) -> List[Tuple[str, str]]:
displays = []
old_backend = matplotlib.get_backend()
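A minimal usage sketch of the new save_as_html method, assuming result is a CheckResult obtained from running a check (the check class and file name below are illustrative):

from deepchecks.checks import DataDuplicates

result = DataDuplicates().run(train_dataset)        # train_dataset: an existing deepchecks Dataset
result.save_as_html('data_duplicates_report.html')  # requirejs=True bundles the JavaScript dependencies
result.save_as_html()                                # file=None falls back to output.html

For string paths, create_new_file_name is applied first, presumably to derive a non-clashing name rather than overwrite an existing file.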
@@ -362,7 +383,7 @@ def _get_metadata(self, with_doc_link: bool = False):
def _ipython_display_(self, unique_id=None, as_widget=False,
show_additional_outputs=True):
check_widget = self.display_check(unique_id=unique_id, as_widget=as_widget,
show_additional_outputs=show_additional_outputs,)
show_additional_outputs=show_additional_outputs)
if as_widget:
display_html(check_widget)

@@ -423,11 +444,27 @@ def priority(self) -> int:

return 4

def show(self, unique_id=None, show_additional_outputs=True):
"""Display check result."""
def show(self, show_additional_outputs=True, unique_id=None):
"""Display the check result.
Parameters
----------
show_additional_outputs : bool
Boolean that controls if to show additional outputs.
unique_id : str
The unique id given by the suite that displays the check.
"""
if is_notebook():
self._ipython_display_(unique_id=unique_id,
show_additional_outputs=show_additional_outputs)
self.display_check(unique_id=unique_id,
show_additional_outputs=show_additional_outputs)
elif 'sphinx_gallery' in pio.renderers.default:
html = self._repr_html_(unique_id=unique_id,
show_additional_outputs=show_additional_outputs)

class TempSphinx:
def _repr_html_(self):
return html
return TempSphinx()
else:
warnings.warn('You are running in a non-interactive python shell. In order to show the result you must use '
'an IPython shell (e.g. Jupyter)')
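For reference, a short sketch of the updated call with the reordered parameters; result is assumed to be a CheckResult from a previous run:

result.show()                               # rich widget output inside Jupyter/IPython
result.show(show_additional_outputs=False)  # the first positional argument is now show_additional_outputs

Under sphinx-gallery, show() now returns a small wrapper object whose _repr_html_ embeds the rendered widget, so gallery builds capture the same HTML output.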
@@ -464,7 +501,7 @@ def to_json(self, with_display: bool = True):
"""
result_json = self._get_metadata()
if with_display:
result_json['display'] = [('str', str(self.exception))]
result_json['display'] = [('html', f'<p style="color:red">{self.exception}</p>')]
return jsonpickle.dumps(result_json, unpicklable=False)

def to_wandb(self, dedicated_run: bool = True, **kwargs: Any):
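A small sketch of the effect on the serialized failure; the exception text is illustrative and failure is assumed to be a CheckFailure whose check raised ValueError('bad input'):

import jsonpickle

payload = jsonpickle.loads(failure.to_json())
payload['display']  # now [['html', '<p style="color:red">bad input</p>']] instead of a plain 'str' entry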
@@ -501,15 +538,19 @@ def _get_metadata(self, with_doc_link: bool = False):

def __repr__(self):
"""Return string representation."""
tb_str = traceback.format_exception(etype=type(self.exception), value=self.exception,
tb=self.exception.__traceback__)
return ''.join(tb_str)
return self.header + ': ' + str(self.exception)

def _ipython_display_(self):
"""Display the check failure."""
check_html = f'<h4>{self.header}</h4>'
if hasattr(self.check.__class__, '__doc__'):
summary = get_docs_summary(self.check)
check_html += f'<p>{summary}</p>'
check_html += f'<p style="color:red"> {self.exception}</p>'
check_html += f'<p style="color:red">{self.exception}</p>'
display_html(check_html, raw=True)

def print_traceback(self):
"""Print the traceback of the failure."""
tb_str = traceback.format_exception(etype=type(self.exception), value=self.exception,
tb=self.exception.__traceback__)
print(''.join(tb_str))
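With the shorter __repr__, the full traceback is now requested explicitly. A usage sketch, assuming suite_result comes from running a suite and that failed checks surface as CheckFailure objects in its results list:

from deepchecks.core.check_result import CheckFailure

for res in suite_result.results:
    if isinstance(res, CheckFailure):
        print(res)              # concise form: '<check header>: <exception message>'
        res.print_traceback()   # full traceback, which repr() used to return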
100 changes: 51 additions & 49 deletions deepchecks/core/check_utils/single_feature_contribution_utils.py
@@ -19,8 +19,41 @@
import plotly.graph_objects as go


def get_pps_figure(per_class: bool):
"""If per_class is True, then no title is defined on the figure."""
fig = go.Figure()
fig.update_layout(
yaxis_title='Predictive Power Score (PPS)',
yaxis_range=[0, 1.05],
legend=dict(x=1.0, y=1.0),
barmode='group',
width=800, height=500
)
if per_class:
fig.update_layout(xaxis_title='Class')
else:
fig.update_layout(
title='Predictive Power Score (PPS) - Can a feature predict the label by itself?',
xaxis_title='Column',
)
return fig


def pps_df_to_trace(s_pps: pd.Series, name: str):
"""If name is train/test use our defined colors, else will use plotly defaults."""
name = name.capitalize() if name else None
return go.Bar(x=s_pps.index,
y=s_pps,
name=name,
marker_color=colors.get(name),
text=s_pps.round(2),
textposition='outside'
)
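A short standalone sketch of how the two new helpers compose; the PPS values are made up for illustration and the import path follows this file's location:

import pandas as pd
from deepchecks.core.check_utils.single_feature_contribution_utils import get_pps_figure, pps_df_to_trace

s_pps_train = pd.Series({'age': 0.42, 'income': 0.13})
s_pps_test = pd.Series({'age': 0.40, 'income': 0.22})

fig = get_pps_figure(per_class=False)
fig.add_trace(pps_df_to_trace(s_pps_train, 'train'))  # 'train'/'test' pick the shared deepchecks colors
fig.add_trace(pps_df_to_trace(s_pps_test, 'test'))
fig.show()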


def get_single_feature_contribution(train_df: pd.DataFrame, train_label_name: Optional[Hashable], test_df: pd.DataFrame,
test_label_name: Optional[Hashable], ppscore_params: dict, n_show_top: int):
test_label_name: Optional[Hashable], ppscore_params: dict, n_show_top: int,
random_state: int = None):
"""
Calculate the PPS for train, test and difference for single feature contribution checks.
@@ -42,18 +75,20 @@ def get_single_feature_contribution(train_df: pd.DataFrame, train_label_name: Op
dictionary of additional parameters for the ppscore predictor function
n_show_top: int
Number of features to show, sorted by the magnitude of difference in PPS
random_state: int, default None
Random state for the ppscore.predictors function
Returns:
CheckResult
value: dictionaries of PPS values for train, test and train-test difference.
display: bar graph of the PPS of each feature.
"""
df_pps_train = pps.predictors(df=train_df, y=train_label_name,
random_seed=42,
random_seed=random_state,
**ppscore_params)
df_pps_test = pps.predictors(df=test_df,
y=test_label_name,
random_seed=42, **ppscore_params)
random_seed=random_state, **ppscore_params)

s_pps_train = df_pps_train.set_index('x', drop=True)['ppscore']
s_pps_test = df_pps_test.set_index('x', drop=True)['ppscore']
@@ -65,34 +100,16 @@ def get_single_feature_contribution(train_df: pd.DataFrame, train_label_name: Op
s_pps_train_to_display = s_pps_train[s_difference_to_display.index]
s_pps_test_to_display = s_pps_test[s_difference_to_display.index]

fig = go.Figure()
fig.add_trace(go.Bar(x=s_pps_train_to_display.index,
y=s_pps_train_to_display,
name='Train',
marker_color=colors['Train'], text=s_pps_train_to_display.round(2), textposition='outside'
))
fig.add_trace(go.Bar(x=s_pps_test_to_display.index,
y=s_pps_test_to_display,
name='Test',
marker_color=colors['Test'], text=s_pps_test_to_display.round(2), textposition='outside'
))
fig = get_pps_figure(per_class=False)
fig.add_trace(pps_df_to_trace(s_pps_train_to_display, 'train'))
fig.add_trace(pps_df_to_trace(s_pps_test_to_display, 'test'))
fig.add_trace(go.Scatter(x=s_difference_to_display.index,
y=s_difference_to_display,
name='Train-Test Difference (abs)',
marker=dict(symbol='circle', size=15),
line=dict(color='#aa57b5', width=5)
))

fig.update_layout(
title='Predictive Power Score (PPS) - Can a feature predict the label by itself?',
xaxis_title='Column',
yaxis_title='Predictive Power Score (PPS)',
yaxis_range=[0, 1.05],
legend=dict(x=1.0, y=1.0),
barmode='group',
width=800, height=500
)

ret_value = {'train': s_pps_train.to_dict(), 'test': s_pps_test.to_dict(),
'train-test difference': s_difference.to_dict()}

@@ -106,7 +123,8 @@ def get_single_feature_contribution_per_class(train_df: pd.DataFrame, train_labe
test_df: pd.DataFrame,
test_label_name: Optional[Hashable], ppscore_params: dict,
n_show_top: int,
min_pps_to_show: float = 0.05):
min_pps_to_show: float = 0.05,
random_state: int = None):
"""
Calculate the PPS for train, test and difference for single feature contribution checks per class.
@@ -130,6 +148,8 @@ def get_single_feature_contribution_per_class(train_df: pd.DataFrame, train_labe
Number of features to show, sorted by the magnitude of difference in PPS
min_pps_to_show: float, default 0.05
Minimum PPS to show a class in the graph
random_state: int, default None
Random state for the ppscore.predictors function
Returns:
CheckResult
@@ -153,11 +173,11 @@ def get_single_feature_contribution_per_class(train_df: pd.DataFrame, train_labe
lambda x: 1 if x == c else 0) # pylint: disable=cell-var-from-loop

df_pps_train = pps.predictors(df=train_df_all_vs_one, y=train_label_name,
random_seed=42,
random_seed=random_state,
**ppscore_params)
df_pps_test = pps.predictors(df=test_df_all_vs_one,
y=test_label_name,
random_seed=42, **ppscore_params)
random_seed=random_state, **ppscore_params)

s_pps_train = df_pps_train.set_index('x', drop=True)['ppscore']
s_pps_test = df_pps_test.set_index('x', drop=True)['ppscore']
@@ -183,28 +203,10 @@ def get_single_feature_contribution_per_class(train_df: pd.DataFrame, train_labe
s_train_to_display = s_train[s_difference_to_display.index]
s_test_to_display = s_test[s_difference_to_display.index]

fig = go.Figure()
fig.add_trace(go.Bar(x=s_train_to_display.index.astype(str),
y=s_train_to_display,
name='Train',
marker_color=colors['Train'], text=s_train_to_display.round(2), textposition='outside'
))
fig.add_trace(go.Bar(x=s_test_to_display.index.astype(str),
y=s_test_to_display,
name='Test',
marker_color=colors['Test'], text=s_test_to_display.round(2), textposition='outside'
))

fig.update_layout(
title=f'{feature}: Predictive Power Score (PPS) Per Class',
xaxis_title='Class',
yaxis_title='Predictive Power Score (PPS)',
yaxis_range=[0, 1.05],
legend=dict(x=1.0, y=1.0),
barmode='group',
width=800, height=400
)

fig = get_pps_figure(per_class=True)
fig.update_layout(title=f'{feature}: Predictive Power Score (PPS) Per Class')
fig.add_trace(pps_df_to_trace(s_train_to_display, 'train'))
fig.add_trace(pps_df_to_trace(s_test_to_display, 'test'))
display.append(fig)

return ret_value, display
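A sketch of calling the per-class utility with the new parameter; the dataframes and label column are assumed to exist:

ret_value, display = get_single_feature_contribution_per_class(
    train_df=train_df, train_label_name='target',
    test_df=test_df, test_label_name='target',
    ppscore_params={}, n_show_top=5,
    min_pps_to_show=0.05,
    random_state=42,  # previously hard-coded inside the function as random_seed=42
)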
