Skip to content

Commit

Permalink
Merge branch 'develop' into pre_pr-prototype/alexsherstinsky/rule_bas…
Browse files Browse the repository at this point in the history
…ed_profiler/fix_values_range_self_initializing_expectation-2022_05_09-124
  • Loading branch information
Alex Sherstinsky committed May 9, 2022
2 parents dcf945c + cff5ec3 commit 300c33d
Show file tree
Hide file tree
Showing 3 changed files with 224 additions and 78 deletions.
@@ -1,7 +1,7 @@
import copy
from abc import abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple

import altair as alt
import pandas as pd
Expand All @@ -19,6 +19,14 @@
AltairDataTypes,
AltairThemes,
)
from great_expectations.rule_based_profiler.types.data_assistant_result.plot_components import (
BatchPlotComponent,
DomainPlotComponent,
ExpectationKwargPlotComponent,
MetricPlotComponent,
PlotComponent,
determine_plot_title,
)
from great_expectations.rule_based_profiler.types.data_assistant_result.plot_result import (
PlotResult,
)
Expand Down Expand Up @@ -188,34 +196,45 @@ def get_line_chart(
Returns:
An altair line chart
"""
metric_title: str = metric_name.replace("_", " ").title()
domain_title: str = domain_name.title()

title: Union[str, alt.TitleParams] = f"{metric_title} per {domain_title}"
if subtitle:
title = alt.TitleParams(title, subtitle=[subtitle])
metric_component: MetricPlotComponent = MetricPlotComponent(
name=metric_name, alt_type=metric_type
)
domain_component: DomainPlotComponent = DomainPlotComponent(
name=domain_name, alt_type=domain_type, subtitle=subtitle
)
batch_component: BatchPlotComponent = BatchPlotComponent(
name="batch_id", alt_type=AltairDataTypes.NOMINAL.value
)
return DataAssistantResult._get_line_chart(
df=df,
metric_component=metric_component,
domain_component=domain_component,
batch_component=batch_component,
)

batch_id: str = "batch_id"
batch_id_title: str = batch_id.replace("_", " ").title().replace("Id", "ID")
batch_id_type: alt.StandardType = AltairDataTypes.NOMINAL.value
@staticmethod
def _get_line_chart(
df: pd.DataFrame,
metric_component: MetricPlotComponent,
domain_component: DomainPlotComponent,
batch_component: BatchPlotComponent,
) -> alt.Chart:
title: alt.TitleParams = determine_plot_title(
metric_plot_component=metric_component,
domain_plot_component=domain_component,
)

tooltip: List[alt.Tooltip] = [
alt.Tooltip(field=batch_id, type=batch_id_type, title=batch_id_title),
alt.Tooltip(
field=metric_name, type=metric_type, title=metric_title, format=","
),
batch_component.generate_tooltip(),
metric_component.generate_tooltip(format=","),
]

line: alt.Chart = (
alt.Chart(data=df, title=title)
.mark_line()
.encode(
x=alt.X(
domain_name,
type=domain_type,
title=domain_title,
),
y=alt.Y(metric_name, type=metric_type, title=metric_title),
x=domain_component.plot_on_axis(),
y=metric_component.plot_on_axis(),
tooltip=tooltip,
)
)
Expand All @@ -224,20 +243,16 @@ def get_line_chart(
alt.Chart(data=df, title=title)
.mark_point()
.encode(
x=alt.X(
domain_name,
type=domain_type,
title=domain_title,
),
y=alt.Y(metric_name, type=metric_type, title=metric_title),
x=domain_component.plot_on_axis(),
y=metric_component.plot_on_axis(),
tooltip=tooltip,
)
)

return line + points

@staticmethod
def get_expect_values_to_be_between_chart(
def get_expect_domain_values_to_be_between_chart(
df: pd.DataFrame,
metric_name: str,
metric_type: alt.StandardType,
Expand All @@ -252,53 +267,72 @@ def get_expect_values_to_be_between_chart(
metric_type: The altair data type for the metric being plotted
domain_name: The name of the domain as it exists in the pandas dataframe
domain_type: The altair data type for the domain being plotted
subtitle: The subtitle to add for a domain such as "Column: column_name"
Returns:
An altair line chart with confidence intervals corresponding to "between" expectations
"""
line_color: alt.HexColor = alt.HexColor(ColorPalettes.HEATMAP_6.value[4])
metric_component: MetricPlotComponent = MetricPlotComponent(
name=metric_name, alt_type=metric_type
)
domain_component: DomainPlotComponent = DomainPlotComponent(
name=domain_name, alt_type=domain_type, subtitle=subtitle
)
batch_component: BatchPlotComponent = BatchPlotComponent(
name="batch_id", alt_type=AltairDataTypes.NOMINAL.value
)
min_value_component: ExpectationKwargPlotComponent = (
ExpectationKwargPlotComponent(
name="min_value",
alt_type=AltairDataTypes.QUANTITATIVE.value,
metric_plot_component=metric_component,
)
)
max_value_component: ExpectationKwargPlotComponent = (
ExpectationKwargPlotComponent(
name="max_value",
alt_type=AltairDataTypes.QUANTITATIVE.value,
metric_plot_component=metric_component,
)
)

metric_title: str = metric_name.replace("_", " ").title()
domain_title: str = domain_name.title()
return DataAssistantResult._get_expect_domain_values_to_be_between_chart(
df=df,
metric_component=metric_component,
domain_component=domain_component,
batch_component=batch_component,
min_value_component=min_value_component,
max_value_component=max_value_component,
)

title: Union[str, alt.TitleParams] = f"{metric_title} per {domain_title}"
if subtitle:
title = alt.TitleParams(title, subtitle=[subtitle])
@staticmethod
def _get_expect_domain_values_to_be_between_chart(
df: pd.DataFrame,
metric_component: MetricPlotComponent,
domain_component: DomainPlotComponent,
batch_component: BatchPlotComponent,
min_value_component: PlotComponent,
max_value_component: PlotComponent,
) -> alt.Chart:
line_color: alt.HexColor = alt.HexColor(ColorPalettes.HEATMAP_6.value[4])

batch_id: str = "batch_id"
batch_id_title: str = batch_id.replace("_", " ").title().replace("Id", "ID")
batch_id_type: alt.StandardType = AltairDataTypes.NOMINAL.value
min_value: str = "min_value"
min_value_title: str = min_value.replace("_", " ").title()
min_value_type: alt.StandardType = AltairDataTypes.QUANTITATIVE.value
max_value: str = "max_value"
max_value_title: str = max_value.replace("_", " ").title()
max_value_type: alt.StandardType = AltairDataTypes.QUANTITATIVE.value
title: alt.TitleParams = determine_plot_title(
metric_plot_component=metric_component,
domain_plot_component=domain_component,
)

tooltip: List[alt.Tooltip] = [
alt.Tooltip(field=batch_id, type=batch_id_type, title=batch_id_title),
alt.Tooltip(
field=metric_name, type=metric_type, title=metric_title, format=","
),
alt.Tooltip(
field=min_value, type=min_value_type, title=min_value_title, format=","
),
alt.Tooltip(
field=max_value, type=max_value_type, title=max_value_title, format=","
),
batch_component.generate_tooltip(),
metric_component.generate_tooltip(format=","),
min_value_component.generate_tooltip(format=","),
max_value_component.generate_tooltip(format=","),
]

lower_limit: alt.Chart = (
alt.Chart(data=df)
.mark_line(color=line_color)
.encode(
x=alt.X(
domain_name,
type=domain_type,
title=domain_title,
),
y=alt.Y(min_value, type=metric_type, title=metric_title),
x=domain_component.plot_on_axis(),
y=min_value_component.plot_on_axis(),
tooltip=tooltip,
)
.properties(title=title)
Expand All @@ -308,12 +342,8 @@ def get_expect_values_to_be_between_chart(
alt.Chart(data=df)
.mark_line(color=line_color)
.encode(
x=alt.X(
domain_name,
type=domain_type,
title=domain_title,
),
y=alt.Y(max_value, type=metric_type, title=metric_title),
x=domain_component.plot_on_axis(),
y=max_value_component.plot_on_axis(),
tooltip=tooltip,
)
.properties(title=title)
Expand All @@ -323,26 +353,22 @@ def get_expect_values_to_be_between_chart(
alt.Chart(data=df)
.mark_area()
.encode(
x=alt.X(
domain_name,
type=domain_type,
title=domain_title,
),
y=alt.Y(min_value, title=metric_title, type=metric_type),
y2=alt.Y2(max_value, title=metric_title),
x=domain_component.plot_on_axis(),
y=min_value_component.plot_on_axis(),
y2=alt.Y2(max_value_component.name, title=metric_component.title),
)
.properties(title=title)
)

line: alt.Chart = DataAssistantResult.get_line_chart(
line: alt.Chart = DataAssistantResult._get_line_chart(
df=df,
metric_name=metric_name,
metric_type=metric_type,
domain_name=domain_name,
domain_type=domain_type,
metric_component=metric_component,
domain_component=domain_component,
batch_component=batch_component,
)

# encode point color based on anomalies
metric_name: str = metric_component.name
predicate: alt.expr.core.BinaryExpression = (
(alt.datum.min_value > alt.datum[metric_name])
& (alt.datum.max_value > alt.datum[metric_name])
Expand Down
@@ -0,0 +1,120 @@
from dataclasses import dataclass
from typing import Optional, Union

import altair as alt


@dataclass(frozen=True)
class PlotComponent:
    """Immutable description of a single field that is rendered in a chart.

    Bundles the field name with its altair data type and offers helpers that
    wrap the corresponding altair constructors.
    """

    # Field name; passed to altair as the tooltip `field`.
    name: str
    # Value passed to altair as `type=`.
    alt_type: alt.StandardType

    @property
    def title(self) -> str:
        """Human-readable title: underscores become spaces, then title case."""
        words = self.name.split("_")
        return " ".join(words).title()

    def generate_tooltip(self, format: str = "") -> alt.Tooltip:
        """Wrapper around alt.Tooltip creation.

        Args:
            format (str): Desired format within tooltip

        Returns:
            An instance of alt.Tooltip built from this component's name,
            altair type, and title, plus the requested format.
        """
        tooltip_kwargs = {
            "field": self.name,
            "type": self.alt_type,
            "title": self.title,
            "format": format,
        }
        return alt.Tooltip(**tooltip_kwargs)

    def plot_on_axis(self) -> Union[alt.X, alt.Y]:
        """Wrapper around alt.X/alt.Y plotting utility.

        The base class does not pick an axis; it always raises.

        Returns:
            Either an alt.X or alt.Y instance based on desired axis.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError


@dataclass(frozen=True)
class MetricPlotComponent(PlotComponent):
    """Plot component for a metric; metrics are drawn on the Y axis."""

    def plot_on_axis(self) -> alt.Y:
        """
        Plots metric on Y axis - see parent `PlotComponent` for more details.

        Returns:
            An alt.Y built from this component's name, altair type, and title.
        """
        y_axis: alt.Y = alt.Y(self.name, type=self.alt_type, title=self.title)
        return y_axis


@dataclass(frozen=True)
class DomainPlotComponent(PlotComponent):
    """Plot component for a domain; domains are drawn on the X axis."""

    # Optional subtitle carried alongside the domain; defaults to None.
    subtitle: Optional[str] = None

    @property
    def title(self) -> str:
        """Title-cased domain name (underscores are left untouched here)."""
        domain_name = self.name
        return domain_name.title()

    def plot_on_axis(self) -> alt.X:
        """
        Plots domain on X axis - see parent `PlotComponent` for more details.

        Returns:
            An alt.X built from this component's name, altair type, and title.
        """
        x_axis: alt.X = alt.X(self.name, type=self.alt_type, title=self.title)
        return x_axis


@dataclass(frozen=True)
class BatchPlotComponent(PlotComponent):
    """Plot component for a batch identifier field."""

    @property
    def title(self) -> str:
        """Parent-style title with every "Id" rewritten as "ID"."""
        # The parent title already swaps underscores for spaces and
        # title-cases the name; only the "Id" -> "ID" fix-up is added here.
        base_title = super().title
        return base_title.replace("Id", "ID")


@dataclass(frozen=True)
class ExpectationKwargPlotComponent(PlotComponent):
    """Plot component for an expectation kwarg tied to a metric component."""

    # Metric whose altair type and title are reused for this kwarg's Y axis.
    metric_plot_component: MetricPlotComponent

    def plot_on_axis(self) -> alt.Y:
        """
        Plots expectation kwarg on Y axis - see parent `PlotComponent` for more details.

        The field plotted is this component's own name, while the axis type
        and title are taken from the associated metric component.
        """
        metric = self.metric_plot_component
        return alt.Y(self.name, type=metric.alt_type, title=metric.title)


def determine_plot_title(
    metric_plot_component: MetricPlotComponent,
    domain_plot_component: DomainPlotComponent,
) -> alt.TitleParams:
    """Determines the appropriate title for a chart based on input components.

    The title text is "<metric title> per <domain title>". A subtitle is
    attached only when the domain component carries a truthy one.

    Args:
        metric_plot_component: Plot utility corresponding to a given metric.
        domain_plot_component: Plot utility corresponding to a given domain.

    Returns:
        An Altair TitleParams object
    """
    heading: str = f"{metric_plot_component.title} per {domain_plot_component.title}"
    domain_subtitle: Optional[str] = domain_plot_component.subtitle

    # No (or empty) subtitle: plain title only.
    if not domain_subtitle:
        return alt.TitleParams(heading)

    # The subtitle is wrapped in a single-element list.
    return alt.TitleParams(heading, subtitle=[domain_subtitle])
Expand Up @@ -217,7 +217,7 @@ def _chart_domain_values(
alt.Chart,
]
if prescriptive:
return_impl = self.get_expect_values_to_be_between_chart
return_impl = self.get_expect_domain_values_to_be_between_chart
else:
return_impl = self.get_line_chart

Expand Down

0 comments on commit 300c33d

Please sign in to comment.