Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MAINTENANCE] Improve Altair plotting extensibility #4923

Merged
merged 16 commits into from Apr 22, 2022
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -1,2 +1,2 @@
from .default_configuration import ALTAIR_DEFAULT_CONFIGURATION
from .encodings import AltairDataTypes
from .themes import AltairThemes

This file was deleted.

Expand Up @@ -4,13 +4,15 @@


class AltairDataTypes(Enum):
    """Altair encoding data types, each wrapped as an ``alt.StandardType``."""

    # available data types: https://altair-viz.github.io/user_guide/encoding.html#encoding-data-types
    QUANTITATIVE = alt.StandardType("quantitative")
    ORDINAL = alt.StandardType("ordinal")
    NOMINAL = alt.StandardType("nominal")
    TEMPORAL = alt.StandardType("temporal")


class AltairAggregates(Enum):
# available aggregates: https://altair-viz.github.io/user_guide/encoding.html#encoding-channel-options
MEAN = alt.Aggregate("mean")
MEDIAN = alt.Aggregate("median")
MIN = alt.Aggregate("min")
Expand Down
114 changes: 114 additions & 0 deletions great_expectations/rule_based_profiler/types/altair/themes.py
@@ -0,0 +1,114 @@
from enum import Enum
from typing import List

from great_expectations.types import ColorPalettes, Colors

# Size: width and height used for the "view" section of the default theme
chart_width: int = 800
chart_height: int = 250

# Font: used as the top-level "font" entry of the default theme
font: str = "Verdana"

#
# Chart Components
#

# Title ("title" section of the default theme)
title_align: str = "center"
title_font_size: int = 15
title_color: str = Colors.PURPLE.value

# Both Axes ("axis" section of the default theme)
axis_title_color: str = Colors.PURPLE.value
axis_title_font_size: int = 14
axis_title_padding: int = 10
axis_label_color: str = Colors.BLUE_1.value
axis_label_font_size: int = 12

# X-Axis Only ("axisX" section of the default theme)
x_axis_label_angle: int = 0
x_axis_label_flush: bool = True
x_axis_grid: bool = True
# Known vega-lite bug: https://github.com/vega/vega-lite/issues/5732
# forces us to choose between features "interactive scaling" (below) and "tooltips".
# Tooltips are configured (see the *_tooltip_content constants), so the
# interactive-scaling settings are intentionally left commented out:
# x_axis_selection_type: str = "interval"
# x_axis_selection_bind: str = "scales"

# Y-Axis Only (no y-axis-specific settings are defined)

#
# Color Palettes
#

# Each scheme feeds the matching key of the "range" section of the default theme.
category_color_scheme: List[str] = ColorPalettes.CATEGORY.value
diverging_color_scheme: List[str] = ColorPalettes.DIVERGING.value
heatmap_color_scheme: List[str] = ColorPalettes.HEATMAP.value
ordinal_color_scheme: List[str] = ColorPalettes.ORDINAL.value

#
# Chart Types
#

# Area ("area" section of the default theme)
fill_opacity: float = 0.5

# Line Chart ("line" section of the default theme)
line_color: str = Colors.BLUE_1.value
line_stroke_width: int = 3
# NOTE(review): line_opacity is not referenced by DEFAULT_THEME below -- confirm
# whether the "line" section is meant to include an "opacity" entry.
line_opacity: float = 0.9
# Known vega-lite bug: https://github.com/vega/vega-lite/issues/5732
# forces us to choose between features "interactive scaling" and "tooltips" (below)
line_tooltip_content: str = "data"

# Point ("point" section of the default theme)
point_size: int = 70
point_color: str = Colors.GREEN.value
point_filled: bool = True
point_opacity: float = 1.0
point_tooltip_content: str = "data"


class AltairThemes(Enum):
    """Theme dictionaries assembled from the module-level style constants."""

    # Nested configuration dictionary: each top-level key names a configuration
    # section ("view", "title", "axis", ...) and maps to that section's settings.
    DEFAULT_THEME = {
        "view": {"width": chart_width, "height": chart_height},
        "font": font,
        "title": {
            "align": title_align,
            "color": title_color,
            "fontSize": title_font_size,
        },
        # Settings shared by both axes
        "axis": {
            "titleFontSize": axis_title_font_size,
            "titleColor": axis_title_color,
            "titlePadding": axis_title_padding,
            "labelFontSize": axis_label_font_size,
            "labelColor": axis_label_color,
        },
        # Settings applied to the x-axis only
        "axisX": {
            "labelAngle": x_axis_label_angle,
            "labelFlush": x_axis_label_flush,
            "grid": x_axis_grid,
        },
        # Color palettes by scale kind
        "range": {
            "category": category_color_scheme,
            "diverging": diverging_color_scheme,
            "heatmap": heatmap_color_scheme,
            "ordinal": ordinal_color_scheme,
        },
        # Per-mark defaults
        "area": {
            "fillOpacity": fill_opacity,
        },
        "line": {
            "color": line_color,
            "strokeWidth": line_stroke_width,
            "tooltip": {"content": line_tooltip_content},
        },
        "point": {
            "size": point_size,
            "color": point_color,
            "filled": point_filled,
            "opacity": point_opacity,
            "tooltip": {"content": point_tooltip_content},
        },
    }
@@ -1,3 +1,4 @@
import copy
from abc import abstractmethod
from dataclasses import asdict, dataclass
from typing import Any, Dict, List, Optional
Expand All @@ -6,16 +7,16 @@
import pandas as pd

from great_expectations.core import ExpectationSuite
from great_expectations.core.util import convert_to_json_serializable
from great_expectations.core.util import convert_to_json_serializable, nested_update
from great_expectations.rule_based_profiler.types import (
FULLY_QUALIFIED_PARAMETER_NAME_ATTRIBUTED_VALUE_KEY,
FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY,
Domain,
ParameterNode,
)
from great_expectations.rule_based_profiler.types.altair import (
ALTAIR_DEFAULT_CONFIGURATION,
AltairDataTypes,
AltairThemes,
)
from great_expectations.types import ColorPalettes, Colors, SerializableDictDot

Expand Down Expand Up @@ -64,15 +65,21 @@ def get_attributed_metrics_by_domain(
return metrics_attributed_values_by_domain

@staticmethod
def display(charts: List[alt.Chart]) -> None:
def display(charts: List[alt.Chart], theme: Optional[Dict[str, Any]]) -> None:
"""
Display each chart passed by DataAssistantResult.plot()

Args:
charts: A list of altair chart objects to display
theme: Altair top-level chart configuration dictionary
"""
altair_configuration: Dict[str, Any] = copy.deepcopy(
AltairThemes.DEFAULT_THEME.value
)
if theme is not None:
nested_update(altair_configuration, theme)

chart: alt.Chart
altair_configuration: Dict[str, Any] = ALTAIR_DEFAULT_CONFIGURATION
for chart in charts:
chart.configure(**altair_configuration).display()

Expand All @@ -83,8 +90,6 @@ def get_line_chart(
metric_type: alt.StandardType,
domain_name: str,
domain_type: alt.StandardType,
line_color: Optional[str] = Colors.BLUE_2.value,
point_color: Optional[str] = Colors.GREEN.value,
point_color_condition: Optional[alt.condition] = None,
donaldheppner marked this conversation as resolved.
Show resolved Hide resolved
tooltip: Optional[List[alt.Tooltip]] = None,
) -> alt.Chart:
Expand All @@ -95,8 +100,6 @@ def get_line_chart(
metric_type: The altair data type for the metric being plotted
domain_name: The name of the domain as it exists in the pandas dataframe
domain_type: The altair data type for the domain being plotted
line_color: Hex code for the line color
point_color: Hex code for the point color
point_color_condition: Altair condition for changing the point color
tooltip: Altair tooltip for displaying relevant information on the chart

Expand All @@ -118,7 +121,7 @@ def get_line_chart(

line: alt.Chart = (
alt.Chart(data=df, title=title)
.mark_line(color=line_color)
.mark_line()
.encode(
x=alt.X(
domain_name,
Expand All @@ -133,23 +136,22 @@ def get_line_chart(
if point_color_condition is not None:
points: alt.Chart = (
alt.Chart(data=df, title=title)
.mark_point(opacity=1.0)
.mark_point()
.encode(
x=alt.X(
domain_name,
type=domain_type,
title=domain_title,
),
y=alt.Y(metric_name, type=metric_type, title=metric_title),
stroke=point_color_condition,
fill=point_color_condition,
color=point_color_condition,
tooltip=tooltip,
)
)
else:
points: alt.Chart = (
alt.Chart(data=df, title=title)
.mark_point(stroke=point_color, fill=point_color, opacity=1.0)
.mark_point()
.encode(
x=alt.X(
domain_name,
Expand Down Expand Up @@ -182,9 +184,7 @@ def get_expect_domain_values_to_be_between_chart(
Returns:
An altair line chart with confidence intervals corresponding to "between" expectations
"""
line_opacity: float = 0.9
line_color: alt.HexColor = alt.HexColor(ColorPalettes.HEATMAP.value[4])
fill_opacity: float = 0.5
fill_color: alt.HexColor = alt.HexColor(ColorPalettes.HEATMAP.value[5])

metric_title: str = metric_name.replace("_", " ").title()
Expand All @@ -206,7 +206,7 @@ def get_expect_domain_values_to_be_between_chart(

lower_limit: alt.Chart = (
alt.Chart(data=df)
.mark_line(color=line_color, opacity=line_opacity)
.mark_line(color=line_color)
.encode(
x=alt.X(
domain_name,
Expand All @@ -220,7 +220,7 @@ def get_expect_domain_values_to_be_between_chart(

upper_limit: alt.Chart = (
alt.Chart(data=df)
.mark_line(color=line_color, opacity=line_opacity)
.mark_line(color=line_color)
.encode(
x=alt.X(
domain_name,
Expand All @@ -234,7 +234,7 @@ def get_expect_domain_values_to_be_between_chart(

band: alt.Chart = (
alt.Chart(data=df)
.mark_area(fill=fill_color, fillOpacity=fill_opacity)
.mark_area(fill=fill_color)
.encode(
x=alt.X(
domain_name,
Expand All @@ -246,7 +246,7 @@ def get_expect_domain_values_to_be_between_chart(
)
)

predicate = (
predicate: alt.expr.core.BinaryExpression = (
(alt.datum.min_value > alt.datum.table_row_count)
& (alt.datum.max_value > alt.datum.table_row_count)
) | (
Expand All @@ -273,12 +273,14 @@ def get_expect_domain_values_to_be_between_chart(
@abstractmethod
def plot(
self,
prescriptive: bool = False,
prescriptive: Optional[bool] = False,
theme: Optional[Dict[str, Any]] = None,
) -> None:
"""
Use contents of "DataAssistantResult" object to display metrics and other detail for visualization purposes.

Args:
prescriptive: Type of plot to generate.
prescriptive: Type of plot to generate, prescriptive if True, descriptive if False
theme: Altair top-level chart configuration dictionary
"""
pass