<a href="https://colab.research.google.com/github/gr3ybr0w/cookbook/blob/master/plotting/Altair/Pareto_in_altair.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import altair as alt
from typing import List, Tuple, Optional

In [3]:
def create_pareto_chart(
    categories: List[str],
    values: List[float],
    title: str = "Pareto Chart",
    color_scheme: str = "blues"
) -> alt.LayerChart:
    """
    Creates a Pareto chart using Altair with frequency on the left axis and
    cumulative percentage on the right axis.

    Categories are ordered by descending value. Bars show each category's
    value; a red line with point markers shows the running share of the total
    on a 0-100 scale.

    Args:
        categories (List[str]): List of category names
        values (List[float]): List of corresponding values for each category
        title (str, optional): Title of the chart. Defaults to "Pareto Chart"
        color_scheme (str, optional): Color scheme for the bars. Defaults to "blues"

    Returns:
        alt.LayerChart: A layered Altair chart containing the Pareto
            visualization (bars plus cumulative line and points)

    Example:
        >>> categories = ['A', 'B', 'C', 'D']
        >>> values = [100, 40, 20, 10]
        >>> chart = create_pareto_chart(categories, values, "Product Defects")
    """
    # Create DataFrame with the data, largest value first
    df = pd.DataFrame({
        'category': categories,
        'value': values
    })
    df = df.sort_values('value', ascending=False)

    # Calculate cumulative percentage on a 0-100 scale
    total = df['value'].sum()
    df['cumulative_percentage'] = 100 * df['value'].cumsum() / total

    # Pin the x order explicitly. A shared sort='-y' would mean "descending
    # value" for the bars but "descending cumulative percentage" for the line,
    # i.e. two opposite orders competing for the same x scale.
    category_order = df['category'].tolist()

    # Shared x encoding for every layer
    base = alt.Chart(df).encode(
        x=alt.X('category:N', sort=category_order, title='Category')
    )

    # Create the bar chart with left y-axis
    bars = base.mark_bar().encode(
        y=alt.Y('value:Q',
                title='Frequency',
                axis=alt.Axis(titleColor='#1f77b4')),
        color=alt.Color('value:Q',
                        scale=alt.Scale(scheme=color_scheme),
                        legend=None)
    )

    # One y definition reused by the line and its points so both layers merge
    # into a single right-hand axis. The data is already 0-100, so append a
    # literal '%' to the label instead of using format='%', which would
    # multiply the values by 100 a second time.
    cumulative_y = alt.Y(
        'cumulative_percentage:Q',
        title='Cumulative Percentage',
        axis=alt.Axis(titleColor='red',
                      orient='right',
                      labelExpr="datum.label + '%'")
    )

    # Create the line chart with right y-axis
    line = base.mark_line(color='red', strokeWidth=2).encode(y=cumulative_y)

    # Add points to the line
    points = base.mark_circle(color='red', size=50).encode(y=cumulative_y)

    # Group line + points first so they share a scale; only the bars and the
    # cumulative group get independent y scales (the dual-axis effect).
    # Resolving all three layers independently would give the points their
    # own extra axis.
    return alt.layer(bars, line + points).resolve_scale(
        y='independent'
    ).properties(
        width=600,
        height=400,
        title=title
    )

# Demo input: five products and their sales figures, listed in matching order
categories = [
    'Product A',
    'Product B',
    'Product C',
    'Product D',
    'Product E',
]
values = [
    850,
    480,
    320,
    180,
    120,
]

# Build the Pareto chart for the demo data
chart = create_pareto_chart(categories, values, title="Product Sales Distribution")

# A bare expression as the last line of a notebook cell renders the chart
chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
