In [None]:
# 🤖
import pandas as pd
import numpy as np
import altair as alt
import pointblank as pb

# Build a reproducible 100-row sample frame with pandas.
# The three draws must stay in this order (category, value, count) so the
# seeded global NumPy stream produces the same columns on every run.
np.random.seed(42)
n_rows = 100
category_labels = ["A", "B", "C", "D"]
category_col = np.random.choice(category_labels, size=n_rows)
# Standard-normal draws rescaled to mean 50, standard deviation 10.
value_col = np.random.randn(n_rows) * 10 + 50
# randint excludes the upper bound, so counts fall in 1..99.
count_col = np.random.randint(1, 100, size=n_rows)
data = pd.DataFrame(
    {"category": category_col, "value": value_col, "count": count_col}
)

# Validate the sample frame with pointblank, one named step at a time.
allowed_categories = ["A", "B", "C", "D"]
plan = pb.Validate(data=data)
# Neither the grouping key nor the measured value may be missing.
plan = plan.col_vals_not_null(columns="category")
plan = plan.col_vals_not_null(columns="value")
# "count" must lie in the closed interval [1, 100] (both bounds inclusive).
plan = plan.col_vals_between(
    columns="count",
    left=1,
    right=100,
    inclusive=(True, True),
)
# Only the four known category labels are acceptable.
plan = plan.col_vals_in_set(columns="category", set=allowed_categories)
# interrogate() executes the collected steps against the data.
validation = plan.interrogate()

# Bare expression so a notebook front end can render the validation report.
validation

# Per-category statistics: mean and standard deviation of "value" plus the
# total of "count", all rounded to two decimals.
stats_by_column = {
    "value": ["mean", "std"],
    "count": "sum",
}
grouped_stats = data.groupby("category").agg(stats_by_column)
summary = grouped_stats.round(2)

# Bare expression so a notebook front end can render the summary table.
summary

# Altair box plot: distribution of "value" within each category, with the
# boxes colored by category.
box_x = alt.X("category:N", title="Category")
box_y = alt.Y("value:Q", title="Value")
chart1 = (
    alt.Chart(data)
    .mark_boxplot()
    .encode(x=box_x, y=box_y, color="category:N")
    .properties(width=300, height=300, title="Value Distribution by Category")
)

# Total "count" per category, flattened back to ordinary columns so Altair
# can address "category" and "count" by name.
count_totals = data.groupby("category")["count"].sum()
summary_df = count_totals.reset_index()

# Altair bar chart of the per-category totals; the color legend is
# suppressed via legend=None.
bar_x = alt.X("category:N", title="Category")
bar_y = alt.Y("count:Q", title="Total Count")
bar_color = alt.Color("category:N", legend=None)
chart2 = (
    alt.Chart(summary_df)
    .mark_bar()
    .encode(x=bar_x, y=bar_y, color=bar_color)
    .properties(width=300, height=300, title="Total Count by Category")
)

# Place the box plot and the bar chart side by side; `|` is Altair's
# operator form of horizontal concatenation.
combined = chart1 | chart2
combined