In [None]:
import plotly.express as px
import polars as pl

from fryer import all as fryer

In [None]:
# Read the `uk_gov_hm_land_registry_price_paid` dataset and add two bucketing
# columns derived from `date`:
#   month - `date` rolled back to the first day of its month
#   year  - 1 January of the year of `date`
df = fryer.data.uk_gov_hm_land_registry_price_paid.read().with_columns(
    month=pl.col("date").dt.month_start(),
    # Assemble the date from its components instead of formatting every row to
    # a string and parsing that string back into a date.
    year=pl.date(pl.col("date").dt.year(), 1, 1),
)

In [None]:
# Show the column names of `df` (resolves the schema only).
df.collect_schema().names()

In [None]:
# One bar chart per time bucket: number of rows in each year, then in each month.
for col in ("year", "month"):
    # `len` is the row count produced by group_by(...).len().
    df_len = df.group_by(col).len().collect()
    display(px.bar(df_len, x=col, y="len", title=col))

In [None]:
# For each categorical column, plot the number of rows per year as a bar chart
# coloured by the column's values.
column_x = "year"
for col in (
    "propertyType",
    "oldOrNew",
    "tenureDuration",
    "ppdCategoryType",
    "recordStatusMonthlyFileOnly",
):
    df_len = df.group_by([column_x, col]).len().collect()
    # `df_len` holds one row per (year, category), so every category shows up
    # once per year. Rank categories by their largest count and keep only the
    # first occurrence of each, so `category_orders` gets each value exactly
    # once instead of a list full of repeats.
    categories = (
        df_len.sort(by="len", descending=True)[col]
        .unique(maintain_order=True)
        .to_list()
    )
    display(
        df_len.pipe(
            px.bar,
            x=column_x,
            y="len",
            color=col,
            title=col,
            category_orders={col: categories},
        )
    )

In [None]:
# Line chart of selected `price` quantiles per month, drawn on a log y-axis.
column_group_by = "month"
column_value = "price"

# Output column name -> quantile level.
# The extreme levels 0.0, 0.01, 0.99 and 1.0 are currently disabled.
map_quantiles = {
    f"{column_value}_q{quantile:.2f}": quantile
    for quantile in (0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95)
}
columns_quantiles = list(map_quantiles)

# One aggregation expression per quantile, named after its output column.
quantile_exprs = [
    pl.col(column_value).quantile(quantile).alias(column_quantile)
    for column_quantile, quantile in map_quantiles.items()
]

df_quantiles = (
    df.group_by(column_group_by)
    .agg(quantile_exprs)
    .sort(by=column_group_by)
    .collect()
)

px.line(df_quantiles, x=column_group_by, y=columns_quantiles, log_y=True)