In [None]:
import polars as pl

# Read the monthly (2024-02) smart-meter parquet file lazily.
# scan_parquet builds a LazyFrame query plan; no data is loaded at this
# point -- it is only fetched when a downstream query calls .collect().
# NOTE(review): an s3:// path presumably needs cloud credentials/region from
# the environment or storage_options -- confirm for this bucket.
df = pl.scan_parquet("s3://weave.energy/beta/smart-meter/2024-02.parquet")

# Example 1: Efficient filtering and aggregation
# Mean active-import consumption per (hour of day, secondary substation),
# restricted to rows whose dno_alias equals "SSEN".
# Uses `group_by`: the old `groupby` spelling was deprecated in polars 0.19
# and removed in 1.0, so it fails on current releases.
hourly_consumption = (
    df.filter(pl.col("dno_alias") == "SSEN")
    .group_by([
        # No alias is applied, so this key column keeps the name
        # "data_collection_log_timestamp" while holding the hour of day.
        pl.col("data_collection_log_timestamp").dt.hour(),
        "secondary_substation_unique_id",
    ])
    .agg([
        pl.col("total_consumption_active_import").mean().alias("avg_consumption"),
    ])
    .collect()  # Only now is the data actually loaded
)

# Example 2: Time-based window operations
# Per-day totals across all rows of the file: summed active-import
# consumption and the mean of the active device count, in 1-day windows
# keyed on data_collection_log_timestamp.
# Uses `group_by_dynamic`: the old `groupby_dynamic` spelling was deprecated
# in polars 0.19 and removed in 1.0.
daily_stats = (
    # group_by_dynamic requires the index column to be sorted ascending; a
    # raw parquet scan carries no such guarantee, so sort explicitly first.
    df.sort("data_collection_log_timestamp")
    .group_by_dynamic("data_collection_log_timestamp", every="1d")
    .agg([
        pl.col("total_consumption_active_import").sum().alias("daily_consumption"),
        pl.col("aggregated_device_count_active").mean().alias("avg_active_devices"),
    ])
    .collect()  # Executes the query; data is loaded only at this point
)