In [13]:
import polars as pl

df = pl.read_csv('people.txt', 
                 separator=',', 
                 has_header=False, 
                 dtypes=[pl.Utf8, pl.Int64], 
                 new_columns=['name', 'age']
    )

In [14]:
df

name,age
str,i64
"""Michael""",29
"""Andy""",30
"""Justin""",19


In [15]:
df.select(pl.col('name'))

name
str
"""Michael"""
"""Andy"""
"""Justin"""


In [17]:
df.select(pl.mean('age'))

age
f64
26.0


In [19]:
from datetime import datetime

purchases = pl.DataFrame(
    {
        "customer_id": [1, 2, 2, 3, 1],
        "date": [
            datetime(2025, 1, 1),
            datetime(2025, 2, 2),
            datetime(2025, 3, 25),
            datetime(2025, 4, 3),
            datetime(2025, 4, 3)
        ],
        "product": [11825, 12453, 13245, 12453, 10789],
        "price": [4.0, 5.0, 6.0, 6.0, 5.5],
        "quantity": [1, 2, 10, 5, 7],
    }
)

print(purchases)

shape: (5, 5)
┌─────────────┬─────────────────────┬─────────┬───────┬──────────┐
│ customer_id ┆ date                ┆ product ┆ price ┆ quantity │
│ ---         ┆ ---                 ┆ ---     ┆ ---   ┆ ---      │
│ i64         ┆ datetime[μs]        ┆ i64     ┆ f64   ┆ i64      │
╞═════════════╪═════════════════════╪═════════╪═══════╪══════════╡
│ 1           ┆ 2025-01-01 00:00:00 ┆ 11825   ┆ 4.0   ┆ 1        │
│ 2           ┆ 2025-02-02 00:00:00 ┆ 12453   ┆ 5.0   ┆ 2        │
│ 2           ┆ 2025-03-25 00:00:00 ┆ 13245   ┆ 6.0   ┆ 10       │
│ 3           ┆ 2025-04-03 00:00:00 ┆ 12453   ┆ 6.0   ┆ 5        │
│ 1           ┆ 2025-04-03 00:00:00 ┆ 10789   ┆ 5.5   ┆ 7        │
└─────────────┴─────────────────────┴─────────┴───────┴──────────┘


In [34]:
customers = df.with_columns(pl.lit(0).alias('customer_id')).with_columns(pl.col('customer_id').cum_count().cast(pl.Int64))

In [35]:
print(customers)

shape: (3, 3)
┌─────────┬─────┬─────────────┐
│ name    ┆ age ┆ customer_id │
│ ---     ┆ --- ┆ ---         │
│ str     ┆ i64 ┆ i64         │
╞═════════╪═════╪═════════════╡
│ Michael ┆ 29  ┆ 1           │
│ Andy    ┆ 30  ┆ 2           │
│ Justin  ┆ 19  ┆ 3           │
└─────────┴─────┴─────────────┘


In [36]:
joined = customers.join(purchases, left_on='customer_id', right_on='customer_id')

In [37]:
print(joined)

shape: (5, 7)
┌─────────┬─────┬─────────────┬─────────────────────┬─────────┬───────┬──────────┐
│ name    ┆ age ┆ customer_id ┆ date                ┆ product ┆ price ┆ quantity │
│ ---     ┆ --- ┆ ---         ┆ ---                 ┆ ---     ┆ ---   ┆ ---      │
│ str     ┆ i64 ┆ i64         ┆ datetime[μs]        ┆ i64     ┆ f64   ┆ i64      │
╞═════════╪═════╪═════════════╪═════════════════════╪═════════╪═══════╪══════════╡
│ Michael ┆ 29  ┆ 1           ┆ 2025-01-01 00:00:00 ┆ 11825   ┆ 4.0   ┆ 1        │
│ Andy    ┆ 30  ┆ 2           ┆ 2025-02-02 00:00:00 ┆ 12453   ┆ 5.0   ┆ 2        │
│ Andy    ┆ 30  ┆ 2           ┆ 2025-03-25 00:00:00 ┆ 13245   ┆ 6.0   ┆ 10       │
│ Justin  ┆ 19  ┆ 3           ┆ 2025-04-03 00:00:00 ┆ 12453   ┆ 6.0   ┆ 5        │
│ Michael ┆ 29  ┆ 1           ┆ 2025-04-03 00:00:00 ┆ 10789   ┆ 5.5   ┆ 7        │
└─────────┴─────┴─────────────┴─────────────────────┴─────────┴───────┴──────────┘


In [48]:
joined \
    .select(pl.col('date').dt.year(), pl.col('price') * pl.col('quantity')) \
    .group_by(pl.col('date')).sum()

date,price
i32,f64
2025,142.5
