In [2]:
import polars as pl

In [3]:
# Build a small named integer Series and show its plain-text representation.
s = pl.Series(name="a", values=[1, 2, 3, 4, 5])
print(s)

shape: (5,)
Series: 'a' [i64]
[
	1
	2
	3
	4
	5
]


In [4]:
# Display the Series under the new name "b"; the result is not assigned to a variable here.
s.rename("b")

b
i64
1
2
3
4
5


In [5]:
# Recreate the Series, then print its smallest and largest value (in that order).
s = pl.Series(name="a", values=[1, 2, 3, 4, 5])
for extreme in (s.min(), s.max()):
    print(extreme)

1
5


In [6]:
# String namespace demo: swap "polar" for "pola" in every element that contains it.
s = pl.Series(name="a", values=["polar", "bear", "arctic", "polar fox", "polar bear"])
s2 = s.str.replace(pattern="polar", value="pola")
print(s2)

shape: (5,)
Series: 'a' [str]
[
	"pola"
	"bear"
	"arctic"
	"pola fox"
	"pola bear"
]


In [7]:
from datetime import date

# Every second day from 1 Jan to 9 Jan 2001, reduced to day-of-month numbers.
start, stop = date(2001, 1, 1), date(2001, 1, 9)
s = pl.date_range(start, stop, interval="2d", eager=True)
day_numbers = s.dt.day()
print(day_numbers)


shape: (5,)
Series: 'date' [u32]
[
	1
	3
	5
	7
	9
]


In [8]:
from datetime import datetime

# Five-row demo frame with one integer, one datetime and one float column.
df = pl.DataFrame(
    {
        "integer": list(range(1, 6)),
        "date": [datetime(2022, 1, day) for day in range(1, 6)],
        "float": [float(value) for value in range(4, 9)],
    }
)

print(df)

shape: (5, 3)
┌─────────┬─────────────────────┬───────┐
│ integer ┆ date                ┆ float │
│ ---     ┆ ---                 ┆ ---   │
│ i64     ┆ datetime[μs]        ┆ f64   │
╞═════════╪═════════════════════╪═══════╡
│ 1       ┆ 2022-01-01 00:00:00 ┆ 4.0   │
│ 2       ┆ 2022-01-02 00:00:00 ┆ 5.0   │
│ 3       ┆ 2022-01-03 00:00:00 ┆ 6.0   │
│ 4       ┆ 2022-01-04 00:00:00 ┆ 7.0   │
│ 5       ┆ 2022-01-05 00:00:00 ┆ 8.0   │
└─────────┴─────────────────────┴───────┘


In [9]:
# Show only the first three rows.
first_rows = df.head(3)
print(first_rows)

shape: (3, 3)
┌─────────┬─────────────────────┬───────┐
│ integer ┆ date                ┆ float │
│ ---     ┆ ---                 ┆ ---   │
│ i64     ┆ datetime[μs]        ┆ f64   │
╞═════════╪═════════════════════╪═══════╡
│ 1       ┆ 2022-01-01 00:00:00 ┆ 4.0   │
│ 2       ┆ 2022-01-02 00:00:00 ┆ 5.0   │
│ 3       ┆ 2022-01-03 00:00:00 ┆ 6.0   │
└─────────┴─────────────────────┴───────┘


In [10]:
# Same three rows, shown through the notebook's rich display instead of print().
df.head(n=3)

integer,date,float
i64,datetime[μs],f64
1,2022-01-01 00:00:00,4.0
2,2022-01-02 00:00:00,5.0
3,2022-01-03 00:00:00,6.0


In [11]:
# Show only the last three rows.
last_rows = df.tail(3)
print(last_rows)

shape: (3, 3)
┌─────────┬─────────────────────┬───────┐
│ integer ┆ date                ┆ float │
│ ---     ┆ ---                 ┆ ---   │
│ i64     ┆ datetime[μs]        ┆ f64   │
╞═════════╪═════════════════════╪═══════╡
│ 3       ┆ 2022-01-03 00:00:00 ┆ 6.0   │
│ 4       ┆ 2022-01-04 00:00:00 ┆ 7.0   │
│ 5       ┆ 2022-01-05 00:00:00 ┆ 8.0   │
└─────────┴─────────────────────┴───────┘


In [12]:
# Pick two rows at random; no seed is passed, so the selection may change from run to run.
random_rows = df.sample(2)
print(random_rows)

shape: (2, 3)
┌─────────┬─────────────────────┬───────┐
│ integer ┆ date                ┆ float │
│ ---     ┆ ---                 ┆ ---   │
│ i64     ┆ datetime[μs]        ┆ f64   │
╞═════════╪═════════════════════╪═══════╡
│ 3       ┆ 2022-01-03 00:00:00 ┆ 6.0   │
│ 5       ┆ 2022-01-05 00:00:00 ┆ 8.0   │
└─────────┴─────────────────────┴───────┘


In [13]:
# Summary statistics for every column of the frame.
summary = df.describe()
print(summary)

shape: (9, 4)
┌────────────┬──────────┬─────────────────────┬──────────┐
│ describe   ┆ integer  ┆ date                ┆ float    │
│ ---        ┆ ---      ┆ ---                 ┆ ---      │
│ str        ┆ f64      ┆ str                 ┆ f64      │
╞════════════╪══════════╪═════════════════════╪══════════╡
│ count      ┆ 5.0      ┆ 5                   ┆ 5.0      │
│ null_count ┆ 0.0      ┆ 0                   ┆ 0.0      │
│ mean       ┆ 3.0      ┆ null                ┆ 6.0      │
│ std        ┆ 1.581139 ┆ null                ┆ 1.581139 │
│ min        ┆ 1.0      ┆ 2022-01-01 00:00:00 ┆ 4.0      │
│ 25%        ┆ 2.0      ┆ null                ┆ 5.0      │
│ 50%        ┆ 3.0      ┆ null                ┆ 6.0      │
│ 75%        ┆ 4.0      ┆ null                ┆ 7.0      │
│ max        ┆ 5.0      ┆ 2022-01-05 00:00:00 ┆ 8.0      │
└────────────┴──────────┴─────────────────────┴──────────┘


In [14]:
import polars as pl
from datetime import datetime

# Three-row frame used by the I/O, selection and filter examples that follow.
df = pl.DataFrame(
    {
        "integer": list(range(1, 4)),
        "date": [datetime(2022, 1, day) for day in range(1, 4)],
        "float": [float(value) for value in range(4, 7)],
    }
)

print(df)

shape: (3, 3)
┌─────────┬─────────────────────┬───────┐
│ integer ┆ date                ┆ float │
│ ---     ┆ ---                 ┆ ---   │
│ i64     ┆ datetime[μs]        ┆ f64   │
╞═════════╪═════════════════════╪═══════╡
│ 1       ┆ 2022-01-01 00:00:00 ┆ 4.0   │
│ 2       ┆ 2022-01-02 00:00:00 ┆ 5.0   │
│ 3       ┆ 2022-01-03 00:00:00 ┆ 6.0   │
└─────────┴─────────────────────┴───────┘


In [15]:
# CSV round trip: with default parsing the "date" column is read back as strings.
csv_path = "docs/data/output.csv"
df.write_csv(csv_path)
df_csv = pl.read_csv(csv_path)
print(df_csv)

shape: (3, 3)
┌─────────┬────────────────────────────┬───────┐
│ integer ┆ date                       ┆ float │
│ ---     ┆ ---                        ┆ ---   │
│ i64     ┆ str                        ┆ f64   │
╞═════════╪════════════════════════════╪═══════╡
│ 1       ┆ 2022-01-01T00:00:00.000000 ┆ 4.0   │
│ 2       ┆ 2022-01-02T00:00:00.000000 ┆ 5.0   │
│ 3       ┆ 2022-01-03T00:00:00.000000 ┆ 6.0   │
└─────────┴────────────────────────────┴───────┘


In [16]:
# Read the same CSV again, asking Polars to try parsing date-like columns.
csv_path = "docs/data/output.csv"
df_csv = pl.read_csv(csv_path, try_parse_dates=True)
print(df_csv)


shape: (3, 3)
┌─────────┬─────────────────────┬───────┐
│ integer ┆ date                ┆ float │
│ ---     ┆ ---                 ┆ ---   │
│ i64     ┆ datetime[μs]        ┆ f64   │
╞═════════╪═════════════════════╪═══════╡
│ 1       ┆ 2022-01-01 00:00:00 ┆ 4.0   │
│ 2       ┆ 2022-01-02 00:00:00 ┆ 5.0   │
│ 3       ┆ 2022-01-03 00:00:00 ┆ 6.0   │
└─────────┴─────────────────────┴───────┘


In [17]:
# JSON round trip: write the frame, read it back, and print the result.
json_path = "docs/data/output.json"
df.write_json(json_path)
df_json = pl.read_json(json_path)
print(df_json)

shape: (3, 3)
┌─────────┬─────────────────────┬───────┐
│ integer ┆ date                ┆ float │
│ ---     ┆ ---                 ┆ ---   │
│ i64     ┆ datetime[μs]        ┆ f64   │
╞═════════╪═════════════════════╪═══════╡
│ 1       ┆ 2022-01-01 00:00:00 ┆ 4.0   │
│ 2       ┆ 2022-01-02 00:00:00 ┆ 5.0   │
│ 3       ┆ 2022-01-03 00:00:00 ┆ 6.0   │
└─────────┴─────────────────────┴───────┘


In [18]:
# Parquet round trip: write the frame, read it back, and print the result.
parquet_path = "docs/data/output.parquet"
df.write_parquet(parquet_path)
df_parquet = pl.read_parquet(parquet_path)
print(df_parquet)

shape: (3, 3)
┌─────────┬─────────────────────┬───────┐
│ integer ┆ date                ┆ float │
│ ---     ┆ ---                 ┆ ---   │
│ i64     ┆ datetime[μs]        ┆ f64   │
╞═════════╪═════════════════════╪═══════╡
│ 1       ┆ 2022-01-01 00:00:00 ┆ 4.0   │
│ 2       ┆ 2022-01-02 00:00:00 ┆ 5.0   │
│ 3       ┆ 2022-01-03 00:00:00 ┆ 6.0   │
└─────────┴─────────────────────┴───────┘


In [19]:
# Select every column (pl.all() is the wildcard selection).
df.select(pl.all())


integer,date,float
i64,datetime[μs],f64
1,2022-01-01 00:00:00,4.0
2,2022-01-02 00:00:00,5.0
3,2022-01-03 00:00:00,6.0


In [20]:
# Select two columns by name, then keep only the first two rows.
selected = df.select(pl.col(["date", "float"]))
selected.limit(2)

date,float
datetime[μs],f64
2022-01-01 00:00:00,4.0
2022-01-02 00:00:00,5.0


In [21]:
# Same selection written as separate expressions, limited to three rows.
df.select(
    pl.col("date"),
    pl.col("float"),
).limit(3)

date,float
datetime[μs],f64
2022-01-01 00:00:00,4.0
2022-01-02 00:00:00,5.0
2022-01-03 00:00:00,6.0


In [22]:
# Select everything except the "date" column.
df.select(pl.exclude("date"))

integer,float
i64,f64
1,4.0
2,5.0
3,6.0


In [23]:
# Keep the rows whose "date" lies between 2 Jan and 3 Jan 2022.
window_start = datetime(2022, 1, 2)
window_end = datetime(2022, 1, 3)
df.filter(pl.col("date").is_between(window_start, window_end))

integer,date,float
i64,datetime[μs],f64
2,2022-01-02 00:00:00,5.0
3,2022-01-03 00:00:00,6.0


In [24]:
# Combine two predicates: "float" is at most 5 and is not NaN.
float_col = pl.col("float")
df.filter((float_col <= 5) & float_col.is_not_nan())

integer,date,float
i64,datetime[μs],f64
1,2022-01-01 00:00:00,4.0
2,2022-01-02 00:00:00,5.0


In [25]:
# Add two derived columns: the column total of "float" ("e") and "float" shifted by 42.
df.with_columns(
    pl.col("float").sum().alias("e"),
    (pl.col("float") + 42).alias("float+42"),
)



integer,date,float,e,float+42
i64,datetime[μs],f64,f64,f64
1,2022-01-01 00:00:00,4.0,15.0,46.0
2,2022-01-02 00:00:00,5.0,15.0,47.0
3,2022-01-03 00:00:00,6.0,15.0,48.0


In [26]:
import numpy as np
# Frame for the group-by examples: integers 0..7 in "x" and a group label in "y".
df2 = pl.DataFrame(
    {
        "x": np.arange(8),
        "y": list("AAABBCXX"),
    }
)


In [27]:
# Number of rows per "y" group; maintain_order=True is passed so groups keep a stable order.
# NOTE(review): newer Polars releases reportedly deprecate GroupBy.count() in favor of
# len() (which would name the column "len") — confirm before upgrading.
df2.group_by("y", maintain_order=True).count()

y,count
str,u32
"""A""",3
"""B""",2
"""C""",1
"""X""",2


In [28]:
# Per-group count, sum, mean and median, each stored under an explicit column name.
df3 = df2.group_by("y", maintain_order=True).agg(
    pl.all().count().alias("count"),
    pl.all().sum().alias("sum"),
    pl.all().mean().alias("mean"),
    pl.all().median().alias("median"),
)
df3

y,count,sum,mean,median
str,u32,i64,f64,f64
"""A""",3,3,1.0,1.0
"""B""",2,7,3.5,3.5
"""C""",1,5,5.0,5.0
"""X""",2,13,6.5,6.5


In [29]:
# Add the product of "count" and "sum", then drop the "mean" and "median" columns.
count_times_sum = (pl.col("count") * pl.col("sum")).alias("count * sum")
df3_x = df3.with_columns(count_times_sum).select(pl.all().exclude(["mean", "median"]))

print(df3_x)

shape: (4, 4)
┌─────┬───────┬─────┬─────────────┐
│ y   ┆ count ┆ sum ┆ count * sum │
│ --- ┆ ---   ┆ --- ┆ ---         │
│ str ┆ u32   ┆ i64 ┆ i64         │
╞═════╪═══════╪═════╪═════════════╡
│ A   ┆ 3     ┆ 3   ┆ 9           │
│ B   ┆ 2     ┆ 7   ┆ 14          │
│ C   ┆ 1     ┆ 5   ┆ 5           │
│ X   ┆ 2     ┆ 13  ┆ 26          │
└─────┴───────┴─────┴─────────────┘


In [30]:
# Same result as the previous cell, with the expressions passed inside lists.
with_product = df3.with_columns(
    [(pl.col("count") * pl.col("sum")).alias("count * sum")]
)
df3_y = with_product.select([pl.all().exclude(["mean", "median"])])

print(df3_y)

shape: (4, 4)
┌─────┬───────┬─────┬─────────────┐
│ y   ┆ count ┆ sum ┆ count * sum │
│ --- ┆ ---   ┆ --- ┆ ---         │
│ str ┆ u32   ┆ i64 ┆ i64         │
╞═════╪═══════╪═════╪═════════════╡
│ A   ┆ 3     ┆ 3   ┆ 9           │
│ B   ┆ 2     ┆ 7   ┆ 14          │
│ C   ┆ 1     ┆ 5   ┆ 5           │
│ X   ┆ 2     ┆ 13  ┆ 26          │
└─────┴───────┴─────┴─────────────┘


# Join

In [31]:
# Left frame for the join demo. Column "d" mixes NaN (a float value) with None
# (a missing value); the printed result shows them as `NaN` and `null`.
# NOTE: "b" comes from NumPy's global RNG with no seed set in this cell, so its
# values are expected to differ between runs.
df = pl.DataFrame(
    {
        "a": np.arange(0, 8),
        "b": np.random.rand(8),
        # Use `np.nan`: the `np.NaN` alias was removed in NumPy 2.0 and raises AttributeError there.
        "d": [1, 2.0, np.nan, np.nan, 0, -5, -42, None],
    }
)

# Right frame: the same integer key range under a different name ("x") plus a label column.
df2 = pl.DataFrame(
    {
        "x": np.arange(0, 8),
        "y": ["A", "A", "A", "B", "B", "C", "X", "X"],
    }
)
# Match rows where df["a"] equals df2["x"]; the printed result keeps "a" and has no "x" column.
joined = df.join(df2, left_on="a", right_on="x")
print(joined)

shape: (8, 4)
┌─────┬──────────┬───────┬─────┐
│ a   ┆ b        ┆ d     ┆ y   │
│ --- ┆ ---      ┆ ---   ┆ --- │
│ i64 ┆ f64      ┆ f64   ┆ str │
╞═════╪══════════╪═══════╪═════╡
│ 0   ┆ 0.324516 ┆ 1.0   ┆ A   │
│ 1   ┆ 0.98358  ┆ 2.0   ┆ A   │
│ 2   ┆ 0.466033 ┆ NaN   ┆ A   │
│ 3   ┆ 0.304767 ┆ NaN   ┆ B   │
│ 4   ┆ 0.819217 ┆ 0.0   ┆ B   │
│ 5   ┆ 0.861637 ┆ -5.0  ┆ C   │
│ 6   ┆ 0.746745 ┆ -42.0 ┆ X   │
│ 7   ┆ 0.665884 ┆ null  ┆ X   │
└─────┴──────────┴───────┴─────┘


# Concat

In [32]:
# Horizontal concatenation: append df2's columns to df, producing a new frame.
stacked = df.hstack(df2, in_place=False)
print(stacked)

shape: (8, 5)
┌─────┬──────────┬───────┬─────┬─────┐
│ a   ┆ b        ┆ d     ┆ x   ┆ y   │
│ --- ┆ ---      ┆ ---   ┆ --- ┆ --- │
│ i64 ┆ f64      ┆ f64   ┆ i64 ┆ str │
╞═════╪══════════╪═══════╪═════╪═════╡
│ 0   ┆ 0.324516 ┆ 1.0   ┆ 0   ┆ A   │
│ 1   ┆ 0.98358  ┆ 2.0   ┆ 1   ┆ A   │
│ 2   ┆ 0.466033 ┆ NaN   ┆ 2   ┆ A   │
│ 3   ┆ 0.304767 ┆ NaN   ┆ 3   ┆ B   │
│ 4   ┆ 0.819217 ┆ 0.0   ┆ 4   ┆ B   │
│ 5   ┆ 0.861637 ┆ -5.0  ┆ 5   ┆ C   │
│ 6   ┆ 0.746745 ┆ -42.0 ┆ 6   ┆ X   │
│ 7   ┆ 0.665884 ┆ null  ┆ 7   ┆ X   │
└─────┴──────────┴───────┴─────┴─────┘


# Data Structures

In [35]:
import polars as pl

# A Series is a single named, typed column of values.
s = pl.Series(name="a", values=list(range(1, 6)))
print(s)

shape: (5,)
Series: 'a' [i64]
[
	1
	2
	3
	4
	5
]


## DataFrame

In [36]:
from datetime import datetime

# Build a five-row frame from a dict that maps column names to equally long lists.
columns = {
    "integer": [1, 2, 3, 4, 5],
    "date": [datetime(2022, 1, day) for day in (1, 2, 3, 4, 5)],
    "float": [4.0, 5.0, 6.0, 7.0, 8.0],
}
df = pl.DataFrame(columns)

print(df)

shape: (5, 3)
┌─────────┬─────────────────────┬───────┐
│ integer ┆ date                ┆ float │
│ ---     ┆ ---                 ┆ ---   │
│ i64     ┆ datetime[μs]        ┆ f64   │
╞═════════╪═════════════════════╪═══════╡
│ 1       ┆ 2022-01-01 00:00:00 ┆ 4.0   │
│ 2       ┆ 2022-01-02 00:00:00 ┆ 5.0   │
│ 3       ┆ 2022-01-03 00:00:00 ┆ 6.0   │
│ 4       ┆ 2022-01-04 00:00:00 ┆ 7.0   │
│ 5       ┆ 2022-01-05 00:00:00 ┆ 8.0   │
└─────────┴─────────────────────┴───────┘


In [37]:
# First three rows of the frame.
print(df.head(n=3))


shape: (3, 3)
┌─────────┬─────────────────────┬───────┐
│ integer ┆ date                ┆ float │
│ ---     ┆ ---                 ┆ ---   │
│ i64     ┆ datetime[μs]        ┆ f64   │
╞═════════╪═════════════════════╪═══════╡
│ 1       ┆ 2022-01-01 00:00:00 ┆ 4.0   │
│ 2       ┆ 2022-01-02 00:00:00 ┆ 5.0   │
│ 3       ┆ 2022-01-03 00:00:00 ┆ 6.0   │
└─────────┴─────────────────────┴───────┘


In [38]:
# Last three rows of the frame.
print(df.tail(n=3))


shape: (3, 3)
┌─────────┬─────────────────────┬───────┐
│ integer ┆ date                ┆ float │
│ ---     ┆ ---                 ┆ ---   │
│ i64     ┆ datetime[μs]        ┆ f64   │
╞═════════╪═════════════════════╪═══════╡
│ 3       ┆ 2022-01-03 00:00:00 ┆ 6.0   │
│ 4       ┆ 2022-01-04 00:00:00 ┆ 7.0   │
│ 5       ┆ 2022-01-05 00:00:00 ┆ 8.0   │
└─────────┴─────────────────────┴───────┘


In [39]:
# Two randomly chosen rows; no seed is given, so the rows shown may vary between runs.
print(df.sample(n=2))


shape: (2, 3)
┌─────────┬─────────────────────┬───────┐
│ integer ┆ date                ┆ float │
│ ---     ┆ ---                 ┆ ---   │
│ i64     ┆ datetime[μs]        ┆ f64   │
╞═════════╪═════════════════════╪═══════╡
│ 3       ┆ 2022-01-03 00:00:00 ┆ 6.0   │
│ 5       ┆ 2022-01-05 00:00:00 ┆ 8.0   │
└─────────┴─────────────────────┴───────┘


In [40]:
# Summary statistics for each column.
stats = df.describe()
print(stats)

shape: (9, 4)
┌────────────┬──────────┬─────────────────────┬──────────┐
│ describe   ┆ integer  ┆ date                ┆ float    │
│ ---        ┆ ---      ┆ ---                 ┆ ---      │
│ str        ┆ f64      ┆ str                 ┆ f64      │
╞════════════╪══════════╪═════════════════════╪══════════╡
│ count      ┆ 5.0      ┆ 5                   ┆ 5.0      │
│ null_count ┆ 0.0      ┆ 0                   ┆ 0.0      │
│ mean       ┆ 3.0      ┆ null                ┆ 6.0      │
│ std        ┆ 1.581139 ┆ null                ┆ 1.581139 │
│ min        ┆ 1.0      ┆ 2022-01-01 00:00:00 ┆ 4.0      │
│ 25%        ┆ 2.0      ┆ null                ┆ 5.0      │
│ 50%        ┆ 3.0      ┆ null                ┆ 6.0      │
│ 75%        ┆ 4.0      ┆ null                ┆ 7.0      │
│ max        ┆ 5.0      ┆ 2022-01-05 00:00:00 ┆ 8.0      │
└────────────┴──────────┴─────────────────────┴──────────┘


# Context

In [41]:
# Demo frame for the context examples; "nrs" and "names" each contain one missing value.
# "random" is drawn from NumPy's global RNG with no seed set in this cell.
context_data = {
    "nrs": [1, 2, 3, None, 5],
    "names": ["foo", "ham", "spam", "egg", None],
    "random": np.random.rand(5),
    "groups": ["A", "A", "B", "C", "B"],
}
df = pl.DataFrame(context_data)
print(df)

shape: (5, 4)
┌──────┬───────┬──────────┬────────┐
│ nrs  ┆ names ┆ random   ┆ groups │
│ ---  ┆ ---   ┆ ---      ┆ ---    │
│ i64  ┆ str   ┆ f64      ┆ str    │
╞══════╪═══════╪══════════╪════════╡
│ 1    ┆ foo   ┆ 0.485919 ┆ A      │
│ 2    ┆ ham   ┆ 0.661088 ┆ A      │
│ 3    ┆ spam  ┆ 0.43377  ┆ B      │
│ null ┆ egg   ┆ 0.179805 ┆ C      │
│ 5    ┆ null  ┆ 0.327631 ┆ B      │
└──────┴───────┴──────────┴────────┘


In [42]:
# The constructor input above is an ordinary Python dict.
sample_mapping = {
    "nrs": [1, 2, 3, None, 5],
    "names": ["foo", "ham", "spam", "egg", None],
    "random": np.random.rand(5),
    "groups": ["A", "A", "B", "C", "B"],
}
type(sample_mapping)

dict

## Select

In [43]:
# Select context: each expression yields one output column. In the printed result
# the single-value results (sum, first, scaled mean) are repeated on every row.
nrs = pl.col("nrs")
names = pl.col("names")
out = df.select(
    nrs.sum(),
    names.sort(),
    names.first().alias("first name"),
    (nrs.mean() * 10).alias("10xnrs"),
)
print(out)

shape: (5, 4)
┌─────┬───────┬────────────┬────────┐
│ nrs ┆ names ┆ first name ┆ 10xnrs │
│ --- ┆ ---   ┆ ---        ┆ ---    │
│ i64 ┆ str   ┆ str        ┆ f64    │
╞═════╪═══════╪════════════╪════════╡
│ 11  ┆ null  ┆ foo        ┆ 27.5   │
│ 11  ┆ egg   ┆ foo        ┆ 27.5   │
│ 11  ┆ foo   ┆ foo        ┆ 27.5   │
│ 11  ┆ ham   ┆ foo        ┆ 27.5   │
│ 11  ┆ spam  ┆ foo        ┆ 27.5   │
└─────┴───────┴────────────┴────────┘


In [44]:
# with_columns context: keep the existing columns and append two aggregate columns.
# Note that `df` itself is rebound, so later cells see the extra columns.
nrs_total = pl.col("nrs").sum().alias("nrs_sum")
random_count = pl.col("random").count().alias("count")
df = df.with_columns(nrs_total, random_count)
print(df)

shape: (5, 6)
┌──────┬───────┬──────────┬────────┬─────────┬───────┐
│ nrs  ┆ names ┆ random   ┆ groups ┆ nrs_sum ┆ count │
│ ---  ┆ ---   ┆ ---      ┆ ---    ┆ ---     ┆ ---   │
│ i64  ┆ str   ┆ f64      ┆ str    ┆ i64     ┆ u32   │
╞══════╪═══════╪══════════╪════════╪═════════╪═══════╡
│ 1    ┆ foo   ┆ 0.485919 ┆ A      ┆ 11      ┆ 5     │
│ 2    ┆ ham   ┆ 0.661088 ┆ A      ┆ 11      ┆ 5     │
│ 3    ┆ spam  ┆ 0.43377  ┆ B      ┆ 11      ┆ 5     │
│ null ┆ egg   ┆ 0.179805 ┆ C      ┆ 11      ┆ 5     │
│ 5    ┆ null  ┆ 0.327631 ┆ B      ┆ 11      ┆ 5     │
└──────┴───────┴──────────┴────────┴─────────┴───────┘


## Filter

In [45]:
# Keep only the rows whose "nrs" value is greater than 2.
nrs_above_two = pl.col("nrs") > 2
out = df.filter(nrs_above_two)
print(out)

shape: (2, 6)
┌─────┬───────┬──────────┬────────┬─────────┬───────┐
│ nrs ┆ names ┆ random   ┆ groups ┆ nrs_sum ┆ count │
│ --- ┆ ---   ┆ ---      ┆ ---    ┆ ---     ┆ ---   │
│ i64 ┆ str   ┆ f64      ┆ str    ┆ i64     ┆ u32   │
╞═════╪═══════╪══════════╪════════╪═════════╪═══════╡
│ 3   ┆ spam  ┆ 0.43377  ┆ B      ┆ 11      ┆ 5     │
│ 5   ┆ null  ┆ 0.327631 ┆ B      ┆ 11      ┆ 5     │
└─────┴───────┴──────────┴────────┴─────────┴───────┘


## Group by / aggregation

In [46]:
# Aggregate per group. maintain_order=True keeps groups in first-appearance order
# so the printed table is stable between runs, matching the other group_by cells
# in this notebook.
out = df.group_by("groups", maintain_order=True).agg(
    pl.sum("nrs"),  # sum nrs by groups
    pl.col("random").count().alias("count"),  # count group members
    # sum random where name != null; the explicit alias yields the same "random_sum"
    # column name as the deprecated Expr.suffix("_sum") and works across Polars versions
    pl.col("random").filter(pl.col("names").is_not_null()).sum().alias("random_sum"),
    pl.col("names").reverse().alias("reversed names"),  # per-group names, reversed
)
print(out)

shape: (3, 5)
┌────────┬─────┬───────┬────────────┬────────────────┐
│ groups ┆ nrs ┆ count ┆ random_sum ┆ reversed names │
│ ---    ┆ --- ┆ ---   ┆ ---        ┆ ---            │
│ str    ┆ i64 ┆ u32   ┆ f64        ┆ list[str]      │
╞════════╪═════╪═══════╪════════════╪════════════════╡
│ A      ┆ 3   ┆ 2     ┆ 1.147007   ┆ ["ham", "foo"] │
│ B      ┆ 8   ┆ 2     ┆ 0.43377    ┆ [null, "spam"] │
│ C      ┆ 0   ┆ 1     ┆ 0.179805   ┆ ["egg"]        │
└────────┴─────┴───────┴────────────┴────────────────┘


# Expressions

In [47]:
# Expression chaining: refer to column "foo", sort it, then take the first two values.
# No DataFrame is involved in this cell.
pl.col("foo").sort().head(2)

In [49]:
# Display the current state of `df` as the cell's output.
df

nrs,names,random,groups,nrs_sum,count
i64,str,f64,str,i64,u32
1.0,"""foo""",0.485919,"""A""",11,5
2.0,"""ham""",0.661088,"""A""",11,5
3.0,"""spam""",0.43377,"""B""",11,5
,"""egg""",0.179805,"""C""",11,5
5.0,,0.327631,"""B""",11,5


In [None]:
# Scratch literal (this cell has no execution count): the same mapping that was
# used to build the context demo frame.
{
    "nrs": [1, 2, 3, None, 5],
    "names": ["foo", "ham", "spam", "egg", None],
    "random": np.random.rand(5),
    "groups": ["A", "A", "B", "C", "B"],
}

In [62]:
# Append the column(s) of a second, equally long frame to `df`.
extra_columns = pl.DataFrame({"nr": [1, 2, 3, 4, 5]})
df.with_columns(extra_columns)

nrs,names,random,groups,nrs_sum,count,nr
i64,str,f64,str,i64,u32,i64
1.0,"""foo""",0.485919,"""A""",11,5,1
2.0,"""ham""",0.661088,"""A""",11,5,2
3.0,"""spam""",0.43377,"""B""",11,5,3
,"""egg""",0.179805,"""C""",11,5,4
5.0,,0.327631,"""B""",11,5,5


In [65]:
# Select four sorted names plus the sum of the "nrs_sum" values equal to 11,
# then append an "nr" column from a four-row frame.
sorted_names = pl.col("names").sort().head(4)
matching_sum = pl.col("nrs_sum").filter(pl.col("nrs_sum") == 11).sum()
df.select(sorted_names, matching_sum).with_columns(pl.DataFrame({"nr": [1, 2, 3, 4]}))

names,nrs_sum,nr
str,i64,i64
,55,1
"""egg""",55,2
"""foo""",55,3
"""ham""",55,4
