In [13]:

import polars as pl
# Small demo frame shared by the following cells: two integer columns, two
# string columns, and an "optional" integer column whose third entry is null.
df = pl.DataFrame(
    dict(
        A=[1, 2, 3, 4, 5],
        fruits=["banana", "banana", "apple", "apple", "banana"],
        B=[5, 4, 3, 2, 1],
        cars=["beetle", "audi", "beetle", "beetle", "beetle"],
        optional=[28, 300, None, 2, -30],
    )
)
df  # bare last expression so the notebook renders the frame

A,fruits,B,cars,optional
i64,str,i64,str,i64
1,"""banana""",5,"""beetle""",28.0
2,"""banana""",4,"""audi""",300.0
3,"""apple""",3,"""beetle""",
4,"""apple""",2,"""beetle""",2.0
5,"""banana""",1,"""beetle""",-30.0


In [14]:
# Columns can be selected by name, either explicitly or implicitly.
df.select(
    [
        pl.col("A"),  # explicit column expression
        "B",  # a bare string is inferred to be a column name
        pl.lit("B"),  # pl.lit is required to get the literal string "B" instead
        pl.col("fruits"),
    ]
)


A,B,literal,fruits
i64,i64,str,str
1,5,"""B""","""banana"""
2,4,"""B""","""banana"""
3,3,"""B""","""apple"""
4,2,"""B""","""apple"""
5,1,"""B""","""banana"""


In [15]:
# A column name that starts with '^' and ends with '$' is treated as a regex.
# The alternation is grouped so both anchors apply to every alternative: the
# ungrouped form "^A|B$" means "starts with A" OR "ends with B", which would
# also pick up names such as "AB" or "xB". For this frame the result is the
# same (only columns A and B match), each summed to a single row.
df.select(
    [
        pl.col("^(A|B)$").sum(),
    ]
)


A,B
i64,i64
15,15


In [16]:
# Select every column twice: first in its original row order, then reversed
# with "_reverse" appended to each column name so the names do not collide.
# NOTE(review): newer polars releases appear to expose this as
# `.name.suffix(...)` — confirm against the installed version before upgrading.
df.select(
    [
        pl.all(),
        pl.all().reverse().suffix("_reverse"),
    ]
)


A,fruits,B,cars,optional,A_reverse,fruits_reverse,B_reverse,cars_reverse,optional_reverse
i64,str,i64,str,i64,i64,str,i64,str,i64
1,"""banana""",5,"""beetle""",28.0,5,"""banana""",1,"""beetle""",-30.0
2,"""banana""",4,"""audi""",300.0,4,"""apple""",2,"""beetle""",2.0
3,"""apple""",3,"""beetle""",,3,"""apple""",3,"""beetle""",
4,"""apple""",2,"""beetle""",2.0,2,"""banana""",4,"""audi""",300.0
5,"""banana""",1,"""beetle""",-30.0,1,"""banana""",5,"""beetle""",28.0


In [17]:
# Specialized functions live in namespaces such as `str` and `dt`.
# Here `str.contains` is given a regex; the result is a boolean column that is
# True for the "fruits" values beginning with "b".
predicate = pl.col("fruits").str.contains("^b.*")

df.select(
    [
        predicate,
    ]
)


fruits
bool
True
True
False
False
True


In [18]:
# A boolean expression can be passed to `filter` inside an expression, so each
# aggregation below only sees the rows that satisfy its own predicate.
df.select(
    [
        # Sum of A over rows whose fruit starts with "b": 1 + 2 + 5 = 8.
        pl.col("A").filter(pl.col("fruits").str.contains("^b.*")).sum(),
        # Sum of B over rows whose car starts with "b" (5 + 3 + 2 + 1 = 11),
        # multiplied by the unfiltered sum of B (15), giving 165.
        (
            pl.col("B").filter(pl.col("cars").str.contains("^b.*")).sum()
            * pl.col("B").sum()
        ).alias("some_compute()"),
    ]
)


A,some_compute()
i64,i64
8,165
