In [10]:
import polars as pl
from polars import col, lit
import numpy as np

In [2]:
# Small demo frame shared by the cells below: integer columns "A" and "B"
# plus string columns "fruits" and "cars".
df = pl.DataFrame(
    dict(
        A=[1, 2, 3, 4, 5],
        fruits=["banana", "banana", "apple", "apple", "banana"],
        B=[5, 4, 3, 2, 1],
        cars=["beetle", "audi", "beetle", "beetle", "beetle"],
    )
)

In [11]:
# Expression showcase on the frame sorted by "fruits"; each entry of the
# select list yields one output column, in the order listed.
(
    df.sort("fruits").select(
        [
            col("fruits"),
            col("cars"),
            # the literal string "fruits" (not the column of that name)
            lit("fruits").alias("literal_string_fruits"),
            # sum of "B" restricted to rows whose "cars" is "beetle"
            col("B").filter(col("cars") == "beetle").sum(),
            # per "cars" group: sum of "A" over the rows with B > 2
            col("A").filter(col("B") > 2).sum().over("cars").alias("sum_A_by_cars"),
            # per "fruits" group: sum of "A"
            col("A").sum().over("fruits").alias("sum_A_by_fruits"),
            # per "fruits" group: "A" in reversed order
            col("A").reverse().over("fruits").flatten().alias("rev_A_by_fruits"),
            # per "fruits" group: "A" ordered by "B"
            col("A").sort_by("B").over("fruits").flatten().alias("sort_A_by_B_by_fruits"),
            # a NumPy ufunc called directly on an expression
            np.exp(col("A")).alias("exponent"),
        ]
    )
)

fruits,cars,literal_string_fruits,B,sum_A_by_cars,sum_A_by_fruits,rev_A_by_fruits,sort_A_by_B_by_fruits,exponent
str,str,str,i64,i64,i64,i64,i64,f64
"""apple""","""beetle""","""fruits""",11,4,7,4,4,20.085536923187668
"""apple""","""beetle""","""fruits""",11,4,7,3,3,54.59815003314424
"""banana""","""beetle""","""fruits""",11,4,8,5,5,2.718281828459045
"""banana""","""audi""","""fruits""",11,2,8,2,2,7.38905609893065
"""banana""","""beetle""","""fruits""",11,4,8,1,1,148.4131591025766


In [16]:
# if / else: "new" takes the value of "B" on rows where "cars" is "audi"
# and -1 on every other row.
# Uses with_columns (passed a list) rather than the deprecated
# DataFrame.with_column, which recent polars releases no longer provide.
df.with_columns([
    pl.when(col("cars") == "audi").then(col("B")).otherwise(-1).alias("new")
])

A,fruits,B,cars,new
i64,str,i64,str,i64
1,"""banana""",5,"""beetle""",-1
2,"""banana""",4,"""audi""",4
3,"""apple""",3,"""beetle""",-1
4,"""apple""",2,"""beetle""",-1
5,"""banana""",1,"""beetle""",-1


In [19]:
# where: keep only the rows whose "cars" value is "beetle"
df.filter(
    col("cars") == "beetle"
)

A,fruits,B,cars
i64,str,i64,str
1,"""banana""",5,"""beetle"""
3,"""apple""",3,"""beetle"""
4,"""apple""",2,"""beetle"""
5,"""banana""",1,"""beetle"""


In [25]:
# select columns: every column except "cars"
df.select(pl.all().exclude("cars"))

A,fruits,B
i64,str,i64
1,"""banana""",5
2,"""banana""",4
3,"""apple""",3
4,"""apple""",2
5,"""banana""",1


In [26]:
# group by "fruits" and gather each group's "cars" values into a list column
df.groupby("fruits").agg([col("cars")])

fruits,cars
str,list
"""apple""","[""beetle"", ""beetle""]"
"""banana""","[""beetle"", ""audi"", ""beetle""]"


In [27]:
# group by "fruits" and count the "cars" entries in each group.
# maintain_order=True keeps the groups in order of first appearance; without
# it the row order of the result is not guaranteed to be stable between runs.
df.groupby("fruits", maintain_order=True).agg([
    col("cars").count().alias("car_count")
])

fruits,car_count
str,u32
"""banana""",3
"""apple""",2


In [40]:
# apply: run a Python callable on the values of "A" (here NumPy's exp)
df.select(
    [
        col("fruits"),
        col("cars"),
        col("A"),
        col("A").apply(lambda value: np.exp(value)).alias("a_new"),
    ]
)

fruits,cars,A,a_new
str,str,i64,f64
"""banana""","""beetle""",1,2.718281828459045
"""banana""","""audi""",2,7.38905609893065
"""apple""","""beetle""",3,20.085536923187668
"""apple""","""beetle""",4,54.59815003314424
"""banana""","""beetle""",5,148.4131591025766
