In [1]:
import polars as pl

In [2]:
# Small demo frame queried by the cells that follow: two integer columns,
# two string columns, and one integer column that contains a null.
df = pl.DataFrame(
    {
        'A': [1, 2, 3, 4, 5],
        'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'],
        'B': [5, 4, 3, 2, 1],
        'cars': ['beetle', 'audi', 'beetle', 'beetle', 'beetle'],
        'optional': [28, 300, None, 2, -30],
    }
)
df

A,fruits,B,cars,optional
i64,str,i64,str,i64
1,"""banana""",5,"""beetle""",28.0
2,"""banana""",4,"""audi""",300.0
3,"""apple""",3,"""beetle""",
4,"""apple""",2,"""beetle""",2.0
5,"""banana""",1,"""beetle""",-30.0


In [3]:
# Select with a single list argument. The three entries are: a column
# expression, a bare column name, and a string literal. pl.lit('B') is the
# constant text "B" on every row (output column `literal`), not column B.
df.select([pl.col('A'), 'B', pl.lit('B')])

A,B,literal
i64,i64,str
1,5,"""B"""
2,4,"""B"""
3,3,"""B"""
4,2,"""B"""
5,1,"""B"""


In [4]:
# Same three selections, passed as separate positional arguments rather
# than wrapped in a list.
df.select(
    pl.col('A'),  # column, via an expression
    'B',          # column, via its bare name
    pl.lit('B'),  # constant string repeated on every row
)

A,B,literal
i64,i64,str
1,5,"""B"""
2,4,"""B"""
3,3,"""B"""
4,2,"""B"""
5,1,"""B"""


In [5]:
# Regex column selection: sum only the columns named exactly 'A' or 'B'.
# The alternation is grouped so that both anchors apply to both names.
# Ungrouped, '^A|B$' means "starts with A" OR "ends with B" and would also
# pick up columns such as 'Amount' or 'subB' if the frame had them.
df.select(pl.col('^(A|B)$').sum())

A,B
i64,i64
15,15


In [6]:
# Every original column, followed by a row-reversed copy of each one.
# The suffix keeps the reversed copies from clashing with the original names.
df.select(
    pl.all(),
    pl.all().reverse().suffix('_reversed'),
)

A,fruits,B,cars,optional,A_reversed,fruits_reversed,B_reversed,cars_reversed,optional_reversed
i64,str,i64,str,i64,i64,str,i64,str,i64
1,"""banana""",5,"""beetle""",28.0,5,"""banana""",1,"""beetle""",-30.0
2,"""banana""",4,"""audi""",300.0,4,"""apple""",2,"""beetle""",2.0
3,"""apple""",3,"""beetle""",,3,"""apple""",3,"""beetle""",
4,"""apple""",2,"""beetle""",2.0,2,"""banana""",4,"""audi""",300.0
5,"""banana""",1,"""beetle""",-30.0,1,"""banana""",5,"""beetle""",28.0


In [7]:
# Every original column, followed by each column's sum under a '_sum' name.
# As the output shows, the single aggregated value is repeated on every row,
# and the string columns produce nulls.
df.select(
    pl.all(),
    pl.all().sum().suffix('_sum'),
)

A,fruits,B,cars,optional,A_sum,fruits_sum,B_sum,cars_sum,optional_sum
i64,str,i64,str,i64,i64,str,i64,str,i64
1,"""banana""",5,"""beetle""",28.0,15,,15,,300
2,"""banana""",4,"""audi""",300.0,15,,15,,300
3,"""apple""",3,"""beetle""",,15,,15,,300
4,"""apple""",2,"""beetle""",2.0,15,,15,,300
5,"""banana""",1,"""beetle""",-30.0,15,,15,,300


In [8]:
# Boolean expression: true where `fruits` matches the regex '^b.*'
# (i.e. the value starts with "b"). It is stored in a variable so that it
# can be reused outside this cell.
predicate = (
    pl.col('fruits')
    .str.contains('^b.*')
)

# Keep only the rows for which the predicate holds.
df.filter(predicate)

A,fruits,B,cars,optional
i64,str,i64,str,i64
1,"""banana""",5,"""beetle""",28
2,"""banana""",4,"""audi""",300
5,"""banana""",1,"""beetle""",-30


In [9]:
# Apply the predicate to a single expression instead of the whole frame:
# sum A over only the rows where `predicate` (defined in an earlier cell)
# is true.
df.select(
    pl.col('A')
    .filter(predicate)
    .sum()
)

A
i64
8


In [10]:
# Arithmetic across columns, mixing row-wise values with an aggregate:
# A / 124 * B for each row, divided by the total of column B.
# Division and multiplication associate left to right, so no inner
# parentheses are needed.
df.select(
    (pl.col('A') / 124. * pl.col('B') / pl.sum('B')).alias('computed')
)

computed
f64
0.002688
0.004301
0.004839
0.004301
0.002688


In [11]:
# Conditional column: take B on the rows where fruits == 'banana',
# and -1 on all other rows.
df.select(
    'fruits',
    'B',
    (
        pl.when(pl.col('fruits') == 'banana')
        .then(pl.col('B'))
        .otherwise(-1)
        .alias('b')
    ),
)

fruits,B,b
str,i64,i64
"""banana""",5,5
"""banana""",4,4
"""apple""",3,-1
"""apple""",2,-1
"""banana""",1,1


In [12]:
# Horizontal fold: starting from 0, add up A, B and B squared for each row
# (e.g. the first row gives 1 + 5 + 25 = 31).
df.select([
    'A',
    'B',
    pl.fold(
        0,
        lambda total, term: total + term,
        [pl.col('A'), 'B', pl.col('B')**2],
    ).alias('fold'),
])

A,B,fold
i64,i64,f64
1,5,31.0
2,4,22.0
3,3,15.0
4,2,10.0
5,1,7.0


In [13]:
# One output row per fruit with two aggregates of column B.
df.groupby('fruits').agg([
    pl.col('B').count().alias('BCount'),  # number of rows in the group
    pl.col('B').sum().alias('BSum'),      # total of B within the group
])

fruits,BCount,BSum
str,u32,i64
"""apple""",2,5
"""banana""",3,10


In [14]:
# Several aggregations per fruit group, mixing the expression-method and
# function spellings (pl.col(...).sum() vs pl.first(...) / pl.count(...)).
# maintain_order=True makes the groups come out in order of first
# appearance, so the displayed result is the same on every run. Without it
# the group order is not fixed: the recorded outputs in this notebook list
# the same groups in different orders.
(df
 .groupby('fruits', maintain_order=True)
 .agg([
     pl.col('B').sum().alias('BSum'),
     pl.first('fruits').alias('firstFruits'),
     pl.count('A').alias('countA'),
     # Not reduced to a scalar: yields the group's `cars` values as a list,
     # in reversed row order.
     pl.col('cars').reverse()]))

fruits,BSum,firstFruits,countA,cars
str,i64,str,u32,list[str]
"""banana""",10,"""banana""",3,"[""beetle"", ""audi"", ""beetle""]"
"""apple""",5,"""apple""",2,"[""beetle"", ""beetle""]"


In [15]:
# Sort by car, aggregate per fruit, then unnest the per-group car lists.
# NOTE(review): the order of the groups in the result does not appear to be
# fixed (recorded outputs in this notebook differ); confirm whether
# maintain_order=True is wanted here.
(
    df.sort('cars')
    .groupby('fruits')
    .agg([
        pl.col('B').sum().alias('BSum'),
        pl.sum('B').alias('BSum2'),  # same total as BSum, function spelling
        pl.first('fruits').alias('firstFruits'),
        pl.count('A').alias('countA'),
        pl.col('cars').reverse(),    # per-group list, reversed
    ])
    # One row per list element; the other columns are repeated.
    .explode('cars')
)

fruits,BSum,BSum2,firstFruits,countA,cars
str,i64,i64,str,u32,str
"""apple""",5,5,"""apple""",2,"""beetle"""
"""apple""",5,5,"""apple""",2,"""beetle"""
"""banana""",10,10,"""banana""",3,"""beetle"""
"""banana""",10,10,"""banana""",3,"""beetle"""
"""banana""",10,10,"""banana""",3,"""audi"""


In [17]:
# For each fruit, list the zero-based row positions in `df` that belong to
# the group (e.g. the apple rows are at positions 2 and 3).
df.groupby('fruits').agg(
    pl.col('B').agg_groups().alias('groupRowIndices')
)

fruits,groupRowIndices
str,list[u32]
"""apple""","[2, 3]"
"""banana""","[0, 1, 4]"


In [18]:
# Per-fruit sum of B that counts only the rows where B > 1. The filter is
# applied inside the aggregation, so every group still gets an output row.
# maintain_order=True makes the groups come out in order of first
# appearance, so the displayed result is the same on every run. Without it
# the group order is not fixed.
(df
 .groupby('fruits', maintain_order=True)
 .agg([pl.col('B').filter(pl.col('B') > 1).sum()]))

fruits,B
str,i64
"""banana""",9
"""apple""",5


In [20]:
# Window expression: sum B within each fruit group and attach that total to
# every row of the group. The frame keeps its original rows and order.
df.select(
    'fruits',
    'cars',
    'B',
    pl.col('B').sum().over('fruits').alias('BSumByFruit'),
)

fruits,cars,B,BSumByFruit
str,str,i64,i64
"""banana""","""beetle""",5,10
"""banana""","""audi""",4,10
"""apple""","""beetle""",3,5
"""apple""","""beetle""",2,5
"""banana""","""beetle""",1,10


In [21]:
# Two independent windows in one select: the total of B per fruit and the
# total of B per car, each attached to every row of its own group.
df.select(
    'fruits',
    'cars',
    'B',
    pl.col('B').sum().over('fruits').alias('BSumByFruit'),
    pl.col('B').sum().over('cars').alias('BSumByCar'),
)

fruits,cars,B,BSumByFruit,BSumByCar
str,str,i64,i64,i64
"""banana""","""beetle""",5,10,11
"""banana""","""audi""",4,10,4
"""apple""","""beetle""",3,5,11
"""apple""","""beetle""",2,5,11
"""banana""","""beetle""",1,10,11


In [22]:
# Lag within a group: each row gets the B value of the previous row that
# has the same fruit. The first row of each fruit has no predecessor and
# is null.
df.select(
    'fruits',
    'B',
    pl.col('B').shift().over('fruits').alias('lagBByFruit'),
)

fruits,B,lagBByFruit
str,i64,i64
"""banana""",5,
"""banana""",4,5.0
"""apple""",3,
"""apple""",2,3.0
"""banana""",1,4.0
