In [3]:
import numpy as np
import polars as pl

In [4]:
# Small demo frame; 'nrs' and 'names' each contain exactly one null.
# The 'random' column comes from a fixed-seed generator so the same values are
# produced on every Restart & Run All (an unseeded np.random.rand(6) yields a
# different frame on each run).
df = pl.DataFrame({
    'nrs': [1, 2, 3, None, 5, 6],
    'names': ['foo', 'ham', 'spam', 'egg', None, 'spam'],
    'random': np.random.default_rng(42).random(6),
    'group': list('AABCBB')})
df

nrs,names,random,group
i64,str,f64,str
1.0,"""foo""",0.05789,"""A"""
2.0,"""ham""",0.399455,"""A"""
3.0,"""spam""",0.542526,"""B"""
,"""egg""",0.586351,"""C"""
5.0,,0.288201,"""B"""
6.0,"""spam""",0.752893,"""B"""


In [6]:
# Build a new frame from four expressions over `df`:
#   - the sum of 'nrs' (keeps the name 'nrs'),
#   - 'names' sorted,
#   - the first value of 'names', exposed as 'first_name',
#   - ten times the mean of 'nrs', exposed as 'ten_xbar'.
out = df.select([
    pl.col('nrs').sum(),
    pl.col('names').sort(),
    pl.col('names').first().alias('first_name'),
    (pl.col('nrs').mean() * 10).alias('ten_xbar'),
])
out

nrs,names,first_name,ten_xbar
i64,str,str,f64
17,,"""foo""",34.0
17,"""egg""","""foo""",34.0
17,"""foo""","""foo""",34.0
17,"""ham""","""foo""",34.0
17,"""spam""","""foo""",34.0
17,"""spam""","""foo""",34.0


In [7]:
# Add two frame-wide aggregates as extra columns and rebind `df` to the
# widened frame.
df = df.with_columns([
    pl.col('nrs').sum().alias('∑nrs'),        # total of 'nrs'
    pl.col('random').count().alias('count'),  # number of 'random' values
])
df

nrs,names,random,group,∑nrs,count
i64,str,f64,str,i64,u32
1.0,"""foo""",0.05789,"""A""",17,6
2.0,"""ham""",0.399455,"""A""",17,6
3.0,"""spam""",0.542526,"""B""",17,6
,"""egg""",0.586351,"""C""",17,6
5.0,,0.288201,"""B""",17,6
6.0,"""spam""",0.752893,"""B""",17,6


In [12]:
# Per-group summary of `df`, one output row per distinct 'group' value.
# maintain_order=True keeps the groups in order of first appearance in `df`;
# without it polars gives no guarantee about group order, so the displayed
# table could be shuffled from one run to the next.
# NOTE(review): recent polars releases rename `groupby` -> `group_by` and
# `.suffix` -> `.name.suffix`; the older spellings are kept to match the API
# used here — confirm against the installed polars version.
out = (
    df
    .groupby('group', maintain_order=True)
    .agg([
        # Sum of 'nrs' within each group.
        pl.sum('nrs'),
        # Number of 'random' values in each group.
        pl.col('random').count().alias('count'),
        # Sum of 'random' restricted to rows whose 'names' is not null;
        # the suffix turns the output column name into 'random_sum'.
        (pl
         .col('random')
         .filter(pl.col('names').is_not_null())
         .sum()
         .suffix('_sum')),
        # Each group's 'names' in reversed row order, collected as a list.
        pl.col('names').reverse().alias('reversed_names')]))
out

group,nrs,count,random_sum,reversed_names
str,i64,u32,f64,list[str]
"""A""",3.0,2,0.457344,"[""ham"", ""foo""]"
"""B""",14.0,3,1.295419,"[""spam"", null, ""spam""]"
"""C""",,1,0.586351,"[""egg""]"
