In [1]:
from datetime import datetime, timedelta

import numpy as np
import polars as pl

In [2]:
series = pl.Series('a', [1, 2, 3, 4, 5])
series

a
i64
1
2
3
4
5


In [3]:
series = pl.Series([1, 2, 3, 4, 5])
series

1
2
3
4
5


In [4]:
df = pl.DataFrame({
    'id': [1, 2, 3],
    'date':[(datetime(2000, 1, i)) for i in [1, 2, 3]],
    'stuff': [4.3, 5.4, 6.5]})
df

id,date,stuff
i64,datetime[μs],f64
1,2000-01-01 00:00:00,4.3
2,2000-01-02 00:00:00,5.4
3,2000-01-03 00:00:00,6.5


In [5]:
df.write_csv('polar_test.csv')

In [6]:
df = pl.read_csv('polar_test.csv')
df

id,date,stuff
i64,str,f64
1,"""2000-01-01T00:...",4.3
2,"""2000-01-02T00:...",5.4
3,"""2000-01-03T00:...",6.5


In [7]:
df = pl.read_csv('polar_test.csv', parse_dates=True)
df

id,date,stuff
i64,datetime[μs],f64
1,2000-01-01 00:00:00,4.3
2,2000-01-02 00:00:00,5.4
3,2000-01-03 00:00:00,6.5


### Viewing Data

In [2]:
df = pl.DataFrame({
    'a': np.arange(8),
    'b': np.random.rand(8),
    'c': [datetime(2023, 1, i) for i in range(1, 9)],
    'd': [1, 2.0, np.nan, np.nan, 0, -5, -42, None]})
df

a,b,c,d
i64,f64,datetime[μs],f64
0,0.561821,2023-01-01 00:00:00,1.0
1,0.561018,2023-01-02 00:00:00,2.0
2,0.247676,2023-01-03 00:00:00,
3,0.433762,2023-01-04 00:00:00,
4,0.744252,2023-01-05 00:00:00,0.0
5,0.414882,2023-01-06 00:00:00,-5.0
6,0.308019,2023-01-07 00:00:00,-42.0
7,0.359695,2023-01-08 00:00:00,


In [3]:
df.head()

a,b,c,d
i64,f64,datetime[μs],f64
0,0.070837,2023-01-01 00:00:00,1.0
1,0.881005,2023-01-02 00:00:00,2.0
2,0.739494,2023-01-03 00:00:00,
3,0.287771,2023-01-04 00:00:00,
4,0.740038,2023-01-05 00:00:00,0.0


In [4]:
df.sample(3)

a,b,c,d
i64,f64,datetime[μs],f64
0,0.070837,2023-01-01 00:00:00,1.0
7,0.057387,2023-01-08 00:00:00,
3,0.287771,2023-01-04 00:00:00,


In [5]:
df.describe()

describe,a,b,c,d
str,f64,f64,str,f64
"""count""",8.0,8.0,"""8""",8.0
"""null_count""",0.0,0.0,"""0""",1.0
"""mean""",3.5,0.480167,,
"""std""",2.44949,0.32028,,
"""min""",0.0,0.057387,"""2023-01-01 00:...",-42.0
"""max""",7.0,0.881005,"""2023-01-08 00:...",2.0
"""median""",3.5,0.532404,,1.0


### Expressions

In [6]:
df.select(pl.col('*'))

a,b,c,d
i64,f64,datetime[μs],f64
0,0.070837,2023-01-01 00:00:00,1.0
1,0.881005,2023-01-02 00:00:00,2.0
2,0.739494,2023-01-03 00:00:00,
3,0.287771,2023-01-04 00:00:00,
4,0.740038,2023-01-05 00:00:00,0.0
5,0.402342,2023-01-06 00:00:00,-5.0
6,0.662465,2023-01-07 00:00:00,-42.0
7,0.057387,2023-01-08 00:00:00,


In [7]:
df.select(pl.col(['a', 'b']))

a,b
i64,f64
0,0.070837
1,0.881005
2,0.739494
3,0.287771
4,0.740038
5,0.402342
6,0.662465
7,0.057387


In [8]:
df.select([pl.col('a'), pl.col('b')]).limit(3)

a,b
i64,f64
0,0.070837
1,0.881005
2,0.739494


In [9]:
df.select([pl.exclude('a')])

b,c,d
f64,datetime[μs],f64
0.070837,2023-01-01 00:00:00,1.0
0.881005,2023-01-02 00:00:00,2.0
0.739494,2023-01-03 00:00:00,
0.287771,2023-01-04 00:00:00,
0.740038,2023-01-05 00:00:00,0.0
0.402342,2023-01-06 00:00:00,-5.0
0.662465,2023-01-07 00:00:00,-42.0
0.057387,2023-01-08 00:00:00,


### Filter

In [5]:
df.filter(
    pl.col('c').is_between(
        datetime(2023, 1, 2), datetime(2023, 1, 5), closed='both'))

a,b,c,d
i64,f64,datetime[μs],f64
1,0.175648,2023-01-02 00:00:00,2.0
2,0.839963,2023-01-03 00:00:00,
3,0.265376,2023-01-04 00:00:00,
4,0.12367,2023-01-05 00:00:00,0.0


In [6]:
df.filter((pl.col('a') <= 3) & (pl.col('d').is_not_nan()))

a,b,c,d
i64,f64,datetime[μs],f64
0,0.433417,2023-01-01 00:00:00,1.0
1,0.175648,2023-01-02 00:00:00,2.0


### With_Columns

In [7]:
df.with_columns([
    pl.col('b').sum().alias('e'),
    (pl.col('b') + 42).alias('b+42')])

a,b,c,d,e,b+42
i64,f64,datetime[μs],f64,f64,f64
0,0.433417,2023-01-01 00:00:00,1.0,3.03032,42.433417
1,0.175648,2023-01-02 00:00:00,2.0,3.03032,42.175648
2,0.839963,2023-01-03 00:00:00,,3.03032,42.839963
3,0.265376,2023-01-04 00:00:00,,3.03032,42.265376
4,0.12367,2023-01-05 00:00:00,0.0,3.03032,42.12367
5,0.199641,2023-01-06 00:00:00,-5.0,3.03032,42.199641
6,0.963555,2023-01-07 00:00:00,-42.0,3.03032,42.963555
7,0.02905,2023-01-08 00:00:00,,3.03032,42.02905


### Groupby

In [8]:
df2 = pl.DataFrame({
    'x': np.arange(8),
    'y': list('AAABBCXX')})
df2

x,y
i64,str
0,"""A"""
1,"""A"""
2,"""A"""
3,"""B"""
4,"""B"""
5,"""C"""
6,"""X"""
7,"""X"""


In [9]:
df2.groupby('y', maintain_order=True).count()

y,count
str,u32
"""A""",3
"""B""",2
"""C""",1
"""X""",2


In [10]:
(df2
 .groupby('y', maintain_order=True)
 .agg([
     pl.col('*').count().alias('count'),
     pl.col('*').sum().alias('sum')]))

y,count,sum
str,u32,i64
"""A""",3,3
"""B""",2,7
"""C""",1,5
"""X""",2,13


### Combining operations

In [4]:
dfx = (
    df
    .with_columns((pl.col('a') * pl.col('b')).alias('ab'))
    .select([pl.all().exclude(['c', 'd'])]))
dfx

a,b,ab
i64,f64,f64
0,0.561821,0.0
1,0.561018,0.561018
2,0.247676,0.495352
3,0.433762,1.301287
4,0.744252,2.977006
5,0.414882,2.074412
6,0.308019,1.848114
7,0.359695,2.517866


In [6]:
dfy = (
    df
    .with_columns((pl.col('a') * pl.col('b')).alias('ab'))
    .select([pl.all().exclude('d')]))
dfy

a,b,c,ab
i64,f64,datetime[μs],f64
0,0.561821,2023-01-01 00:00:00,0.0
1,0.561018,2023-01-02 00:00:00,0.561018
2,0.247676,2023-01-03 00:00:00,0.495352
3,0.433762,2023-01-04 00:00:00,1.301287
4,0.744252,2023-01-05 00:00:00,2.977006
5,0.414882,2023-01-06 00:00:00,2.074412
6,0.308019,2023-01-07 00:00:00,1.848114
7,0.359695,2023-01-08 00:00:00,2.517866


### Combining dataframes

In [9]:
df.join(df2, left_on='a', right_on='x')

a,b,c,d,y
i64,f64,datetime[μs],f64,str
0,0.561821,2023-01-01 00:00:00,1.0,"""A"""
1,0.561018,2023-01-02 00:00:00,2.0,"""A"""
2,0.247676,2023-01-03 00:00:00,,"""A"""
3,0.433762,2023-01-04 00:00:00,,"""B"""
4,0.744252,2023-01-05 00:00:00,0.0,"""B"""
5,0.414882,2023-01-06 00:00:00,-5.0,"""C"""
6,0.308019,2023-01-07 00:00:00,-42.0,"""X"""
7,0.359695,2023-01-08 00:00:00,,"""X"""


### Concat

In [10]:
pl.concat([df, df2], how='horizontal')

a,b,c,d,x,y
i64,f64,datetime[μs],f64,i64,str
0,0.561821,2023-01-01 00:00:00,1.0,0,"""A"""
1,0.561018,2023-01-02 00:00:00,2.0,1,"""A"""
2,0.247676,2023-01-03 00:00:00,,2,"""A"""
3,0.433762,2023-01-04 00:00:00,,3,"""B"""
4,0.744252,2023-01-05 00:00:00,0.0,4,"""B"""
5,0.414882,2023-01-06 00:00:00,-5.0,5,"""C"""
6,0.308019,2023-01-07 00:00:00,-42.0,6,"""X"""
7,0.359695,2023-01-08 00:00:00,,7,"""X"""
