In [1]:
from datetime import datetime, timedelta

import numpy as np
import polars as pl

In [2]:
# Build a named Series: the name and the values are spelled out as keywords.
series = pl.Series(name='a', values=[1, 2, 3, 4, 5])
series

a
i64
1
2
3
4
5


In [3]:
# Same values, but without giving the Series a name.
series = pl.Series(values=[1, 2, 3, 4, 5])
series

1
2
3
4
5


In [4]:
# Small frame with an integer, a datetime and a float column.
# The dates are 1, 2 and 3 January 2000.
january_days = [datetime(2000, 1, day) for day in (1, 2, 3)]
df = pl.DataFrame({
    'id': [1, 2, 3],
    'date': january_days,
    'stuff': [4.3, 5.4, 6.5],
})
df

id,date,stuff
i64,datetime[μs],f64
1,2000-01-01 00:00:00,4.3
2,2000-01-02 00:00:00,5.4
3,2000-01-03 00:00:00,6.5


In [5]:
# Persist the frame as CSV in the current working directory.
df.write_csv(file='polar_test.csv')

In [6]:
# Read the CSV back with default options; the 'date' column is loaded as
# plain strings rather than datetimes.
csv_path = 'polar_test.csv'
df = pl.read_csv(csv_path)
df

id,date,stuff
i64,str,f64
1,"""2000-01-01T00:...",4.3
2,"""2000-01-02T00:...",5.4
3,"""2000-01-03T00:...",6.5


In [7]:
# Read the CSV again, this time asking polars to detect date-like columns so
# that 'date' is loaded as a datetime column instead of strings.
# `try_parse_dates` is the current name of the option formerly spelled
# `parse_dates`; the old keyword is no longer accepted by recent polars.
df = pl.read_csv('polar_test.csv', try_parse_dates=True)
df

id,date,stuff
i64,datetime[μs],f64
1,2000-01-01 00:00:00,4.3
2,2000-01-02 00:00:00,5.4
3,2000-01-03 00:00:00,6.5


### Viewing Data

In [2]:
# Demo frame used by the rest of the notebook:
#   a - integers 0..7
#   b - eight uniform random floats in [0, 1)
#   c - the first eight days of January 2023
#   d - floats containing both NaN (a float value) and None (a missing value),
#       which polars treats differently (see the null_count row of describe()).
# A seeded generator keeps column 'b' identical across Restart & Run All.
rng = np.random.default_rng(0)
df = pl.DataFrame({
    'a': np.arange(8),
    'b': rng.random(8),
    'c': [datetime(2023, 1, day) for day in range(1, 9)],
    # Written as floats throughout so the column's dtype does not depend on
    # the constructor coercing a mix of int and float literals.
    'd': [1.0, 2.0, np.nan, np.nan, 0.0, -5.0, -42.0, None]})
df

a,b,c,d
i64,f64,datetime[μs],f64
0,0.070837,2023-01-01 00:00:00,1.0
1,0.881005,2023-01-02 00:00:00,2.0
2,0.739494,2023-01-03 00:00:00,
3,0.287771,2023-01-04 00:00:00,
4,0.740038,2023-01-05 00:00:00,0.0
5,0.402342,2023-01-06 00:00:00,-5.0
6,0.662465,2023-01-07 00:00:00,-42.0
7,0.057387,2023-01-08 00:00:00,


In [3]:
# Peek at the first five rows (five is also what head() returns by default).
df.head(5)

a,b,c,d
i64,f64,datetime[μs],f64
0,0.070837,2023-01-01 00:00:00,1.0
1,0.881005,2023-01-02 00:00:00,2.0
2,0.739494,2023-01-03 00:00:00,
3,0.287771,2023-01-04 00:00:00,
4,0.740038,2023-01-05 00:00:00,0.0


In [4]:
# Draw three rows at random; the fixed seed makes the displayed sample
# reproducible when the notebook is re-run.
df.sample(n=3, seed=0)

a,b,c,d
i64,f64,datetime[μs],f64
0,0.070837,2023-01-01 00:00:00,1.0
7,0.057387,2023-01-08 00:00:00,
3,0.287771,2023-01-04 00:00:00,


In [5]:
# Summary statistics (count, null_count, mean, std, min, max, median)
# for every column of the frame.
summary = df.describe()
summary

describe,a,b,c,d
str,f64,f64,str,f64
"""count""",8.0,8.0,"""8""",8.0
"""null_count""",0.0,0.0,"""0""",1.0
"""mean""",3.5,0.480167,,
"""std""",2.44949,0.32028,,
"""min""",0.0,0.057387,"""2023-01-01 00:...",-42.0
"""max""",7.0,0.881005,"""2023-01-08 00:...",2.0
"""median""",3.5,0.532404,,1.0


### Expressions

In [6]:
# Select every column; pl.all() is the expression form of the '*' wildcard.
df.select(pl.all())

a,b,c,d
i64,f64,datetime[μs],f64
0,0.070837,2023-01-01 00:00:00,1.0
1,0.881005,2023-01-02 00:00:00,2.0
2,0.739494,2023-01-03 00:00:00,
3,0.287771,2023-01-04 00:00:00,
4,0.740038,2023-01-05 00:00:00,0.0
5,0.402342,2023-01-06 00:00:00,-5.0
6,0.662465,2023-01-07 00:00:00,-42.0
7,0.057387,2023-01-08 00:00:00,


In [7]:
# Select several columns at once by handing pl.col a list of names.
wanted = ['a', 'b']
df.select(pl.col(wanted))

a,b
i64,f64
0,0.070837
1,0.881005
2,0.739494
3,0.287771
4,0.740038
5,0.402342
6,0.662465
7,0.057387


In [8]:
# One expression per column, then keep only the first three rows.
column_exprs = [pl.col(name) for name in ('a', 'b')]
df.select(column_exprs).head(3)

a,b
i64,f64
0,0.070837
1,0.881005
2,0.739494


In [9]:
# Select everything except column 'a'.
df.select(pl.all().exclude('a'))

b,c,d
f64,datetime[μs],f64
0.070837,2023-01-01 00:00:00,1.0
0.881005,2023-01-02 00:00:00,2.0
0.739494,2023-01-03 00:00:00,
0.287771,2023-01-04 00:00:00,
0.740038,2023-01-05 00:00:00,0.0
0.402342,2023-01-06 00:00:00,-5.0
0.662465,2023-01-07 00:00:00,-42.0
0.057387,2023-01-08 00:00:00,
