# Getting started

In [33]:
import polars as pl
import datetime as dt
from pathlib import Path


# Target of the CSV round trip below: <current working directory>/static/data.csv.
path = Path.cwd().joinpath("static", "data.csv")
# Create the "static" folder when missing; exist_ok keeps re-runs from failing.
path.parent.mkdir(exist_ok=True)

In [None]:
# Small in-memory example table with one row per person.
df = pl.DataFrame(
    {
        "name": ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],
        # Birthdates are spelled as (year, month, day) triples and turned into
        # datetime.date objects.
        "birthdate": [
            dt.date(*ymd)
            for ymd in [(1997, 1, 10), (1985, 2, 15), (1983, 3, 22), (1981, 4, 30)]
        ],
        "weight": [57.9, 72.5, 53.6, 83.1],
        "height": [1.56, 1.77, 1.65, 1.75],
    }
)

# Round trip through CSV: write the frame to `path`, then load it back.
df.write_csv(path)

# try_parse_dates=True asks the reader to attempt parsing date-like columns
# instead of leaving them as plain strings.
df_csv = pl.read_csv(path, try_parse_dates=True)

## Selecting

In [38]:
# Keep the listed columns only, in the order given here (which differs from the
# column order of `df`).
df_selected = df.select("name", "weight", "height", "birthdate")

# Bare last expression so the notebook renders the resulting frame.
df_selected

name,weight,height,birthdate
str,f64,f64,date
"""Alice Archer""",57.9,1.56,1997-01-10
"""Ben Brown""",72.5,1.77,1985-02-15
"""Chloe Cooper""",53.6,1.65,1983-03-22
"""Daniel Donovan""",83.1,1.75,1981-04-30


## Mutating

In [36]:
# Append two derived columns to `df`; the existing columns are kept as they are.
df_mutated = df.with_columns(
    # Calendar year extracted from the birthdate.
    pl.col("birthdate").dt.year().alias("birth_year"),
    # Body-mass index: weight divided by height squared, rounded to one decimal.
    (pl.col("weight") / pl.col("height") ** 2).round(1).alias("bmi"),
)

# Bare last expression so the notebook renders the resulting frame.
df_mutated

name,birthdate,weight,height,birth_year,bmi
str,date,f64,f64,i32,f64
"""Alice Archer""",1997-01-10,57.9,1.56,1997,23.8
"""Ben Brown""",1985-02-15,72.5,1.77,1985,23.1
"""Chloe Cooper""",1983-03-22,53.6,1.65,1983,19.7
"""Daniel Donovan""",1981-04-30,83.1,1.75,1981,27.1


## Filtering

In [37]:
# Keep only the rows whose birthdate falls in month 3 (March).
df_filtered = df.filter(pl.col("birthdate").dt.month().eq(3))

# Bare last expression so the notebook renders the resulting frame.
df_filtered

name,birthdate,weight,height
str,date,f64,f64
"""Chloe Cooper""",1983-03-22,53.6,1.65


## Grouping

In [44]:
# Summarise the people per birth decade: group size, mean weight and the
# largest height in each group.
df_grouped = (
    df
    .group_by(
        # Floor the birth year to the start of its decade (e.g. 1985 -> 1980).
        decade = pl.col("birthdate").dt.year() // 10 * 10,
        # group_by gives no ordering guarantee by default; keeping groups in
        # order of first appearance makes the displayed table reproducible.
        maintain_order = True,
    )
    .agg(
        sample_size = pl.len(),
        average_weight = pl.col("weight").mean().round(2),
        tallest = pl.col("height").max(),
    )
)

# Bare last expression so the notebook renders the resulting frame.
df_grouped

decade,sample_size,average_weight,tallest
i32,u32,f64,f64
1990,1,57.9,1.56
1980,3,69.73,1.77


In [None]:
# Group by birth decade WITHOUT aggregating.
# NOTE(review): this rebinds `df_grouped` to the object returned by
# `DataFrame.group_by` (a GroupBy handle, not a DataFrame), replacing any frame
# previously stored under that name. Chain `.agg(...)` to get a DataFrame back.
df_grouped = (
    df
    .group_by(
        # Floor the birth year to the start of its decade (e.g. 1985 -> 1980).
        decade = pl.col("birthdate").dt.year() // 10 * 10
    )
)

# Shows the repr of the un-aggregated group_by result.
df_grouped

## Combining

## More complex operations