# Getting started

In [None]:
import polars as pl
import datetime as dt
from pathlib import Path


# The example CSV lives in a "static" folder directly under the current
# working directory; create that folder first so later writes succeed.
static_dir = Path.cwd().joinpath("static")
static_dir.mkdir(exist_ok=True)  # no error if the folder is already there
path = static_dir / "data.csv"

## DataFrame operations

In [None]:
# Sample data: one entry per person in every column, in the same order.
birthdates = [
    dt.date(*ymd)
    for ymd in [(1997, 1, 10), (1985, 2, 15), (1983, 3, 22), (1981, 4, 30)]
]
people = {
    "name": ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],
    "birthdate": birthdates,
    "weight": [57.9, 72.5, 53.6, 83.1],
    "height": [1.56, 1.77, 1.65, 1.75],
}
df = pl.DataFrame(people)

# Round-trip through CSV: write the frame to disk, then read it back.
# CSV stores dates as text, so ask the reader to try parsing them again.
df.write_csv(path)
df_csv = pl.read_csv(path, try_parse_dates=True)

## Selecting

In [None]:
# Choose which columns to keep and in which order they appear.
selected_columns = ("name", "weight", "height", "birthdate")
df_selected = df.select(*selected_columns)

df_selected

## Mutating

In [None]:
# Derive two new columns while keeping all existing ones:
#   birth_year - the year component of "birthdate"
#   bmi        - weight divided by height squared, rounded to one decimal
birth_year = pl.col("birthdate").dt.year().alias("birth_year")
bmi = (pl.col("weight") / pl.col("height") ** 2).round(1).alias("bmi")
df_mutated = df.with_columns(birth_year, bmi)

df_mutated

## Filtering

In [None]:
# Keep only the rows whose birthdate falls in month number 3 (March).
born_in_march = pl.col("birthdate").dt.month() == 3
df_filtered = df.filter(born_in_march)

df_filtered

## Grouping

In [None]:
# Group people by decade of birth (year floored to a multiple of 10) and
# summarise each group: how many people, their mean weight (2 decimals),
# and the greatest height.
#
# maintain_order=True keeps the groups in the order their keys first appear
# in `df`. Without it polars does not guarantee the order of the result
# rows, so the displayed table could differ from run to run.
df_grouped = df.group_by(
    decade=pl.col("birthdate").dt.year() // 10 * 10,
    maintain_order=True,
).agg(
    sample_size=pl.len(),
    average_weight=pl.col("weight").mean().round(2),
    tallest=pl.col("height").max(),
)

df_grouped

In [None]:
# Step 1: add a "decade" column (birth year floored to a multiple of 10) and
# replace "name" with just the part before the first space.
decade_expr = (pl.col("birthdate").dt.year() // 10 * 10).alias("decade")
first_name_expr = pl.col("name").str.split(by=" ").list.first()
with_decade = df.with_columns(decade_expr, first_name_expr)

# Step 2: keep every column except "birthdate".
without_birthdate = with_decade.select(pl.all().exclude("birthdate"))

# Step 3: group by decade, preserving first-appearance order, then collect
# the names of each group into a list and average weight/height (rounded to
# 2 decimals); the averaged columns get an "avg_" prefix.
groups = without_birthdate.group_by(pl.col("decade"), maintain_order=True)
df_complex = groups.agg(
    pl.col("name"),
    pl.col("weight", "height").mean().round(2).name.prefix("avg_"),
)

df_complex

## Combining

### Joining

In [None]:
# Extra per-person attributes keyed by "name", used as the right-hand side of
# a join. The names are listed in a different order than in `df`.
family_info = {
    "name": ["Ben Brown", "Daniel Donovan", "Alice Archer", "Chloe Cooper"],
    "parent": [True, False, False, False],
    "siblings": [1, 2, 3, 4],
}
df_new = pl.DataFrame(family_info)

df_new

In [None]:
# Left join: keep every row of `df` and attach the matching columns from
# `df_new`, pairing rows by equal "name" values.
df.join(other=df_new, how="left", on="name")

### Concatenating

In [None]:
# A second batch of people with the same four columns as `df`, so the two
# frames can be stacked on top of each other.
more_birthdates = [
    dt.date(*ymd)
    for ymd in [(1977, 5, 10), (1975, 6, 23), (1973, 7, 22), (1971, 8, 3)]
]
more_people = {
    "name": ["Ethan Edwards", "Fiona Foster", "Grace Gibson", "Henry Harris"],
    "birthdate": more_birthdates,
    "weight": [67.9, 72.5, 57.6, 93.1],
    "height": [1.76, 1.6, 1.66, 1.8],
}
df_new = pl.DataFrame(more_people)

df_new

In [None]:
# Vertical concatenation: the rows of `df_new` are appended below the rows
# of `df`.
frames = [df, df_new]
pl.concat(frames, how="vertical")