In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt
import polars as pl


In [2]:
# Small demo frame: two integer columns (A, B) and two string columns
# (fruits, cars) that the later cells sort and group on.
df = pl.DataFrame(
    {
        "A": [1, 2, 3, 4, 5],
        "fruits": ["banana", "banana", "apple", "apple", "banana"],
        "B": [5, 4, 3, 2, 1],
        "cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
    }
)

In [3]:
# Display the frame ordered by the "fruits" column; `df` itself is not reassigned.
df.sort(by="fruits")

A,fruits,B,cars
i64,str,i64,str
3,"""apple""",3,"""beetle"""
4,"""apple""",2,"""beetle"""
1,"""banana""",5,"""beetle"""
2,"""banana""",4,"""audi"""
5,"""banana""",1,"""beetle"""


In [4]:
# Plain column selections, a literal, a filtered aggregate and several window
# expressions, all evaluated on the fruit-sorted frame.
df.sort("fruits").select(
    "fruits",
    "cars",
    # Constant column: the string "fruits" repeated on every row.
    pl.lit("fruits").alias("literal_string_fruits"),
    # Sum of B restricted to rows where cars == "beetle"; no alias is given,
    # so the output column is still called "B".
    pl.col("B").filter(pl.col("cars") == "beetle").sum(),
    # Sum of A over the rows with B > 2, computed per "cars" group.
    pl.col("A").filter(pl.col("B") > 2).sum().over("cars").alias("sum_A_by_cars"),
    # Sum of A per "fruits" group.
    pl.col("A").sum().over("fruits").alias("sum_A_by_fruits"),
    # A in reversed order within each "fruits" group.
    pl.col("A").reverse().over("fruits").alias("rev_A_by_fruits"),
    # A reordered by B within each "fruits" group.
    pl.col("A").sort_by("B").over("fruits").alias("sort_A_by_B_by_fruits"),
)

fruits,cars,literal_string_fruits,B,sum_A_by_cars,sum_A_by_fruits,rev_A_by_fruits,sort_A_by_B_by_fruits
str,str,str,i64,i64,i64,i64,i64
"""apple""","""beetle""","""fruits""",11,4,7,4,4
"""apple""","""beetle""","""fruits""",11,4,7,3,3
"""banana""","""beetle""","""fruits""",11,4,8,5,5
"""banana""","""audi""","""fruits""",11,2,8,2,2
"""banana""","""beetle""","""fruits""",11,4,8,1,1


In [5]:
import polars as pl
import datetime as dt

# People table used by the remaining cells.
# NOTE: this rebinds `df`, replacing the fruits/cars frame built earlier.
df = pl.DataFrame(
    {
        "name": [
            "Alice Archer",
            "Ben Brown",
            "Chloe Cooper",
            "Daniel Donovan",
        ],
        "birthdate": [
            dt.date(1997, 1, 10),
            dt.date(1985, 2, 15),
            dt.date(1983, 3, 22),
            dt.date(1981, 4, 30),
        ],
        # weight in kilograms
        "weight": [57.9, 72.5, 53.6, 83.1],
        # height in metres
        "height": [1.56, 1.77, 1.65, 1.75],
    }
)

print(df)

shape: (4, 4)
┌────────────────┬────────────┬────────┬────────┐
│ name           ┆ birthdate  ┆ weight ┆ height │
│ ---            ┆ ---        ┆ ---    ┆ ---    │
│ str            ┆ date       ┆ f64    ┆ f64    │
╞════════════════╪════════════╪════════╪════════╡
│ Alice Archer   ┆ 1997-01-10 ┆ 57.9   ┆ 1.56   │
│ Ben Brown      ┆ 1985-02-15 ┆ 72.5   ┆ 1.77   │
│ Chloe Cooper   ┆ 1983-03-22 ┆ 53.6   ┆ 1.65   │
│ Daniel Donovan ┆ 1981-04-30 ┆ 83.1   ┆ 1.75   │
└────────────────┴────────────┴────────┴────────┘


In [10]:
# Stand-alone expression: weight divided by height squared, labelled "BMI".
# Nothing is computed here; the same expression is applied to `df` in the next cell.
(pl.col("weight") / (pl.col("height") ** 2)).alias("BMI")

In [11]:
# Keep the name, derive the birth year from the date column, and compute
# BMI as weight / height**2. Keyword arguments name the derived columns.
result = df.select(
    "name",
    birth_year=pl.col("birthdate").dt.year(),
    BMI=pl.col("weight") / (pl.col("height") ** 2),
)
print(result)

shape: (4, 3)
┌────────────────┬────────────┬───────────┐
│ name           ┆ birth_year ┆ BMI       │
│ ---            ┆ ---        ┆ ---       │
│ str            ┆ i32        ┆ f64       │
╞════════════════╪════════════╪═══════════╡
│ Alice Archer   ┆ 1997       ┆ 23.791913 │
│ Ben Brown      ┆ 1985       ┆ 23.141498 │
│ Chloe Cooper   ┆ 1983       ┆ 19.687787 │
│ Daniel Donovan ┆ 1981       ┆ 27.134694 │
└────────────────┴────────────┴───────────┘


In [13]:
# Scale weight and height by 0.95 in one expression, round to two decimals,
# and tag each resulting column name with a "-5%" suffix.
df.select(
    "name",
    (pl.col("weight", "height") * 0.95).round(2).name.suffix("-5%"),
)

name,weight-5%,height-5%
str,f64,f64
"""Alice Archer""",55.0,1.48
"""Ben Brown""",68.88,1.68
"""Chloe Cooper""",50.92,1.57
"""Daniel Donovan""",78.94,1.66


In [15]:
# Append two derived columns to the people frame; the existing columns are
# kept as-is and the result is only displayed, not assigned back to `df`.
df.with_columns(
    # Calendar year extracted from the birthdate column.
    birth_year=pl.col("birthdate").dt.year(),
    # Body-mass index: weight divided by height squared.
    bmi=pl.col("weight") / (pl.col("height") ** 2),
)

name,birthdate,weight,height,brith_year,bmi
str,date,f64,f64,i32,f64
"""Alice Archer""",1997-01-10,57.9,1.56,1997,23.791913
"""Ben Brown""",1985-02-15,72.5,1.77,1985,23.141498
"""Chloe Cooper""",1983-03-22,53.6,1.65,1983,19.687787
"""Daniel Donovan""",1981-04-30,83.1,1.75,1981,27.134694


In [20]:
# Keep only the rows that satisfy both predicates:
# born before 1990 and height below 1.77.
df.filter(
    pl.col("birthdate").dt.year() < 1990,
    pl.col("height") < 1.77,
)

name,birthdate,weight,height
str,date,f64,f64
"""Chloe Cooper""",1983-03-22,53.6,1.65
"""Daniel Donovan""",1981-04-30,83.1,1.75
