In [None]:
import polars as pl

# Build a lazy query over the character table; rows are only materialised
# by the `.collect()` calls in the cells below.
starwars = pl.scan_parquet("data/starwars.parquet")

# Working subset: the `films` column removed and only the three named
# characters kept.
rebels = (
    starwars
    .drop("films")
    .filter(pl.col("name").is_in(["Luke Skywalker", "Leia Organa", "Han Solo"]))
)

In [4]:
# Column selection with plain expressions: a bare string, an explicit
# pl.col(), a regex inside pl.col() (anchored with ^...$), and a derived
# column named through a keyword argument. Keyword expressions are placed
# after the positional ones, so `height_m` stays the last column.
rebels.select(
    "name",
    pl.col("homeworld"),
    pl.col("^.*color$"),
    height_m=pl.col("height") / 100,
).collect()

name,homeworld,hair_color,skin_color,eye_color,height_m
str,str,str,str,str,f64
"""Han Solo""","""Corellia""","""brown""","""fair""","""brown""",1.8
"""Leia Organa""","""Alderaan""","""brown""","""light""","""brown""",1.5
"""Luke Skywalker""","""Tatooine""","""blond""","""fair""","""blue""",1.72


In [None]:
import polars.selectors as cs

# Same projection as the pl.col() version, written with selectors.
# cs.by_name() is meant for exact column names; regular expressions belong
# to cs.matches(). Passing the pattern to by_name() only works on releases
# that happen to forward names to pl.col(), and fails with a
# ColumnNotFoundError where names are resolved exactly.
rebels.select(
    "name",
    cs.by_name("homeworld"),
    cs.matches("^.*color$"),
    # Arithmetic on a selector yields an ordinary expression, which can then
    # be renamed.
    (cs.by_name("height") / 100).alias("height_m"),
).collect()

name,homeworld,hair_color,skin_color,eye_color,height_m
str,str,str,str,str,f64
"""Han Solo""","""Corellia""","""brown""","""fair""","""brown""",1.8
"""Leia Organa""","""Alderaan""","""brown""","""light""","""brown""",1.5
"""Luke Skywalker""","""Tatooine""","""blond""","""fair""","""blue""",1.72


In [6]:
# Columns whose name begins with "birth".
(
    rebels
    .select(cs.starts_with("birth"))
    .collect()
)

birth_year,birth_date
f64,date
29.0,1948-06-01
19.0,1958-05-30
19.0,1958-05-30


In [7]:
# Columns whose name ends in "_color".
(
    rebels
    .select(cs.ends_with("_color"))
    .collect()
)

hair_color,skin_color,eye_color
str,str,str
"""brown""","""fair""","""brown"""
"""brown""","""light""","""brown"""
"""blond""","""fair""","""blue"""


In [8]:
# Columns with an underscore anywhere in their name.
(
    rebels
    .select(cs.contains("_"))
    .collect()
)

hair_color,skin_color,eye_color,birth_year,birth_date,screen_time
str,str,str,f64,date,duration[μs]
"""brown""","""fair""","""brown""",29.0,1948-06-01,1h 12m 37s
"""brown""","""light""","""brown""",19.0,1958-05-30,1h 3m 40s
"""blond""","""fair""","""blue""",19.0,1958-05-30,1h 58m 44s


In [9]:
# Regex selection: column names made of exactly four lowercase letters.
(
    rebels
    .select(cs.matches("^[a-z]{4}$"))
    .collect()
)

name,mass
str,f64
"""Han Solo""",80.0
"""Leia Organa""",49.0
"""Luke Skywalker""",77.0


In [10]:
# Mean of every numeric column per hair colour.
# group_by() does not guarantee the order in which groups come back, so the
# result is sorted on the key to make the displayed table identical on
# every run.
(
    rebels
    .group_by("hair_color")
    .agg(cs.numeric().mean())
    .sort("hair_color")
    .collect()
)

hair_color,height,mass,birth_year
str,f64,f64,f64
"""blond""",172.0,77.0,19.0
"""brown""",165.0,64.5,24.0


In [11]:
# Select by data type: every string column.
(
    rebels
    .select(cs.string())
    .collect()
)

name,hair_color,skin_color,eye_color,homeworld,species
str,str,str,str,str,str
"""Han Solo""","""brown""","""fair""","""brown""","""Corellia""","""Human"""
"""Leia Organa""","""brown""","""light""","""brown""","""Alderaan""","""Human"""
"""Luke Skywalker""","""blond""","""fair""","""blue""","""Tatooine""","""Human"""


In [None]:
# Select by an exact (nested) dtype: every list-of-strings column.
# (A follow-up `.explode("vehicles")` step was sketched here but is not
# applied.)
(
    rebels
    .select(cs.by_dtype(pl.List(pl.String)))
    .collect()
)

vehicles,starships
list[str],list[str]
,"[""Millennium Falcon"", ""Imperial shuttle""]"
"[""Imperial Speeder Bike""]",
"[""Snowspeeder"", ""Imperial Speeder Bike""]","[""X-wing"", ""Imperial shuttle""]"


In [None]:
# Union of two selectors with `|`. As the output shows, the columns come
# back in the frame's own order rather than the order the selectors are
# written in: hair_color sits between the numeric columns.
(
    rebels
    .select(cs.by_name("hair_color") | cs.numeric())
    .collect()
)

height,mass,hair_color,birth_year
u16,f64,str,f64
180,80.0,"""brown""",29.0
150,49.0,"""brown""",19.0
172,77.0,"""blond""",19.0


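Walrus operator (`:=`): bind a selector to a name inside the `select()` call so it can be reused in the same call, e.g. to append its complement with `~`.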

In [None]:
# Small demo frame built from scalars: one row, five columns.
df = pl.DataFrame(
    {
        "d": 1,
        "i": True,
        "s": True,
        "c": True,
        "o": 1.0,
    }
)

In [None]:
# Display the demo frame (the cell's last expression is rendered as output).
df

d,i,s,c,o
i64,bool,bool,bool,f64
1,True,True,True,1.0


In [None]:
# Move columns "c" and "i" to the front: the walrus binds the selector to
# `first` while passing it to select(), and `~first` (its complement) then
# supplies all remaining columns. `first` stays defined for later cells.
print(
    df.select(
        first := cs.by_name("c", "i"),
        ~first,
    )
)

shape: (1, 5)
┌──────┬──────┬─────┬──────┬─────┐
│ c    ┆ i    ┆ d   ┆ s    ┆ o   │
│ ---  ┆ ---  ┆ --- ┆ ---  ┆ --- │
│ bool ┆ bool ┆ i64 ┆ bool ┆ f64 │
╞══════╪══════╪═════╪══════╪═════╡
│ true ┆ true ┆ 1   ┆ true ┆ 1.0 │
└──────┴──────┴─────┴──────┴─────┘


In [24]:
# Show the string form of the selector bound by the walrus above and of its
# complement.
print(
    f"first: {first}, ~first:{~first}"
)

first: cols(["c", "i"]), ~first:selector


In [None]:
# Five highest body-mass-index values over the full table:
# bmi = mass / (height / 100)^2.
# NOTE(review): height / 100 presumably converts cm to m — confirm units.
# Rows with a null in any of the three selected columns are dropped before
# ranking.
(
    starwars
    .select(
        "name",
        (pl.col("mass") / (pl.col("height") / 100).pow(2)).alias("bmi"),
        "species",
    )
    .drop_nulls()
    .top_k(5, by="bmi")
    .collect()
)

name,bmi,species
str,f64,str
"""Jabba Desilijic Tiure""",443.428571,"""Hutt"""
"""Dud Bolt""",50.928022,"""Vulptereen"""
"""Yoda""",39.02663,"""Yoda's species"""
"""Owen Lars""",37.874006,"""Human"""
"""IG-88""",35.0,"""Droid"""
