# Chapter 7: Beginning Expressions

In [1]:
import polars as pl
# Print the installed Polars version together with platform and dependency details,
# so readers can compare their environment with the one used for the book.
pl.show_versions()  # The book is built with Polars version 1.0.0

## Methods and Namespaces

## Expressions by Example

In [4]:
import polars as pl

# Load the fruit dataset that the remaining cells of this chapter operate on.
# The path is relative to the notebook's working directory.
fruit = pl.read_csv("data/fruit.csv")
# Bare last expression: render the DataFrame as this cell's output.
fruit

### Selecting Columns with Expressions

In [6]:
# Several ways of saying what to select can be mixed in one select() call.
fruit.select(
    # A single column, referenced by its exact name.
    pl.col("name"),
    # A name wrapped in ^...$ is treated as a regular expression: every column
    # whose name contains "or".
    pl.col("^.*or.*$"),
    # A value computed from a column (method form of `weight / 1000`).
    pl.col("weight").truediv(1000),
    # A plain string is shorthand for pl.col("is_round").
    "is_round",
)

### Creating New Columns with Expressions

In [8]:
# Keyword arguments name the new columns, taking the place of explicit .alias() calls.
fruit.with_columns(
    # A literal value, broadcast to every row.
    is_fruit=pl.lit(True),
    # A Boolean derived from the text of an existing column.
    is_berry=pl.col("name").str.ends_with("berry"),
)

### Filtering Rows with Expressions

In [10]:
# Keep only the rows for which both conditions hold: the fruit is round AND its
# weight exceeds 1000 (method forms of the `&` and `>` operators).
fruit.filter(
    pl.col("is_round").and_(pl.col("weight").gt(1000))
)

### Aggregating with Expressions

In [12]:
# The grouping key is itself an expression: the last space-separated word of
# "origin". Note that group_by() does not promise a row order for its result
# unless maintain_order=True is passed.
fruit.group_by(
    pl.col("origin").str.split(" ").list.last()
).agg(
    # Number of rows in each group.
    pl.len(),
    # Mean weight per group; the keyword supplies the output column name.
    average_weight=pl.col("weight").mean(),
)

### Sorting Rows with Expressions

In [14]:
# Order the rows by the length of the name in bytes, longest first. Byte length
# equals character count only for ASCII-only names.
fruit.sort(
    by=pl.col("name").str.len_bytes(),
    descending=True,
)

## What Exactly Is an Expression?

In [16]:
# One expression, applied to every column: each column is multiplied by 10 and
# added next to the original under its own name plus the "_times_10" suffix.
(
    pl.DataFrame({"a": [1, 2, 3], "b": [0.4, 0.5, 0.6]})
    .with_columns((pl.all() * 10).name.suffix("_times_10"))
)

In [17]:
# Inspect the expression on its own, without any DataFrame: does it expand to
# more than one output column?
(pl.all() * 10).name.suffix("_times_10").meta.has_multiple_outputs()

### Properties of Expressions

In [19]:
# An expression is an ordinary Python object: build it once, store it in a
# variable, and hand it to any method that accepts expressions.
is_orange = pl.col("color").eq("orange").alias("is_orange")

# Here it becomes a new Boolean column.
fruit.with_columns(is_orange)

In [20]:
# The same stored expression, now used as a row filter.
fruit.filter(is_orange)

In [21]:
# ... and as a grouping key: count the rows for each value of the expression.
fruit.group_by(is_orange).len()

In [22]:
# A second, unrelated DataFrame that also happens to have a "color" column.
flowers = pl.DataFrame(
    dict(
        name=["Tiger lily", "Blue flag", "African marigold"],
        latin=["Lilium columbianum", "Iris versicolor", "Tagetes erecta"],
        color=["orange", "purple", "orange"],
    )
)

# is_orange only refers to a column by name, so it works on this frame as well.
flowers.filter(is_orange)

## Creating Expressions

### From Existing Columns

In [25]:
# Reference one column by its exact name; .columns lists the names of the result.
fruit.select(pl.col("color")).columns

In [26]:
# This raises a ColumnNotFoundError:
# fruit.select(pl.col("is_smelly")).columns

In [27]:
# A name that starts with ^ and ends with $ is interpreted as a regular
# expression; this one matches every column whose name contains "or".
fruit.select(pl.col("^.*or.*$")).columns

In [28]:
# pl.all() refers to every column of the frame.
fruit.select(pl.all()).columns

In [29]:
# Passing a data type instead of a name selects every column of that type.
fruit.select(pl.col(pl.String)).columns

In [30]:
# Several data types may be given; a column is selected if it has any of them.
fruit.select(pl.col(pl.Boolean, pl.Int64)).columns

In [31]:
# A list of column names is accepted too.
fruit.select(pl.col(["name", "color"])).columns

In [32]:
# This raises a TypeError:
# fruit.select(pl.col([pl.String, "is_round"])).columns

In [33]:
# Show the collection of data types that Polars groups together as "numeric".
# NOTE(review): top-level dtype groups such as pl.NUMERIC_DTYPES appear to be
# deprecated as of Polars 1.0.0 in favour of the polars.selectors module
# (e.g. cs.numeric()) -- confirm and consider updating this cell.
pl.NUMERIC_DTYPES

In [34]:
# pl.NUMERIC_DTYPES and the other top-level dtype groups are deprecated since
# Polars 1.0.0 and emit a DeprecationWarning; the selectors module is the
# supported way to pick columns by kind of data type. Imported here so that this
# cell is self-contained.
import polars.selectors as cs

(
    fruit
    # Derive a floating-point column from the weight column.
    .with_columns((pl.col("weight") / 1000).alias("weight_kg"))
    # Keep every numeric column, whatever its name.
    .select(cs.numeric())
    .head()
)

### From Literal Values

In [36]:
# pl.select() evaluates expressions without an existing DataFrame. A literal that
# is not given a name ends up in a column called "literal".
pl.select(pl.lit(42))

In [37]:
# .alias() gives the literal's column an explicit name.
pl.select(pl.lit(42).alias("answer"))

In [38]:
# A scalar literal is broadcast: every row of the new column holds the same value.
fruit.with_columns(pl.lit("Earth").alias("planet"))

In [39]:
# This raises a ShapeError:
# fruit.with_columns(pl.lit(pl.Series([False, True])).alias("row_is_even"))

In [40]:
# Unlike a two-element Series, a Python list does not raise a ShapeError here.
# NOTE(review): the list appears to be treated as ONE list-typed value that is
# repeated on every row, not as one Boolean per row -- confirm against the output,
# since the column name "row_is_even" suggests otherwise.
fruit.with_columns(pl.lit([False, True]).alias("row_is_even"))

In [41]:
# Three helpers that each produce a column of a requested length (3 here):
pl.select(
    # the same value repeated,
    pl.repeat("Ello", 3).alias("hello"),
    # a column of zeros,
    pl.zeros(3),
    # and a column of ones.
    pl.ones(3)
)

In [42]:
# This raises a ShapeError:
# fruit.with_columns(pl.repeat("Earth", 9).alias("planet"))

### From Ranges

In [44]:
# Two integer columns built from ranges, then one list of integers per row.
pl.select(
    # 0, 1, 2, 3, 4
    start=pl.int_range(0, 5),
    # 0, 2, 4, 6, 8 squared -> 0, 4, 16, 36, 64
    end=pl.arange(0, 10, 2) ** 2,
).with_columns(
    # Row-wise range from "start" to "end", stored as a list column.
    int_range=pl.int_ranges("start", "end"),
).with_columns(
    # Separate step because it reads the column created just above.
    range_length=pl.col("int_range").list.len(),
)

In [45]:
from datetime import date

# Same idea with temporal values: a column of consecutive dates, a constant end
# date of matching length, and then an hourly range per row between the two.
pl.select(
    # Six daily dates: 1985-10-21 up to and including 1985-10-26.
    start=pl.date_range(date(1985, 10, 21), date(1985, 10, 26)),
    # The same end date on each of the six rows.
    end=pl.repeat(date(2021, 10, 21), 6),
).with_columns(
    # Keyword form of .alias("range"); the result is a list column.
    range=pl.datetime_ranges("start", "end", interval="1h"),
)

### Other Functions to Create Expressions

## Renaming Expressions

In [48]:
# A one-row frame built from scalar values; the column names deliberately mix
# lower case, upper case, and a space so the renaming cells below have something
# to normalise.
df = pl.DataFrame({"text": "value", "An integer": 5040, "BOOLEAN": True})
df


In [49]:
# Three ways to rename the output of an expression:
df.select(
    # derive the new name from the current one (upper-cased),
    pl.col("text").name.to_uppercase(),
    # set a completely new name,
    pl.col("An integer").alias("int"),
    # derive the new name from the current one (lower-cased).
    pl.col("BOOLEAN").name.to_lowercase(),
)

In [50]:
# This raises a PanicException:
# df.select(
#     pl.all()
#     .name.to_lowercase()
#     .name.map(lambda s: s.replace(" ", "_"))
# )

In [51]:
# Apply both name changes (lower-casing and replacing spaces with underscores)
# inside a single .name.map() call rather than chaining two .name operations.
df.select(
    pl.all()
    .name.map(lambda s: s.lower().replace(" ", "_"))
)

## Expressions Are Idiomatic

In [53]:
# Non-idiomatic variant for comparison: fruit["..."] pulls out Series, so the
# Boolean mask is computed eagerly from the data before filter() is even called,
# instead of being described by an expression.
fruit.filter(
    (fruit["weight"] > 1000) & fruit["is_round"]
)

In [54]:
# Expressions only describe the work, so the same filter and column logic can be
# attached to a lazy query and executed later by collect().
(
    fruit
    .lazy()
    # Heavy and round fruit only (method form of `> 1000`).
    .filter(pl.col("weight").gt(1000) & pl.col("is_round"))
    # The keyword names the new column, replacing .alias("is_berry").
    .with_columns(is_berry=pl.col("name").str.ends_with("berry"))
    .collect()
)

In [55]:
# This raises a ShapeError:
# (
#     fruit
#     .lazy()
#     .filter((fruit["weight"] > 1000) & fruit["is_round"])
#     .with_columns(fruit["name"].str.ends_with("berry").alias("is_berry"))
#     .collect()
# )

## Conclusion