# Chapter 7: Beginning Expressions

In [None]:
import polars as pl
# Print the installed Polars version and related environment information.
pl.show_versions()  # The book is built with Polars version 1.13.1

## Methods and Namespaces

## Expressions by Example

In [None]:
# Load the fruit dataset that the remaining cells operate on.
fruit = pl.read_csv("data/fruit.csv")
# Bare name as the last statement so the notebook displays the DataFrame.
fruit

### Selecting Columns with Expressions

In [None]:
# Pick columns in several ways: by exact name, by a regular expression over
# column names, as a computed expression, and by exact name again.
fruit.select(
    pl.col("name"),
    pl.col("^.*or.*$"),
    pl.col("weight").truediv(1000),
    pl.col("is_round"),
)

### Creating New Columns with Expressions

In [None]:
# Append two columns: a constant True flag, and a flag that is true when
# the name ends with "berry".
fruit.with_columns(
    pl.lit(True).alias("is_fruit"),
    pl.col("name").str.ends_with("berry").alias("is_berry"),
)

### Filtering Rows with Expressions

In [None]:
# Keep only rows whose weight exceeds 1000 and whose is_round flag is true.
fruit.filter(pl.col("weight").gt(1000).and_(pl.col("is_round")))

### Aggregating with Expressions

In [None]:
# Group on the last space-separated token of "origin", then report the row
# count and the mean weight of each group.
(
    fruit
    .group_by(pl.col("origin").str.split(" ").list.last())
    .agg(
        pl.len(),
        pl.col("weight").mean().alias("average_weight"),
    )
)

### Sorting Rows with Expressions

In [None]:
# Order rows by the byte length of "name", longest first.
fruit.sort(pl.col("name").str.len_bytes(), descending=True)

## The Definition of an Expression

In [None]:
# Multiply every column by 10 and add the results next to the originals,
# using a "_times_10" suffix on the new column names.
pl.DataFrame({"a": [1, 2, 3], "b": [0.4, 0.5, 0.6]}).with_columns(
    (pl.all() * 10).name.suffix("_times_10")
)

In [None]:
# Ask the expression's metadata whether it can produce more than one column.
(pl.all() * 10).name.suffix("_times_10").meta.has_multiple_outputs()

### Properties of Expressions

In [None]:
# Reusable boolean expression: true where the "color" column equals "orange".
is_orange = pl.col("color").eq("orange").alias("is_orange")

# First use: materialise the expression as a new "is_orange" column.
fruit.with_columns(is_orange)

In [None]:
# Use the is_orange expression as a row filter.
fruit.filter(is_orange)

In [None]:
# Use the is_orange expression as the grouping key and count rows per group.
fruit.group_by(is_orange).len()

In [None]:
# A separate DataFrame that, like fruit, has a "color" column.
flowers = pl.DataFrame(
    {
        "name": ["Tiger lily", "Blue flag", "African marigold"],
        "latin": ["Lilium columbianum", "Iris versicolor", "Tagetes erecta"],
        "color": ["orange", "purple", "orange"],
    }
)

# is_orange only refers to a column named "color", so it can be applied to
# this frame as well.
flowers.filter(is_orange)

## Creating Expressions

### From Existing Columns

In [None]:
# Refer to one column by its exact name; .columns lists the names in the result.
fruit.select(pl.col("color")).columns

In [None]:
# This raises a ColumnNotFoundError:
# fruit.select(pl.col("is_smelly")).columns

In [None]:
# A pattern wrapped in ^ and $ is matched as a regular expression against
# the column names.
fruit.select(pl.col("^.*or.*$")).columns

In [None]:
# pl.all() refers to every column of the frame.
fruit.select(pl.all()).columns

In [None]:
# Passing a data type instead of a name refers to all columns of that type.
fruit.select(pl.col(pl.String)).columns

In [None]:
# Several data types can be passed at once.
fruit.select(pl.col(pl.Boolean, pl.Int64)).columns

In [None]:
# A list of names refers to exactly those columns.
fruit.select(pl.col(["name", "color"])).columns

### From Literal Values

In [None]:
# pl.select() evaluates expressions without an existing DataFrame; here the
# only expression is a literal value.
pl.select(pl.lit(42))

In [None]:
# Name the literal column explicitly with alias().
pl.select(pl.lit(42).alias("answer"))

In [None]:
# Keyword form: the argument name is used as the column name.
pl.select(answer=pl.lit(42))

In [None]:
# Add a column named "planet" built from a single string literal.
fruit.with_columns(planet=pl.lit("Earth"))

In [None]:
# pl.lit() is given a Python list here rather than a scalar; the result is
# stored under the name "row_is_even".
# NOTE(review): how the two-element list is matched against fruit's rows
# depends on the Polars version — confirm against the version in use.
fruit.with_columns(row_is_even=pl.lit([False, True]))

In [None]:
# Three generated columns of length 3: a repeated string (renamed to
# "umbrella"), zeros, and ones.
pl.select(
    pl.repeat("Ella", 3).alias("umbrella"),
    pl.zeros(3),
    pl.ones(3),
)

In [None]:
# This raises a ShapeError:
# fruit.with_columns(planet=pl.repeat("Earth", 9))

### From Ranges

In [None]:
(
    # "start" and "end" are two generated integer columns; "end" is squared.
    pl.select(
        start=pl.int_range(0, 5),
        end=pl.arange(0, 10, 2) ** 2,
    )
    # For each row, build a list of integers from that row's start and end.
    .with_columns(int_range=pl.int_ranges("start", "end"))
    # Record how many elements each generated list holds.
    .with_columns(range_length=pl.col("int_range").list.len())
)

In [None]:
from datetime import date

# "start" is a range of dates from 1985-10-21 to 1985-10-26 and "end" repeats
# 2021-10-21 six times. "range" is then built per row from those two columns
# with a one-hour interval.
pl.select(
    start=pl.date_range(date(1985, 10, 21), date(1985, 10, 26)),
    end=pl.repeat(date(2021, 10, 21), 6),
).with_columns(range=pl.datetime_ranges("start", "end", interval="1h"))

### Other Functions to Create Expressions

## Renaming Expressions

In [None]:
# Small frame whose column names mix lower case, a space, and upper case;
# the renaming cells below operate on it.
df = pl.DataFrame({"text": "value", "An integer": 5040, "BOOLEAN": True})
# Bare name as the last statement so the notebook displays the DataFrame.
df

In [None]:
# Rename each column a different way: upper-case the existing name, replace
# the name outright with alias(), and lower-case the existing name.
df.select(
    pl.col("text").name.to_uppercase(),
    pl.col("An integer").alias("int"),
    pl.col("BOOLEAN").name.to_lowercase(),
)

In [None]:
# This raises a PanicException:
# df.select(
#     pl.all()
#     .name.to_lowercase()
#     .name.map(lambda s: s.replace(" ", "_"))
# )

In [None]:
# Normalise every column name with a single callable: lower-case it, then
# replace spaces with underscores.
df.select(
    pl.all().name.map(
        lambda s: s.lower().replace(" ", "_")
    )
)

## Expressions Are Idiomatic

In [None]:
# Same weight / is_round condition as the expression-based filter, but built
# from columns indexed directly out of the DataFrame (fruit["..."]) instead
# of pl.col() expressions.
fruit.filter((fruit["weight"] > 1000) & fruit["is_round"])

In [None]:
(
    fruit
    .lazy()
    # Keep rows whose weight exceeds 1000 and whose is_round flag is true.
    .filter(pl.col("weight").gt(1000).and_(pl.col("is_round")))
    # Flag names that end with "berry".
    .with_columns(pl.col("name").str.ends_with("berry").alias("is_berry"))
    # Execute the lazy query and return a DataFrame.
    .collect()
)

In [None]:
# This raises a ShapeError:
# (
#     fruit
#     .lazy()
#     .filter((fruit["weight"] > 1000) & fruit["is_round"])
#     .with_columns(is_berry=fruit["name"].str.ends_with("berry"))
#     .collect()
# )

## Takeaways