# Introducing Polars Expressions

In [1]:
# Import Polars and print the installed Polars version together with the
# versions of its optional dependencies, so readers can compare environments.
import polars as pl
pl.show_versions()

--------Version info---------
Polars:               0.20.23
Index type:           UInt32
Platform:             macOS-12.5-arm64-arm-64bit
Python:               3.12.3 (main, Apr  9 2024, 16:03:47) [Clang 14.0.0 (clang-1400.0.29.202)]

----Optional dependencies----
adbc_driver_manager:  <not installed>
cloudpickle:          3.0.0
connectorx:           <not installed>
deltalake:            <not installed>
fastexcel:            0.9.1
fsspec:               2023.12.2
gevent:               <not installed>
hvplot:               0.9.2
matplotlib:           3.8.4
nest_asyncio:         1.6.0
numpy:                1.26.4
openpyxl:             3.1.2
pandas:               2.2.2
pyarrow:              14.0.2
pydantic:             <not installed>
pyiceberg:            <not installed>
pyxlsb:               <not installed>
sqlalchemy:           <not installed>
xlsx2csv:             0.8.2
xlsxwriter:           3.2.0


## Expressions by Example

In [2]:
# Read the fruit dataset from CSV into a DataFrame; the bare name on the last
# line makes the notebook render the whole frame.
fruit = pl.read_csv(source="data/fruit.csv")
fruit

name,weight,color,is_round,origin
str,i64,str,bool,str
"""Avocado""",200,"""green""",False,"""South America"""
"""Banana""",120,"""yellow""",False,"""Asia"""
"""Blueberry""",1,"""blue""",False,"""North America"""
"""Cantaloupe""",2500,"""orange""",True,"""Africa"""
"""Cranberry""",2,"""red""",False,"""North America"""
"""Elderberry""",1,"""black""",False,"""Europe"""
"""Orange""",130,"""orange""",True,"""Asia"""
"""Papaya""",1000,"""orange""",False,"""South America"""
"""Peach""",150,"""orange""",True,"""Asia"""
"""Watermelon""",5000,"""green""",True,"""Africa"""


### Selecting Columns with Expressions

In [3]:
# Each argument to select() yields one or more output columns, in order.
fruit.select(
    "name",                   # a plain column name
    pl.col("^.*or.*$"),       # regex: every column whose name contains "or"
    pl.col("weight") / 1000,  # arithmetic on a column; the result keeps the name "weight"
    pl.col("is_round"),       # an explicit column expression
)

name,color,origin,weight,is_round
str,str,str,f64,bool
"""Avocado""","""green""","""South America""",0.2,False
"""Banana""","""yellow""","""Asia""",0.12,False
"""Blueberry""","""blue""","""North America""",0.001,False
"""Cantaloupe""","""orange""","""Africa""",2.5,True
"""Cranberry""","""red""","""North America""",0.002,False
"""Elderberry""","""black""","""Europe""",0.001,False
"""Orange""","""orange""","""Asia""",0.13,True
"""Papaya""","""orange""","""South America""",1.0,False
"""Peach""","""orange""","""Asia""",0.15,True
"""Watermelon""","""green""","""Africa""",5.0,True


### Creating New Columns with Expressions

In [4]:
# Append two boolean columns; the keyword names become the new column names.
fruit.with_columns(
    is_fruit=pl.lit(True),                           # constant value for every row
    is_berry=pl.col("name").str.ends_with("berry"),  # True when the name ends in "berry"
)

name,weight,color,is_round,origin,is_fruit,is_berry
str,i64,str,bool,str,bool,bool
"""Avocado""",200,"""green""",False,"""South America""",True,False
"""Banana""",120,"""yellow""",False,"""Asia""",True,False
"""Blueberry""",1,"""blue""",False,"""North America""",True,True
"""Cantaloupe""",2500,"""orange""",True,"""Africa""",True,False
"""Cranberry""",2,"""red""",False,"""North America""",True,True
"""Elderberry""",1,"""black""",False,"""Europe""",True,True
"""Orange""",130,"""orange""",True,"""Asia""",True,False
"""Papaya""",1000,"""orange""",False,"""South America""",True,False
"""Peach""",150,"""orange""",True,"""Asia""",True,False
"""Watermelon""",5000,"""green""",True,"""Africa""",True,False


### Filtering Rows with Expressions

In [5]:
# Keep only rows that satisfy every predicate: filter() combines multiple
# positional predicates with a logical AND.
fruit.filter(
    pl.col("is_round"),
    pl.col("weight").gt(1000),
)

name,weight,color,is_round,origin
str,i64,str,bool,str
"""Cantaloupe""",2500,"""orange""",True,"""Africa"""
"""Watermelon""",5000,"""green""",True,"""Africa"""


### Aggregating with Expressions

In [6]:
# Group on the last space-separated word of "origin" (so "North America" and
# "South America" share one group), then compute the row count and the mean
# weight of each group.
# NOTE: group_by() is called without maintain_order=True, so the order of the
# resulting rows is not guaranteed to be the same from run to run.
(
    fruit
    .group_by(pl.col("origin").str.split(" ").list.last())
    .agg(
        pl.len(),
        average_weight=pl.col("weight").mean(),
    )
)

origin,len,average_weight
str,u32,f64
"""Africa""",2,3750.0
"""Asia""",3,133.333333
"""Europe""",1,1.0
"""America""",4,300.75


### Sorting Rows with Expressions

In [7]:
# Order the rows by the byte length of each fruit name, longest names first.
fruit.sort(by=pl.col("name").str.len_bytes(), descending=True)

name,weight,color,is_round,origin
str,i64,str,bool,str
"""Cantaloupe""",2500,"""orange""",True,"""Africa"""
"""Elderberry""",1,"""black""",False,"""Europe"""
"""Watermelon""",5000,"""green""",True,"""Africa"""
"""Blueberry""",1,"""blue""",False,"""North America"""
"""Cranberry""",2,"""red""",False,"""North America"""
"""Avocado""",200,"""green""",False,"""South America"""
"""Banana""",120,"""yellow""",False,"""Asia"""
"""Orange""",130,"""orange""",True,"""Asia"""
"""Papaya""",1000,"""orange""",False,"""South America"""
"""Peach""",150,"""orange""",True,"""Asia"""


## So What Exactly Is an Expression?

> An expression is a tree of operations that describe how to construct one
or more Series.

## Fin.