# Chapter 11: Filtering and Sorting Rows

In [1]:
import polars as pl
# Print the Polars version together with the versions of its optional
# dependencies, so the environment that produced the outputs is on record.
pl.show_versions()

--------Version info---------
Polars:               0.20.31
Index type:           UInt32
Platform:             macOS-12.5-arm64-arm-64bit
Python:               3.11.9 (main, Apr  2 2024, 16:11:47) [Clang 14.0.0 (clang-1400.0.29.202)]

----Optional dependencies----
adbc_driver_manager:  0.8.0
cloudpickle:          3.0.0
connectorx:           0.3.2
deltalake:            0.15.0
fastexcel:            0.9.1
fsspec:               2023.12.2
gevent:               23.9.1
hvplot:               0.9.2
matplotlib:           3.8.4
nest_asyncio:         1.6.0
numpy:                1.26.4
openpyxl:             3.1.2
pandas:               2.2.2
pyarrow:              14.0.2
pydantic:             2.5.3
pyiceberg:            0.5.1
pyxlsb:               <not installed>
sqlalchemy:           2.0.25
torch:                <not installed>
xlsx2csv:             0.8.2
xlsxwriter:           3.2.0


In [2]:
# Load the tools dataset from a CSV file; the path is relative, so the
# notebook must be run from the directory that contains `data/`.
tools = pl.read_csv("data/tools.csv")
tools  # last expression in the cell: display the DataFrame

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050.0
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500.0
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100.0
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Router""","""POF 1400 ACE""","""Bosch""",False,185,28000.0
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800.0


## Filtering Rows

### Filtering Based on Expressions

In [5]:
# Keep the rows for which both conditions hold. The comparison is wrapped in
# parentheses because `&` binds more tightly than `==` in Python.
tools.filter(
    pl.col("cordless") &  # <1>
    (pl.col("brand") == "Makita")
)

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000


### Filtering Based on Column Names

In [7]:
# A bare string is treated as a column name; rows where that column is True
# are kept.
tools.filter("cordless")

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0


### Filtering Based on Constraints

In [9]:
# Keyword arguments act as equality constraints (column=value); a row is kept
# only when all of them hold.
tools.filter(cordless=True, brand="Makita")

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000


## Sorting Rows

### Sorting Based on a Single Column

In [12]:
# Sort the rows by price; without further arguments the lowest price comes first.
tools.sort("price")

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100.0
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0
"""Router""","""POF 1400 ACE""","""Bosch""",False,185,28000.0
"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050.0
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500.0
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800.0


### Sorting in Reverse

In [14]:
# descending=True reverses the order: the highest price comes first.
tools.sort("price", descending=True)

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800.0
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500.0
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0
"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050.0
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0
"""Router""","""POF 1400 ACE""","""Bosch""",False,185,28000.0
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100.0


In [15]:
# `DataFrame.sort()` has no `ascending` parameter (pandas' `sort_values` does);
# Polars uses `descending` instead. The call is expected to fail, so catch the
# error and print it: the mistake is still demonstrated, but the notebook keeps
# running under "Restart Kernel -> Run All".
try:
    tools.sort("price", ascending=False)
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: DataFrame.sort() got an unexpected keyword argument 'ascending'

### Sorting Based on Multiple Columns

In [17]:
# Sort by brand first; rows with the same brand are then ordered by price.
tools.sort(["brand", "price"])

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100.0
"""Router""","""POF 1400 ACE""","""Bosch""",False,185,28000.0
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500.0
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800.0
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0
"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050.0
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0


In [18]:
# One `descending` flag per sort column: brand A-Z, and within each brand the
# most expensive tool first.
tools.sort(
    ["brand", "price"],
    descending=[False, True],
)

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500.0
"""Router""","""POF 1400 ACE""","""Bosch""",False,185,28000.0
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100.0
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800.0
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0
"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050.0
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0


### Sorting Based on Expressions

In [20]:
# Sort by a computed value (rpm divided by price) without adding it as a column.
# The row whose rpm is missing ends up first in the result.
tools.sort(pl.col("rpm") / pl.col("price"))

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050.0
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800.0
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300.0
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500.0
"""Impact Driver""","""DTD157Z""","""Makita""",True,156,3000.0
"""Angle Grinder""","""DGA504ZJ""","""Makita""",True,229,8500.0
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100.0
"""Random Orbital Sander""","""DBO180ZJ""","""Makita""",True,199,11000.0
"""Router""","""POF 1400 ACE""","""Bosch""",False,185,28000.0


### Sorting Nested Data Types

In [22]:
# Pack all columns of a row into one struct, then gather the structs of each
# brand into a list column named "collection".
tools_collection = tools.group_by("brand").agg(
    pl.struct(pl.all()).alias("collection")
)
tools_collection

brand,collection
str,list[struct[6]]
"""DeWalt""","[{""Nail Gun"",""DPSB2IN1-XJ"",""DeWalt"",true,129,null}, {""Table Saw"",""DWE7485"",""DeWalt"",false,516,5800}]"
"""Bosch""","[{""Miter Saw"",""GCM 8 SJL"",""Bosch"",false,391,5500}, {""Jigsaw"",""PST 900 PEL"",""Bosch"",false,79,3100}, {""Router"",""POF 1400 ACE"",""Bosch"",false,185,28000}]"
"""Makita""","[{""Rotary Hammer"",""HR2230"",""Makita"",false,199,1050}, {""Plunge Cut Saw"",""DSP600ZJ"",""Makita"",true,459,6300}, … {""Random Orbital Sander"",""DBO180ZJ"",""Makita"",true,199,11000}]"


In [23]:
# Order the brands by how many tools their collection holds, largest first.
tools_collection.sort(pl.col("collection").list.len(), descending=True)

brand,collection
str,list[struct[6]]
"""Makita""","[{""Rotary Hammer"",""HR2230"",""Makita"",false,199,1050}, {""Plunge Cut Saw"",""DSP600ZJ"",""Makita"",true,459,6300}, … {""Random Orbital Sander"",""DBO180ZJ"",""Makita"",true,199,11000}]"
"""Bosch""","[{""Miter Saw"",""GCM 8 SJL"",""Bosch"",false,391,5500}, {""Jigsaw"",""PST 900 PEL"",""Bosch"",false,79,3100}, {""Router"",""POF 1400 ACE"",""Bosch"",false,185,28000}]"
"""DeWalt""","[{""Nail Gun"",""DPSB2IN1-XJ"",""DeWalt"",true,129,null}, {""Table Saw"",""DWE7485"",""DeWalt"",false,516,5800}]"


In [24]:
# Order the brands by the mean price of the tools in their collection:
# pull the "price" field out of every struct in the list, then average it.
tools_collection.sort(
    by=pl.col("collection")
    .list.eval(pl.element().struct.field("price"))
    .list.mean()
)

brand,collection
str,list[struct[6]]
"""Bosch""","[{""Miter Saw"",""GCM 8 SJL"",""Bosch"",false,391,5500}, {""Jigsaw"",""PST 900 PEL"",""Bosch"",false,79,3100}, {""Router"",""POF 1400 ACE"",""Bosch"",false,185,28000}]"
"""Makita""","[{""Rotary Hammer"",""HR2230"",""Makita"",false,199,1050}, {""Plunge Cut Saw"",""DSP600ZJ"",""Makita"",true,459,6300}, … {""Random Orbital Sander"",""DBO180ZJ"",""Makita"",true,199,11000}]"
"""DeWalt""","[{""Nail Gun"",""DPSB2IN1-XJ"",""DeWalt"",true,129,null}, {""Table Saw"",""DWE7485"",""DeWalt"",false,516,5800}]"


In [25]:
# Same ordering as sorting on the expression directly, but the mean price is
# first stored as a column so the sort key is visible in the result.
tools_collection.with_columns(
    pl.col("collection")
    .list.eval(pl.element().struct.field("price"))
    .list.mean()
    .alias("mean_price")
).sort(by="mean_price")

brand,collection,mean_price
str,list[struct[6]],f64
"""Bosch""","[{""Miter Saw"",""GCM 8 SJL"",""Bosch"",false,391,5500}, {""Jigsaw"",""PST 900 PEL"",""Bosch"",false,79,3100}, {""Router"",""POF 1400 ACE"",""Bosch"",false,185,28000}]",218.333333
"""Makita""","[{""Rotary Hammer"",""HR2230"",""Makita"",false,199,1050}, {""Plunge Cut Saw"",""DSP600ZJ"",""Makita"",true,459,6300}, … {""Random Orbital Sander"",""DBO180ZJ"",""Makita"",true,199,11000}]",248.4
"""DeWalt""","[{""Nail Gun"",""DPSB2IN1-XJ"",""DeWalt"",true,129,null}, {""Table Saw"",""DWE7485"",""DeWalt"",false,516,5800}]",322.5


## Related Row Operations

In [27]:
# Number of rows left after dropping the rows where rpm is missing.
tools.drop_nulls("rpm").height

9

In [28]:
# Number the rows, keep every second one, and show the first three of those;
# the index column shows which of the original rows were kept.
tools.with_row_index().gather_every(n=2).head(n=3)

index,tool,product,brand,cordless,price,rpm
u32,str,str,str,bool,i64,i64
0,"""Rotary Hammer""","""HR2230""","""Makita""",False,199,1050
2,"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300
4,"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100


In [29]:
# The three most expensive tools.
tools.top_k(3, by="price")

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500


In [30]:
# Draw a random 20% of the rows. A fixed seed makes the draw repeatable, so the
# cell shows the same rows on every "Restart Kernel -> Run All"; without it
# each run picks different rows. Re-run the cell to refresh the stored output.
tools.sample(fraction=0.2, seed=42)

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Nail Gun""","""DPSB2IN1-XJ""","""DeWalt""",True,129,
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800.0


In [31]:
# Names to look for; only some of them occur in `tools`.
saws = pl.DataFrame(
    {
        "tool": [
            "Table Saw",
            "Plunge Cut Saw",
            "Miter Saw",
            "Jigsaw",
            "Bandsaw",
            "Chainsaw",
            "Seesaw",
        ]
    }
)
# A semi join keeps the rows of `tools` whose tool name appears in `saws`
# and adds no columns from `saws`.
tools.join(saws, on="tool", how="semi")

tool,product,brand,cordless,price,rpm
str,str,str,bool,i64,i64
"""Miter Saw""","""GCM 8 SJL""","""Bosch""",False,391,5500
"""Plunge Cut Saw""","""DSP600ZJ""","""Makita""",True,459,6300
"""Jigsaw""","""PST 900 PEL""","""Bosch""",False,79,3100
"""Table Saw""","""DWE7485""","""DeWalt""",False,516,5800


## Takeaways