In [1]:
import polars as pl
import pathlib
# Relative location of the Titanic CSV; the cell's last expression checks that the file is present
path_to_data = pathlib.Path("data/titanic.csv")
path_to_data.exists()

True

In [2]:
# Limit DataFrame display to 4 rows so the outputs below stay compact
# (longer frames are shown truncated, with a `...` row in the middle)
pl.Config.set_tbl_rows(4)

polars.cfg.Config

In [3]:
# Load the Titanic CSV into a polars DataFrame and preview its first rows
df = pl.read_csv(path_to_data)
df.head()

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Ow...","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
2,1,1,"""Cumings, Mrs. ...","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
...,...,...,...,...,...,...,...,...,...,...,...
4,1,1,"""Futrelle, Mrs....","""female""",35.0,1,0,"""113803""",53.1,"""C123""","""S"""
5,0,3,"""Allen, Mr. Wil...","""male""",35.0,0,0,"""373450""",8.05,,"""S"""


In [5]:
# Sort the whole DataFrame by a single column with `.sort`.
# Note in the output that rows with a missing Age come first.
df.sort("Age")

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
6,0,3,"""Moran, Mr. Jam...","""male""",,0,0,"""330877""",8.4583,,"""Q"""
18,1,2,"""Williams, Mr. ...","""male""",,0,0,"""244373""",13.0,,"""S"""
...,...,...,...,...,...,...,...,...,...,...,...
852,0,3,"""Svensson, Mr. ...","""male""",74.0,0,0,"""347060""",7.775,,"""S"""
631,1,1,"""Barkworth, Mr....","""male""",80.0,0,0,"""27042""",30.0,"""A23""","""S"""


In [7]:
# Sort on multiple columns by passing a list:
# rows are ordered by Pclass first, then by Age within each class
df.sort(["Pclass", "Age"])

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
32,1,1,"""Spencer, Mrs. ...","""female""",,1,0,"""PC 17569""",146.5208,"""B78""","""C"""
56,1,1,"""Woolner, Mr. H...","""male""",,0,0,"""19947""",35.5,"""C52""","""S"""
...,...,...,...,...,...,...,...,...,...,...,...
117,0,3,"""Connors, Mr. P...","""male""",70.5,0,0,"""370369""",7.75,,"""Q"""
852,0,3,"""Svensson, Mr. ...","""male""",74.0,0,0,"""347060""",7.775,,"""S"""


# Sorting an Entire Column using an Expression

In [8]:
# `pl.all().sort()` sorts every column on its own, so the values in one
# output row no longer belong to the same passenger (compare with df.head()).
df.select(pl.all().sort())

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,1,"""Abbing, Mr. An...","""female""",,0,0,"""110152""",0.0,,
2,0,1,"""Abbott, Mr. Ro...","""female""",,0,0,"""110152""",0.0,,
...,...,...,...,...,...,...,...,...,...,...,...
890,1,3,"""van Billiard, ...","""male""",74.0,8,5,"""WE/P 5735""",512.3292,"""G6""","""S"""
891,1,3,"""van Melkebeke,...","""male""",80.0,8,6,"""WE/P 5735""",512.3292,"""T""","""S"""


If you know that a column is already sorted, you can tell polars so that it can use faster, sort-aware algorithms.

In [9]:
# Check whether a column is marked as sorted by inspecting the Series `flags`.
# Both flags are False in the output: nothing has marked this column as sorted yet.
df["PassengerId"].flags

{'SORTED_ASC': False, 'SORTED_DESC': False}

In [10]:
# If you know a column is already sorted, mark it explicitly with `set_sorted`.
# polars does not verify the claim, so only do this for data that really is sorted.
# `with_columns` is used because `with_column` was deprecated and later removed from polars.
df_sorted = df.with_columns(
    pl.col("PassengerId").set_sorted()
)
df_sorted["PassengerId"].flags

{'SORTED_ASC': True, 'SORTED_DESC': False}

In [11]:
# now polars can take advantage of fast-tracked algorithms if it knows your column is sorted
# Aggregate the column that was flagged as sorted above
df_sorted.select(pl.col("PassengerId").max())

PassengerId
i64
891
