In [4]:
import polars as pl

# Creating a DataFrame

In [5]:
# Eagerly read the Titanic passenger CSV into an in-memory DataFrame.
df = pl.read_csv("notebooks/data/titanic.csv")

# Preview the first five rows (the default row count for head()).
df.head(5)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Ow…","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
2,1,1,"""Cumings, Mrs. …","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
3,1,3,"""Heikkinen, Mis…","""female""",26.0,0,0,"""STON/O2. 31012…",7.925,,"""S"""
4,1,1,"""Futrelle, Mrs.…","""female""",35.0,1,0,"""113803""",53.1,"""C123""","""S"""
5,0,3,"""Allen, Mr. Wil…","""male""",35.0,0,0,"""373450""",8.05,,"""S"""


# Expressions

In [6]:
# Square-bracket indexing: the first three rows of three named columns.
df[:3, ["Pclass", "Name", "Age"]]

Pclass,Name,Age
i64,str,f64
3,"""Braund, Mr. Ow…",22.0
1,"""Cumings, Mrs. …",38.0
3,"""Heikkinen, Mis…",26.0


In [10]:
# Pick the same three columns, this time through column expressions.
data = df.select(
    [
        pl.col("Pclass"),
        pl.col("Name"),
        pl.col("Age"),
    ]
)

# Show the first four rows of the selection.
data.head(4)

Pclass,Name,Age
i64,str,f64
3,"""Braund, Mr. Ow…",22.0
1,"""Cumings, Mrs. …",38.0
3,"""Heikkinen, Mis…",26.0
1,"""Futrelle, Mrs.…",35.0


In [11]:
# Same three columns, now transformed inside the expressions:
# names are lower-cased and ages are rounded to two decimals.
data = df.select(
    [
        pl.col("Pclass"),
        pl.col("Name").str.to_lowercase(),
        pl.col("Age").round(2),
    ]
)

# Show the first four rows of the transformed selection.
data.head(4)

Pclass,Name,Age
i64,str,f64
3,"""braund, mr. ow…",22.0
1,"""cumings, mrs. …",38.0
3,"""heikkinen, mis…",26.0
1,"""futrelle, mrs.…",35.0


# DataFrames and LazyFrames

**Eager mode**: each line of code is run as soon as it is encountered.

**Lazy mode**: each line is added to a query plan instead of being run immediately; the plan is optimized and only executed when the result is requested (e.g. with `.collect()`).

In [17]:
# Path of the CSV file that is loaded in both modes below.
csvFile = "notebooks/data/titanic.csv"

# Eager mode: read_csv gives back a DataFrame.
dfEager = pl.read_csv(csvFile)

# Lazy mode: scan_csv gives back a LazyFrame.
dfLazy = pl.scan_csv(csvFile)

# Display the concrete class of each object side by side.
(type(dfEager), type(dfLazy))

(polars.dataframe.frame.DataFrame, polars.lazyframe.frame.LazyFrame)

In [24]:
# Preview the first four rows of the eagerly loaded DataFrame.
dfEager.head(4)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Ow…","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
2,1,1,"""Cumings, Mrs. …","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
3,1,3,"""Heikkinen, Mis…","""female""",26.0,0,0,"""STON/O2. 31012…",7.925,,"""S"""
4,1,1,"""Futrelle, Mrs.…","""female""",35.0,1,0,"""113803""",53.1,"""C123""","""S"""


In [18]:
# On the LazyFrame, head(4) only adds a row limit to the query plan and
# returns another LazyFrame; no rows are displayed by this cell.
# Append .collect() to execute the plan and get the four rows.
dfLazy.head(4)

In [23]:
# Chain a column rename and a four-row limit on the lazy frame.
# Both steps are recorded in the query plan rather than run here.
dfLazy.rename({"PassengerId": "Id"}).head(4)

In [26]:
# Print the query plan for a CSV scan followed by a column rename.
# explain() returns the plan as text, so print() keeps its line breaks.
print(pl.scan_csv(csvFile).rename({"PassengerId": "Id"}).explain())

RENAME

    CSV SCAN notebooks/data/titanic.csv
    PROJECT */12 COLUMNS


In [32]:
# Build a lazy query with two separate filters and a two-row limit, then
# print its plan. With no arguments explain() shows the optimized plan,
# in which both predicates are merged into one SELECTION at the CSV scan.
print(
    pl.scan_csv(csvFile)
    .filter(pl.col("Fare") < 10)
    .filter(pl.col("Sex") == "female")
    .head(2)
    .explain()
)

SLICE[offset: 0, len: 2]

    CSV SCAN notebooks/data/titanic.csv
    PROJECT */12 COLUMNS
    SELECTION: [([(col("Sex")) == (Utf8(female))]) & ([(col("Fare")) < (10.0)])]


In [42]:
# Same query, but print the plan exactly as written (optimized=False):
# the two filters stay as separate FILTER steps above the CSV scan.
print(
    pl.scan_csv(csvFile)
    .filter(pl.col("Fare") < 10)
    .filter(pl.col("Sex") == "female")
    .head(2)
    .explain(optimized=False)
)

SLICE[offset: 0, len: 2]
  FILTER [(col("Sex")) == (Utf8(female))] FROM
  FILTER [(col("Fare")) < (10)] FROM

    CSV SCAN notebooks/data/titanic.csv
    PROJECT */12 COLUMNS


In [43]:
# Same query with the optimizer explicitly enabled (optimized=True):
# both predicates are combined into a single SELECTION at the CSV scan.
print(
    pl.scan_csv(csvFile)
    .filter(pl.col("Fare") < 10)
    .filter(pl.col("Sex") == "female")
    .head(2)
    .explain(optimized=True)
)

SLICE[offset: 0, len: 2]

    CSV SCAN notebooks/data/titanic.csv
    PROJECT */12 COLUMNS
    SELECTION: [([(col("Sex")) == (Utf8(female))]) & ([(col("Fare")) < (10.0)])]
