# Lazy Mode

## Import module

In [1]:
import polars as pl

## Data Path

In [2]:
# Relative path to the Titanic CSV; it is resolved against the kernel's working directory.
csv_file = "data/titanic.csv"

## `Eager Mode` & `Lazy Mode`

Find out the average age of passengers in each class.

In [None]:
# Eager mode: read_csv loads all of the data into memory, then the
# aggregation runs on that in-memory DataFrame.
# Note: group_by does not guarantee the row order of its result.
(
    pl.read_csv(csv_file)
    .group_by("Pclass")
    .agg(pl.col("Age").mean())
)

Pclass,Age
i64,f64
2,29.87763
3,25.14062
1,38.233441


In [None]:
# Lazy mode: scan_csv only builds a query; .collect() executes it and
# loads just the result into memory.
(
    pl.scan_csv(csv_file)
    .group_by("Pclass")
    .agg(pl.col("Age").mean())
    .collect()
)

Pclass,Age
i64,f64
2,29.87763
1,38.233441
3,25.14062


## `DataFrames` & `LazyFrames`

In [9]:
# Eager: read_csv reads the file right away and returns a DataFrame.
df_eager = pl.read_csv(csv_file)
# Display the first two rows.
df_eager.head(2)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""


In [None]:
# Without .collect(), the LazyFrame is only a query; no result is produced yet.
df_lazy = pl.scan_csv(csv_file)
df_lazy

In [15]:
# Limit the query to the first two rows, then run it with .collect() to get a DataFrame.
df_lazy.head(2).collect()

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""


## Schema & Column Names

In eager mode, use the `schema` and `columns` attributes of a `DataFrame` to retrieve its schema and column names.

In lazy mode, call `.collect_schema()` on the `LazyFrame` to resolve its schema, and `.collect_schema().names()` to get the column names.

In [None]:
# Schema of the eager DataFrame: each column name paired with its dtype.
df_eager.schema

Schema([('PassengerId', Int64),
        ('Survived', Int64),
        ('Pclass', Int64),
        ('Name', String),
        ('Sex', String),
        ('Age', Float64),
        ('SibSp', Int64),
        ('Parch', Int64),
        ('Ticket', String),
        ('Fare', Float64),
        ('Cabin', String),
        ('Embarked', String)])

In [22]:
# Column names of the eager DataFrame, as a list of strings.
df_eager.columns

['PassengerId',
 'Survived',
 'Pclass',
 'Name',
 'Sex',
 'Age',
 'SibSp',
 'Parch',
 'Ticket',
 'Fare',
 'Cabin',
 'Embarked']

In [None]:
# Column names of the LazyFrame: resolve its schema first, then list the names.
df_lazy.collect_schema().names()

['PassengerId',
 'Survived',
 'Pclass',
 'Name',
 'Sex',
 'Age',
 'SibSp',
 'Parch',
 'Ticket',
 'Fare',
 'Cabin',
 'Embarked']