## 04. Filtering rows

### Filtering rows: Indexing with `[]`

In [1]:
import polars as pl
# Read the Titanic passenger CSV into a polars DataFrame and preview
# its first five rows.
csv_file = "./data/titanic.csv"
df = pl.read_csv(csv_file)
df.head(5)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S"""
4,1,1,"""Futrelle, Mrs. Jacques Heath (…","""female""",35.0,1,0,"""113803""",53.1,"""C123""","""S"""
5,0,3,"""Allen, Mr. William Henry""","""male""",35.0,0,0,"""373450""",8.05,,"""S"""


#### Selecting individual row with `[]`

polars는 구시대적인 pandas와 다르게 눈에 보이는 index가 없다. 하지만 각 행에는 0부터 시작하는 위치(숨겨진 index)가 존재한다. 따라서 이 위치를 `[]`에 넣어 행을 불러올 수 있다.

In [4]:
# Inspect the column names together with their polars data types.
df.schema

Schema([('PassengerId', Int64),
        ('Survived', Int64),
        ('Pclass', Int64),
        ('Name', String),
        ('Sex', String),
        ('Age', Float64),
        ('SibSp', Int64),
        ('Parch', Int64),
        ('Ticket', String),
        ('Fare', Float64),
        ('Cabin', String),
        ('Embarked', String)])

In [7]:
# A single integer selects the row at that position (0 = first row);
# the recorded output shows it comes back as a one-row DataFrame.
df[0]

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""


In [8]:
# A list of integers selects several rows by position (here rows 1 and 5).
df[[1, 5]]

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
6,0,3,"""Moran, Mr. James""","""male""",,0,0,"""330877""",8.4583,,"""Q"""


In [9]:
# Slice: the end position is exclusive, as with Python lists.
df[:3] # rows at positions 0, 1 and 2

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S"""


In [14]:
# Show which positions range(2, 9) covers: 2 up to and including 8.
print([*range(2, 9)])

[2, 3, 4, 5, 6, 7, 8]


In [18]:
# A range object is accepted as a sequence of row positions (2 through 8).
df[range(2, 9)]

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S"""
4,1,1,"""Futrelle, Mrs. Jacques Heath (…","""female""",35.0,1,0,"""113803""",53.1,"""C123""","""S"""
5,0,3,"""Allen, Mr. William Henry""","""male""",35.0,0,0,"""373450""",8.05,,"""S"""
6,0,3,"""Moran, Mr. James""","""male""",,0,0,"""330877""",8.4583,,"""Q"""
7,0,1,"""McCarthy, Mr. Timothy J""","""male""",54.0,0,0,"""17463""",51.8625,"""E46""","""S"""
8,0,3,"""Palsson, Master. Gosta Leonard""","""male""",2.0,3,1,"""349909""",21.075,,"""S"""
9,1,3,"""Johnson, Mrs. Oscar W (Elisabe…","""female""",27.0,0,2,"""347742""",11.1333,,"""S"""


In [21]:
# numpy
import numpy as np

# Positions 0, 1 and 2 as a NumPy integer array.
print(np.arange(3))

# An integer NumPy array is accepted by `[]` as a list of row positions.
df[np.arange(3)]

[0 1 2]


PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S"""


#### Boolean `Series`

In [25]:
# Comparing a column with a scalar yields a Boolean Series; the printed
# output below also contains null entries.
print(df['Age'] > 30)

# Boolean-Series indexing with `[]` is no longer supported in polars, so the
# pandas-style line below is kept commented out for reference only.
# Use polars' `filter` method instead.
# df[df['Age'] > 30]

shape: (891,)
Series: 'Age' [bool]
[
	false
	true
	false
	true
	true
	…
	false
	false
	null
	false
	true
]


In [27]:
# Keep only the rows whose `Age` exceeds 30, then preview the first five.
df.filter(pl.col("Age") > 30).head()


PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
4,1,1,"""Futrelle, Mrs. Jacques Heath (…","""female""",35.0,1,0,"""113803""",53.1,"""C123""","""S"""
5,0,3,"""Allen, Mr. William Henry""","""male""",35.0,0,0,"""373450""",8.05,,"""S"""
7,0,1,"""McCarthy, Mr. Timothy J""","""male""",54.0,0,0,"""17463""",51.8625,"""E46""","""S"""
12,1,1,"""Bonnell, Miss. Elizabeth""","""female""",58.0,0,0,"""113783""",26.55,"""C103""","""S"""


### Filtering rows using `filter` and the Expression API

#### Applying conditions with `filter`

쉽게 생각해서 `filter()`는 특정 조건을 만족하는 행만 걸러주는 함수!

In [28]:
# Passengers travelling in first class (Pclass == 1).
df.filter(pl.col("Pclass") == 1).head()

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
4,1,1,"""Futrelle, Mrs. Jacques Heath (…","""female""",35.0,1,0,"""113803""",53.1,"""C123""","""S"""
7,0,1,"""McCarthy, Mr. Timothy J""","""male""",54.0,0,0,"""17463""",51.8625,"""E46""","""S"""
12,1,1,"""Bonnell, Miss. Elizabeth""","""female""",58.0,0,0,"""113783""",26.55,"""C103""","""S"""
24,1,1,"""Sloper, Mr. William Thompson""","""male""",28.0,0,0,"""113788""",35.5,"""A6""","""S"""


#### Syntax of `filter`
Inside the `filter` method we pass an _**expression**_ and apply a Boolean condition to it:

`pl.col('Pclass') == 1` -> 특정 열에서의 행을 걸러주는 조건 -> True, False를 반환.

This expression has two parts:
- `pl.col('Pclass')` expression selects the `Pclass` column from `df`
    - 특정 열을 선택하고
- `== 1` applies a Boolean condition to this expression - 그 열의 행에 적용하고 싶은 조건을 건다.

In this example we choose all rows where the number of parents & children (`Parch`) is greater than 1

In [30]:
# Rows with more than one parent/child aboard, reduced to three columns.
# Plain column-name strings are enough in `select` when the columns are only
# being displayed, so `pl.col` is not needed there.
df.filter(pl.col("Parch") > 1).select("PassengerId", "Parch", "SibSp").head()

PassengerId,Parch,SibSp
i64,i64,i64
9,2,0
14,5,1
26,5,1
28,2,3
44,2,1


In [31]:
# Compare two columns row by row: keep passengers with more parents/children
# (`Parch`) than siblings/spouses (`SibSp`).
(
    df.filter(pl.col("Parch") > pl.col("SibSp"))
    .select("PassengerId", "Parch", "SibSp")
    .head()
)

PassengerId,Parch,SibSp
i64,i64,i64
9,2,0
14,5,1
26,5,1
44,2,1
55,1,0


#### Conditions based on row numbers with `filter`

polars는 원래 index가 없다. 하지만 `.with_row_index(name = "~")`로 눈에 보이는 index 열을 추가할 수 있다.

In [32]:
# Reload the CSV and attach an explicit row-number column named "index"
# as the first column.
df = pl.read_csv(csv_file).with_row_index(name="index")
df.head(3)

index,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
u32,i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
0,1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
1,2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
2,3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S"""


In [33]:
# Row-number condition: keep the rows whose "index" value is below 4.
df.filter(pl.col("index") < 4)

index,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
u32,i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
0,1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""
1,2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
2,3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S"""
3,4,1,1,"""Futrelle, Mrs. Jacques Heath (…","""female""",35.0,1,0,"""113803""",53.1,"""C123""","""S"""


근데 굳이 이렇게 할 이유가 있을까..? ㅋㅋ

#### Filtering on a Boolean column

In [34]:
# Store the condition as a Boolean column named `First_class` (true when
# Pclass is 1, false otherwise), then hand that column straight to `filter`
# to keep only the rows where it is true.
(
    df
    .with_columns(First_class=pl.col("Pclass") == 1)
    .filter(pl.col("First_class"))
    .head()
)

index,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,First_class
u32,i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str,bool
1,2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C""",True
3,4,1,1,"""Futrelle, Mrs. Jacques Heath (…","""female""",35.0,1,0,"""113803""",53.1,"""C123""","""S""",True
6,7,0,1,"""McCarthy, Mr. Timothy J""","""male""",54.0,0,0,"""17463""",51.8625,"""E46""","""S""",True
11,12,1,1,"""Bonnell, Miss. Elizabeth""","""female""",58.0,0,0,"""113783""",26.55,"""C103""","""S""",True
23,24,1,1,"""Sloper, Mr. William Thompson""","""male""",28.0,0,0,"""113788""",35.5,"""A6""","""S""",True


In [36]:
# This does the same job as the previous cell: `pl.col('Pclass') == 1` is
# itself a Boolean column expression, so it can be passed to `filter` directly.
# The previous cell mainly shows that a stored Boolean column can also be
# used for filtering.
# These notes are comments rather than a trailing string literal so that the
# DataFrame remains the cell's last expression.
(
    df
    .filter(
        pl.col('Pclass') == 1
    )
    .head()
)

index,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
u32,i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
3,4,1,1,"""Futrelle, Mrs. Jacques Heath (…","""female""",35.0,1,0,"""113803""",53.1,"""C123""","""S"""
6,7,0,1,"""McCarthy, Mr. Timothy J""","""male""",54.0,0,0,"""17463""",51.8625,"""E46""","""S"""
11,12,1,1,"""Bonnell, Miss. Elizabeth""","""female""",58.0,0,0,"""113783""",26.55,"""C103""","""S"""
23,24,1,1,"""Sloper, Mr. William Thompson""","""male""",28.0,0,0,"""113788""",35.5,"""A6""","""S"""


In [37]:
# Add a Boolean column flagging passengers whose `Age` is below 30.
df = df.with_columns(less_than_30=pl.col("Age") < 30)

# Plain-text preview of the first five rows.
print(df.head())

shape: (5, 14)
┌───────┬─────────────┬──────────┬────────┬───┬─────────┬───────┬──────────┬──────────────┐
│ index ┆ PassengerId ┆ Survived ┆ Pclass ┆ … ┆ Fare    ┆ Cabin ┆ Embarked ┆ less_than_30 │
│ ---   ┆ ---         ┆ ---      ┆ ---    ┆   ┆ ---     ┆ ---   ┆ ---      ┆ ---          │
│ u32   ┆ i64         ┆ i64      ┆ i64    ┆   ┆ f64     ┆ str   ┆ str      ┆ bool         │
╞═══════╪═════════════╪══════════╪════════╪═══╪═════════╪═══════╪══════════╪══════════════╡
│ 0     ┆ 1           ┆ 0        ┆ 3      ┆ … ┆ 7.25    ┆ null  ┆ S        ┆ true         │
│ 1     ┆ 2           ┆ 1        ┆ 1      ┆ … ┆ 71.2833 ┆ C85   ┆ C        ┆ false        │
│ 2     ┆ 3           ┆ 1        ┆ 3      ┆ … ┆ 7.925   ┆ null  ┆ S        ┆ true         │
│ 3     ┆ 4           ┆ 1        ┆ 1      ┆ … ┆ 53.1    ┆ C123  ┆ S        ┆ false        │
│ 4     ┆ 5           ┆ 0        ┆ 3      ┆ … ┆ 8.05    ┆ null  ┆ S        ┆ false        │
└───────┴─────────────┴──────────┴────────┴───┴─────────┴───────┴

In [39]:
# Use the stored Boolean column directly as the filter condition.
df.filter(pl.col("less_than_30")).head()

index,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,less_than_30
u32,i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str,bool
0,1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S""",True
2,3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S""",True
7,8,0,3,"""Palsson, Master. Gosta Leonard""","""male""",2.0,3,1,"""349909""",21.075,,"""S""",True
8,9,1,3,"""Johnson, Mrs. Oscar W (Elisabe…","""female""",27.0,0,2,"""347742""",11.1333,,"""S""",True
9,10,1,2,"""Nasser, Mrs. Nicholas (Adele A…","""female""",14.0,1,0,"""237736""",30.0708,,"""C""",True


In [40]:
# Aged 30 or over: `~` negates the Boolean column.
# NOTE(review): rows where `less_than_30` is null (missing Age) presumably
# match neither this filter nor the non-negated one — confirm.
df.filter(~pl.col("less_than_30")).head()

index,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,less_than_30
u32,i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str,bool
1,2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C""",False
3,4,1,1,"""Futrelle, Mrs. Jacques Heath (…","""female""",35.0,1,0,"""113803""",53.1,"""C123""","""S""",False
4,5,0,3,"""Allen, Mr. William Henry""","""male""",35.0,0,0,"""373450""",8.05,,"""S""",False
6,7,0,1,"""McCarthy, Mr. Timothy J""","""male""",54.0,0,0,"""17463""",51.8625,"""E46""","""S""",False
11,12,1,1,"""Bonnell, Miss. Elizabeth""","""female""",58.0,0,0,"""113783""",26.55,"""C103""","""S""",False


#### Partitioning a `DataFrame`

In [41]:
# Split the frame into one DataFrame per distinct `Sex` value.
# With `as_dict=True` the result is a dict; the displayed output shows the
# keys are one-element tuples such as ('male',).
df_sex_dict = df.partition_by(by=["Sex"], as_dict=True)

df_sex_dict

{('male',): shape: (577, 14)
 ┌───────┬─────────────┬──────────┬────────┬───┬─────────┬───────┬──────────┬──────────────┐
 │ index ┆ PassengerId ┆ Survived ┆ Pclass ┆ … ┆ Fare    ┆ Cabin ┆ Embarked ┆ less_than_30 │
 │ ---   ┆ ---         ┆ ---      ┆ ---    ┆   ┆ ---     ┆ ---   ┆ ---      ┆ ---          │
 │ u32   ┆ i64         ┆ i64      ┆ i64    ┆   ┆ f64     ┆ str   ┆ str      ┆ bool         │
 ╞═══════╪═════════════╪══════════╪════════╪═══╪═════════╪═══════╪══════════╪══════════════╡
 │ 0     ┆ 1           ┆ 0        ┆ 3      ┆ … ┆ 7.25    ┆ null  ┆ S        ┆ true         │
 │ 4     ┆ 5           ┆ 0        ┆ 3      ┆ … ┆ 8.05    ┆ null  ┆ S        ┆ false        │
 │ 5     ┆ 6           ┆ 0        ┆ 3      ┆ … ┆ 8.4583  ┆ null  ┆ Q        ┆ null         │
 │ 6     ┆ 7           ┆ 0        ┆ 1      ┆ … ┆ 51.8625 ┆ E46   ┆ S        ┆ false        │
 │ 7     ┆ 8           ┆ 0        ┆ 3      ┆ … ┆ 21.075  ┆ null  ┆ S        ┆ true         │
 │ …     ┆ …           ┆ …        ┆ …    

In [44]:
# The partition keys are one-element tuples, hence the ('male',) lookup;
# the selected partition is then filtered like any other DataFrame.
df_sex_dict[("male",)].filter(pl.col("Age") > 30).head()

index,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,less_than_30
u32,i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str,bool
4,5,0,3,"""Allen, Mr. William Henry""","""male""",35.0,0,0,"""373450""",8.05,,"""S""",False
6,7,0,1,"""McCarthy, Mr. Timothy J""","""male""",54.0,0,0,"""17463""",51.8625,"""E46""","""S""",False
13,14,0,3,"""Andersson, Mr. Anders Johan""","""male""",39.0,1,5,"""347082""",31.275,,"""S""",False
20,21,0,2,"""Fynney, Mr. Joseph J""","""male""",35.0,0,0,"""239865""",26.0,,"""S""",False
21,22,1,2,"""Beesley, Mr. Lawrence""","""male""",34.0,0,0,"""248698""",13.0,"""D56""","""S""",False


In [47]:
# Partitioning works, but the same rows can be obtained in one step by
# combining conditions with & (and) or | (or).
# The partition approach above is probably more useful when the data has to
# be split up and processed case by case.
(
    df
    .filter(
        (pl.col('Sex') == 'male') & (pl.col('Age') > 30)
    )
    .head()
)

index,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,less_than_30
u32,i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str,bool
4,5,0,3,"""Allen, Mr. William Henry""","""male""",35.0,0,0,"""373450""",8.05,,"""S""",False
6,7,0,1,"""McCarthy, Mr. Timothy J""","""male""",54.0,0,0,"""17463""",51.8625,"""E46""","""S""",False
13,14,0,3,"""Andersson, Mr. Anders Johan""","""male""",39.0,1,5,"""347082""",31.275,,"""S""",False
20,21,0,2,"""Fynney, Mr. Joseph J""","""male""",35.0,0,0,"""239865""",26.0,,"""S""",False
21,22,1,2,"""Beesley, Mr. Lawrence""","""male""",34.0,0,0,"""248698""",13.0,"""D56""","""S""",False


### Filtering rows: Multiple filter conditions

In [48]:
# Reload the CSV so `df` no longer carries the helper columns
# ("index", "less_than_30") added in earlier cells.
df = pl.read_csv(csv_file)

In [49]:
# Conditions can be stacked by chaining `filter` calls: first-class
# passengers first, then those older than 70.
df.filter(pl.col("Pclass") == 1).filter(pl.col("Age") > 70).head(3)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
97,0,1,"""Goldschmidt, Mr. George B""","""male""",71.0,0,0,"""PC 17754""",34.6542,"""A5""","""C"""
494,0,1,"""Artagaveytia, Mr. Ramon""","""male""",71.0,0,0,"""PC 17609""",49.5042,,"""C"""
631,1,1,"""Barkworth, Mr. Algernon Henry …","""male""",80.0,0,0,"""27042""",30.0,"""A23""","""S"""


In [50]:
# Same selection in a single `filter`, joining the conditions with `&`.
# Each comparison needs its own parentheses because `&` binds more tightly
# than `==` and `>`.
df.filter((pl.col("Pclass") == 1) & (pl.col("Age") > 70)).head()

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
97,0,1,"""Goldschmidt, Mr. George B""","""male""",71.0,0,0,"""PC 17754""",34.6542,"""A5""","""C"""
494,0,1,"""Artagaveytia, Mr. Ramon""","""male""",71.0,0,0,"""PC 17609""",49.5042,,"""C"""
631,1,1,"""Barkworth, Mr. Algernon Henry …","""male""",80.0,0,0,"""27042""",30.0,"""A23""","""S"""


In [51]:
# Better way: pass each condition to `filter` as a separate argument;
# the recorded output matches the `&` version.
df.filter(pl.col("Pclass") == 1, pl.col("Age") > 70).head()

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
97,0,1,"""Goldschmidt, Mr. George B""","""male""",71.0,0,0,"""PC 17754""",34.6542,"""A5""","""C"""
494,0,1,"""Artagaveytia, Mr. Ramon""","""male""",71.0,0,0,"""PC 17609""",49.5042,,"""C"""
631,1,1,"""Barkworth, Mr. Algernon Henry …","""male""",80.0,0,0,"""27042""",30.0,"""A23""","""S"""


In [52]:
# Apply one condition across every column at once: keep the rows in which
# no column is null.
df.filter(pl.all_horizontal(pl.all().is_not_null())).head()

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
4,1,1,"""Futrelle, Mrs. Jacques Heath (…","""female""",35.0,1,0,"""113803""",53.1,"""C123""","""S"""
7,0,1,"""McCarthy, Mr. Timothy J""","""male""",54.0,0,0,"""17463""",51.8625,"""E46""","""S"""
11,1,3,"""Sandstrom, Miss. Marguerite Ru…","""female""",4.0,1,1,"""PP 9549""",16.7,"""G6""","""S"""
12,1,1,"""Bonnell, Miss. Elizabeth""","""female""",58.0,0,0,"""113783""",26.55,"""C103""","""S"""


In [56]:
# Female passengers whose age lies between 10 and 13; the recorded output
# includes ages 10 and 13, so both bounds are inclusive here.
df.filter(pl.col("Age").is_between(10, 13), pl.col("Sex") == "female")

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
420,0,3,"""Van Impe, Miss. Catharina""","""female""",10.0,0,2,"""345773""",24.15,,"""S"""
447,1,2,"""Mellinger, Miss. Madeleine Vio…","""female""",13.0,0,1,"""250644""",19.5,,"""S"""
543,0,3,"""Andersson, Miss. Sigrid Elisab…","""female""",11.0,4,2,"""347082""",31.275,,"""S"""
781,1,3,"""Ayoub, Miss. Banoura""","""female""",13.0,0,0,"""2687""",7.2292,,"""C"""


In [57]:
# or conditions
(
    df
    .filter(
        (pl.col('Age') > 50) | (pl.col('Pclass') == 1)
    )
)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C"""
4,1,1,"""Futrelle, Mrs. Jacques Heath (…","""female""",35.0,1,0,"""113803""",53.1,"""C123""","""S"""
7,0,1,"""McCarthy, Mr. Timothy J""","""male""",54.0,0,0,"""17463""",51.8625,"""E46""","""S"""
12,1,1,"""Bonnell, Miss. Elizabeth""","""female""",58.0,0,0,"""113783""",26.55,"""C103""","""S"""
16,1,2,"""Hewlett, Mrs. (Mary D Kingcome…","""female""",55.0,0,0,"""248706""",16.0,,"""S"""
…,…,…,…,…,…,…,…,…,…,…,…
872,1,1,"""Beckwith, Mrs. Richard Leonard…","""female""",47.0,1,1,"""11751""",52.5542,"""D35""","""S"""
873,0,1,"""Carlsson, Mr. Frans Olof""","""male""",33.0,0,0,"""695""",5.0,"""B51 B53 B55""","""S"""
880,1,1,"""Potter, Mrs. Thomas Jr (Lily A…","""female""",56.0,0,1,"""11767""",83.1583,"""C50""","""C"""
888,1,1,"""Graham, Miss. Margaret Edith""","""female""",19.0,0,0,"""112053""",30.0,"""B42""","""S"""
