# Create Columns

In [2]:
import polars as pl

# Read the Titanic passenger CSV into a polars DataFrame and preview one row
# to check the column names and inferred dtypes.
df = pl.read_csv("data/titanic_dataset.csv")
df.head(n=1)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S"""


In [15]:
# Derive four feature columns. Each keyword argument names the output column.
# Every expression reads only columns from the original CSV (Fare, Pclass,
# SibSp, Parch, Age), so re-running this cell produces the same frame.
df = df.with_columns(
    # Fare with a 10% surcharge added.
    Fare_with_surcharge=pl.col("Fare") * 1.1,
    # Fare divided by Pclass (used to compare fares across classes).
    Fare_per_class=pl.col("Fare") / pl.col("Pclass"),
    # Family size: accompanying siblings + parents + the passenger.
    Family_size=pl.col("SibSp") + pl.col("Parch") + 1,
    # Whether the passenger is a minor (Age < 18), where Age is present.
    IsMinor=pl.col("Age") < 18,
)
df.head(n=1)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Fare_with_surcharge,Fare_per_class,Family_size,IsMinor
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str,f64,f64,i64,bool
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S""",7.975,2.416667,2,False


In [16]:
# Attach whole-column Fare aggregates as new columns: the maximum fare, the
# mean fare, and the difference between them. The result is only previewed;
# `df` itself is not reassigned.
df.with_columns(
    max_fare=pl.col("Fare").max(),
    ave_fare=pl.col("Fare").mean(),
    Fare_diff=pl.col("Fare").max() - pl.col("Fare").mean(),
).head(n=1)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Fare_with_surcharge,Fare_per_class,Family_size,IsMinor,max_fare,ave_fare,Fare_diff
i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str,f64,f64,i64,bool,f64,f64,f64
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S""",7.975,2.416667,2,False,512.3292,32.204208,480.124992


## Row Index

In [17]:
# Prepend a 0-based row-number column named "index" and preview the first
# five rows. The result is not stored back into `df`.
df.with_row_index(name="index", offset=0).head(n=5)

index,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Fare_with_surcharge,Fare_per_class,Family_size,IsMinor
u32,i64,i64,i64,str,str,f64,i64,i64,str,f64,str,str,f64,f64,i64,bool
0,1,0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,"""A/5 21171""",7.25,,"""S""",7.975,2.416667,2,False
1,2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,"""PC 17599""",71.2833,"""C85""","""C""",78.41163,71.2833,2,False
2,3,1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,"""STON/O2. 3101282""",7.925,,"""S""",8.7175,2.641667,1,False
3,4,1,1,"""Futrelle, Mrs. Jacques Heath (…","""female""",35.0,1,0,"""113803""",53.1,"""C123""","""S""",58.41,53.1,2,False
4,5,0,3,"""Allen, Mr. William Henry""","""male""",35.0,0,0,"""373450""",8.05,,"""S""",8.855,2.683333,1,False


# Modify Columns

In [27]:
# Overwrite existing columns in place (same names, same positions) with
# transformed values. The result is only previewed; `df` is not reassigned.
df.with_columns(
    # Convert the Name string column to title case.
    Name=pl.col("Name").str.to_titlecase(),
    # Convert the Cabin string column to upper case.
    Cabin=pl.col("Cabin").str.to_uppercase(),
    # Convert the Embarked string column to lower case.
    Embarked=pl.col("Embarked").str.to_lowercase(),
    # Round Fare to one decimal place.
    Fare=pl.col("Fare").round(1),
    # Cast Age from float to a 64-bit integer.
    Age=pl.col("Age").cast(pl.Int64),
).head(n=5)

PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Fare_with_surcharge,Fare_per_class,Family_size,IsMinor
i64,i64,i64,str,str,i64,i64,i64,str,f64,str,str,f64,f64,i64,bool
1,0,3,"""Braund, Mr. Owen Harris""","""male""",22,1,0,"""A/5 21171""",7.3,,"""s""",7.975,2.416667,2,False
2,1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38,1,0,"""PC 17599""",71.3,"""C85""","""c""",78.41163,71.2833,2,False
3,1,3,"""Heikkinen, Miss. Laina""","""female""",26,0,0,"""STON/O2. 3101282""",7.9,,"""s""",8.7175,2.641667,1,False
4,1,1,"""Futrelle, Mrs. Jacques Heath (…","""female""",35,1,0,"""113803""",53.1,"""C123""","""s""",58.41,53.1,2,False
5,0,3,"""Allen, Mr. William Henry""","""male""",35,0,0,"""373450""",8.1,,"""s""",8.855,2.683333,1,False


# Delete Columns

In [28]:
# Remove the identifier-like columns and preview the first five rows of the
# remaining frame. The result is not stored back into `df`.
df.drop("PassengerId", "Ticket", "Cabin").head(n=5)


Survived,Pclass,Name,Sex,Age,SibSp,Parch,Fare,Embarked,Fare_with_surcharge,Fare_per_class,Family_size,IsMinor
i64,i64,str,str,f64,i64,i64,f64,str,f64,f64,i64,bool
0,3,"""Braund, Mr. Owen Harris""","""male""",22.0,1,0,7.25,"""S""",7.975,2.416667,2,False
1,1,"""Cumings, Mrs. John Bradley (Fl…","""female""",38.0,1,0,71.2833,"""C""",78.41163,71.2833,2,False
1,3,"""Heikkinen, Miss. Laina""","""female""",26.0,0,0,7.925,"""S""",8.7175,2.641667,1,False
1,1,"""Futrelle, Mrs. Jacques Heath (…","""female""",35.0,1,0,53.1,"""S""",58.41,53.1,2,False
0,3,"""Allen, Mr. William Henry""","""male""",35.0,0,0,8.05,"""S""",8.855,2.683333,1,False
