In [1]:
import pandas as pd
import numpy as np
# getting the train data
df = pd.read_csv('train.csv')

# Fill missing ages with median
df['Age'].fillna(df['Age'].median(), inplace=True)

# Fill missing embarkation ports with mode
df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)

# add new feature familysize
df["FamilySize"] = df["SibSp"] + df["Parch"] + 1

# categorize familysize
df["FamilyGroup"] = pd.cut(
    df["FamilySize"],
    bins=[0, 1, 4, np.inf],
    labels=["Alone", "Small", "Large"]
)

# add a new binary feature to determine whether the passenger is alone or not
df["IsAlone"] = (df["FamilySize"] == 1).astype(int)

# combine pclass and familysize to model how family size impacts survival differently across classes:
df["Pclass_FamilySize"] = df["Pclass"] * df["FamilySize"]

# Extract titles using regex
df["Title"] = df["Name"].str.extract(r' ([A-Za-z]+)\.', expand=False)

# Group rare titles or consolidate categories
df["Title"] = df["Title"].replace(["Rev", "Dr", "Col", "Major"], "Rare")
df["Title"] = df["Title"].replace(["Mlle", "Ms"], "Miss")
df["Title"] = df["Title"].replace("Mme", "Mrs")

title_mapping = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Rare": 5}
df["Title"] = df["Title"].map(title_mapping)

# Converted Sex to binary (0 for male, 1 for female).
df["Sex"] = df["Sex"].map({"male": 0, "female": 1})

# One-hot encoded Embarked (port of embarkation: S, C, Q).
df = pd.get_dummies(df, columns=["Embarked"], prefix="Embarked")

df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,FamilySize,FamilyGroup,IsAlone,Pclass_FamilySize,Title,Embarked_C,Embarked_Q,Embarked_S
0,1,0,3,"Braund, Mr. Owen Harris",0,22.0,1,0,A/5 21171,7.2500,,2,Small,0,6,1.0,False,False,True
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,38.0,1,0,PC 17599,71.2833,C85,2,Small,0,2,3.0,True,False,False
2,3,1,3,"Heikkinen, Miss. Laina",1,26.0,0,0,STON/O2. 3101282,7.9250,,1,Alone,1,3,2.0,False,False,True
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,35.0,1,0,113803,53.1000,C123,2,Small,0,2,3.0,False,False,True
4,5,0,3,"Allen, Mr. William Henry",0,35.0,0,0,373450,8.0500,,1,Alone,1,3,1.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",0,27.0,0,0,211536,13.0000,,1,Alone,1,2,5.0,False,False,True
887,888,1,1,"Graham, Miss. Margaret Edith",1,19.0,0,0,112053,30.0000,B42,1,Alone,1,1,2.0,False,False,True
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",1,28.0,1,2,W./C. 6607,23.4500,,4,Small,0,12,2.0,False,False,True
889,890,1,1,"Behr, Mr. Karl Howell",0,26.0,0,0,111369,30.0000,C148,1,Alone,1,1,1.0,True,False,False
