In [None]:
import pandas as pd
import numpy as np

# Read the passenger records from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer="Titanic-Dataset.csv")
# Preview the first five rows of the freshly loaded frame
print("Original Data:\n", df.head(n=5))

In [None]:
# a. Handle missing Age and Cabin values
# The filled Series is assigned back to its column instead of calling
# fillna(inplace=True) on df['Age'] / df['Cabin']: that form is chained
# assignment on a column selection, which pandas 2.x flags with a
# FutureWarning and which leaves df unchanged once Copy-on-Write is enabled.
df['Age'] = df['Age'].fillna(df['Age'].median())  # Median imputation for Age
df['Cabin'] = df['Cabin'].fillna('Unknown')       # Fill Cabin with 'Unknown'

In [None]:
print("\nAfter Handling Missing Age and Cabin:\n", df[['Age', 'Cabin']].head())

# b. Convert Sex and Embarked to numeric form
# NOTE: this cell replaces the string-valued Sex column and removes the
# Embarked column, so it is meant to run once per fresh load of df.
# Any Sex value other than 'male'/'female' is absent from the mapping and becomes NaN.
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
# Fill missing Embarked values with the most frequent value. The result is
# assigned back rather than using fillna(inplace=True) on the column selection,
# which is chained assignment and does not update df under Copy-on-Write.
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])
# One-hot encode Embarked; drop_first=True omits the first category's column.
# dtype=int yields 0/1 indicator columns instead of the bool columns that
# newer pandas versions produce by default.
df = pd.get_dummies(df, columns=['Embarked'], drop_first=True, dtype=int)

print("\nAfter Encoding Sex and Embarked:\n", df[['Sex'] + [col for col in df.columns if 'Embarked' in col]].head())


In [None]:
# c. Create new feature FamilySize
# FamilySize is the element-wise sum of the SibSp and Parch columns
df['FamilySize'] = df['SibSp'].add(df['Parch'])
# Show the two source columns next to the derived one
print("\nAfter Creating FamilySize:\n", df.loc[:, ['SibSp', 'Parch', 'FamilySize']].head())


In [None]:

# d. Bin Fare into categories
# Bin edges: [0, 30] -> Low, (30, 70] -> Medium, (70, inf] -> High
fare_bins = [0, 30, 70, np.inf]
fare_labels = ['Low', 'Medium', 'High']
# include_lowest=True closes the first interval on the left. Without it the
# first bin is (0, 30], so a Fare of exactly 0 falls outside every bin and
# is silently assigned NaN instead of 'Low'.
df['FareCategory'] = pd.cut(
    df['Fare'],
    bins=fare_bins,
    labels=fare_labels,
    include_lowest=True,
)

print("\nAfter Binning Fare:\n", df[['Fare', 'FareCategory']].head())