In [4]:
# Re-process the raw training data: derive group and cabin features from the
# composite ID columns, encode the target as 0/1, and impute missing values.
import pandas as pd

# Load the initial dataset
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
data_path = '/Users/jamie/Desktop/Hult/bs2/data/train.csv'
df = pd.read_csv(data_path)

# Split PassengerId on '_' into the group and the number within the group,
# and store both parts as integers.
df[['Group', 'GroupNo']] = (
    df['PassengerId'].str.split('_', expand=True).astype(int)
)

# Split Cabin on '/' into its three parts. Rows with a missing Cabin yield
# missing values in all three derived columns.
df[['Deck', 'Num', 'Side']] = df['Cabin'].str.split('/', expand=True)

# Map 'Transported' to binary (True -> 1, False -> 0)
df['Transported'] = df['Transported'].map({True: 1, False: 0})

# Handling missing values
# Categorical columns: replace missing values with the mode (most frequent value).
# Numerical columns: replace missing values with the median.
# 'Cabin' is listed although it is dropped below; imputing it does not affect
# the final frame because Deck/Num/Side were already derived above.
# NOTE(review): 'Num' is in neither list, so its missing values are left as-is.
categorical_cols = ['HomePlanet', 'CryoSleep', 'Cabin', 'Destination', 'VIP', 'Deck', 'Side']
numerical_cols = ['Age', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']

# Assign the filled column back instead of calling fillna(inplace=True) on
# df[col]: the chained in-place form mutates a temporary selection, which
# raises a FutureWarning in recent pandas and does not update df at all once
# Copy-on-Write is enabled.
for col in categorical_cols:
    df[col] = df[col].fillna(df[col].mode()[0])

for col in numerical_cols:
    df[col] = df[col].fillna(df[col].median())

# Drop the original 'Cabin' column now that it is split into 'Deck', 'Num', 'Side'
df = df.drop(columns=['Cabin'])




In [6]:
# Display the processed frame; the rendered output below elides the middle rows.
df

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported,Group,GroupNo,Deck,Num,Side
0,0001_01,Europa,False,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,0,1,1,B,0,P
1,0002_01,Earth,False,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,1,2,1,F,0,S
2,0003_01,Europa,False,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,0,3,1,A,0,S
3,0003_02,Europa,False,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,0,3,2,A,0,S
4,0004_01,Earth,False,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,1,4,1,F,1,S
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8688,9276_01,Europa,False,55 Cancri e,41.0,True,0.0,6819.0,0.0,1643.0,74.0,Gravior Noxnuther,0,9276,1,A,98,P
8689,9278_01,Earth,True,PSO J318.5-22,18.0,False,0.0,0.0,0.0,0.0,0.0,Kurta Mondalley,0,9278,1,G,1499,S
8690,9279_01,Earth,False,TRAPPIST-1e,26.0,False,0.0,0.0,1872.0,1.0,0.0,Fayey Connon,1,9279,1,G,1500,S
8691,9280_01,Europa,False,55 Cancri e,32.0,False,0.0,1049.0,0.0,353.0,3235.0,Celeon Hontichre,0,9280,1,E,608,S
