# Titanic Survival Prediction – Advanced Model (XGBoost)
This notebook loads data, performs feature engineering, trains an XGBoost model, and generates a submission file.

In [ ]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from xgboost import XGBClassifier

# Read the Titanic training and test CSVs (in that order) into DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/titanic/train.csv',
        '/kaggle/input/titanic/test.csv',
    )
)

In [ ]:
# Feature Engineering
# Raw string: in a normal string literal '\.' is an invalid escape sequence.
# The pattern captures the run of letters that follows a space and precedes a period.
TITLE_PATTERN = r' ([A-Za-z]+)\.'
rare = ['Lady','Countess','Capt','Col','Don','Dr','Major','Rev','Sir','Jonkheer','Dona']
TITLE_ALIASES = {'Mlle':'Miss','Ms':'Miss','Mme':'Mrs'}


def add_engineered_features(df):
    """Add the Title, FamilySize and IsAlone columns to ``df`` in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``Name``, ``SibSp`` and ``Parch`` columns.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned for convenience.

    Notes
    -----
    Only ``Name``, ``SibSp`` and ``Parch`` are read and none of them is
    modified, so calling this again on the same frame recomputes the same
    three columns.
    """
    df['Title'] = (
        df['Name']
        # expand=False returns a Series for the single capture group;
        # names that do not match the pattern yield NaN.
        .str.extract(TITLE_PATTERN, expand=False)
        # Collapse the infrequent titles into one bucket ...
        .replace(rare, 'Rare')
        # ... and fold spelling variants into their common form.
        .replace(TITLE_ALIASES)
    )
    # The passenger counts as a member of their own family, hence the +1.
    df['FamilySize'] = df['SibSp'] + df['Parch'] + 1
    df['IsAlone'] = (df['FamilySize'] == 1).astype(int)
    return df


# Apply the identical transformation to both splits so their columns stay aligned.
for df in [train, test]:
    add_engineered_features(df)


In [ ]:
# Encode, Impute
cols = ['Sex','Embarked','Title']
for col in cols:
    le = LabelEncoder()
    # astype(str) turns missing values into the literal label 'nan', so
    # missingness is encoded as its own category instead of breaking the encoder.
    train_labels = train[col].astype(str)
    test_labels = test[col].astype(str)
    # Fit on the labels of BOTH splits: LabelEncoder.transform raises
    # ValueError for a label it did not see during fit, so fitting on train
    # alone would crash whenever test contains a category absent from train.
    le.fit(pd.concat([train_labels, test_labels], ignore_index=True))
    train[col] = le.transform(train_labels)
    test[col] = le.transform(test_labels)

# Median imputation for the numeric columns; the medians are learned from the
# training split only and then reused for the test split.
imp = SimpleImputer(strategy='median')
train[['Age','Fare']] = imp.fit_transform(train[['Age','Fare']])
test[['Age','Fare']] = imp.transform(test[['Age','Fare']])

In [ ]:
# Columns fed to the model; the same list selects from both splits so the
# training and prediction matrices share column order.
features = ['Pclass','Sex','Age','Fare','Embarked','FamilySize','IsAlone','Title']
X = train[features]
y = train['Survived']
X_test = test[features]

# subsample / colsample_bytree below 1.0 make training draw random rows and
# columns, so pin the seed explicitly instead of relying on library defaults.
SEED = 0
model = XGBClassifier(
    n_estimators=400,
    max_depth=4,
    learning_rate=0.05,
    subsample=0.9,
    colsample_bytree=0.9,
    random_state=SEED,
)
model.fit(X, y)

# Cast so the CSV holds plain integer class labels regardless of the dtype
# that predict() returns.
pred = model.predict(X_test).astype(int)
sub = pd.DataFrame({'PassengerId': test['PassengerId'], 'Survived': pred})
sub.to_csv('submission.csv', index=False)
sub.head()