In [None]:

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier


In [None]:

# Working frame for the whole notebook: seaborn's example Titanic dataset.
df = sns.load_dataset(name='titanic')


In [None]:

# Number of missing entries per column; kept as the cell's final expression
# so the resulting Series is displayed.
df.isna().sum()


In [None]:

# Target balance: number of rows for each value of `survived`.
fig, ax = plt.subplots()
sns.countplot(data=df, x='survived', ax=ax)
plt.show()

# Distribution of passenger age with a KDE overlay; rows without an age are
# removed before plotting.
known_ages = df['age'].dropna()
fig, ax = plt.subplots()
sns.histplot(known_ages, kde=True, ax=ax)
plt.show()

# Spread of the fare paid, grouped by survival outcome.
fig, ax = plt.subplots()
sns.boxplot(data=df, x='survived', y='fare', ax=ax)
plt.show()


In [None]:

# Fill missing values in the feature columns, grouped by dtype:
#   * numeric (int64 / float64) columns -> column median
#   * object (string) columns           -> most frequent value
#
# The label column `survived` is deliberately removed from the numeric group:
# a label must never be filled in with a median, and the numeric imputer
# returns a float array, which would otherwise turn the integer 0/1 target
# into floats before it reaches the classifiers.
#
# NOTE(review): both imputers are fitted on the full frame, i.e. before the
# train/test split done further down, so held-out rows influence the fill
# values. Consider fitting on the training rows only.
num_cols = (
    df.select_dtypes(include=['int64', 'float64'])
    .columns
    .drop('survived', errors='ignore')
)
cat_cols = df.select_dtypes(include='object').columns

num_imputer = SimpleImputer(strategy='median')
cat_imputer = SimpleImputer(strategy='most_frequent')

df[num_cols] = num_imputer.fit_transform(df[num_cols])
df[cat_cols] = cat_imputer.fit_transform(df[cat_cols])


In [None]:

# Remove columns that are not used as model inputs.
# NOTE(review): in the seaborn Titanic table these look like duplicates or
# derivations of other columns (`alive` of `survived`, `class` of `pclass`,
# `embark_town` of `embarked`) plus `deck` — confirm before relying on this.
#
# errors='ignore' keeps the cell re-runnable: on a second execution the
# columns are already gone and a plain drop would raise KeyError.
df = df.drop(
    columns=['deck', 'embark_town', 'alive', 'who', 'adult_male', 'class'],
    errors='ignore',
)


In [None]:

# Replace every remaining object (string) column with integer codes.
# One encoder instance is refitted for each column, so after the loop `le`
# only holds the classes of the last column it processed.
le = LabelEncoder()
text_columns = df.select_dtypes(include='object').columns
for col in text_columns:
    encoded = le.fit_transform(df[col])
    df[col] = encoded


In [None]:

# Separate the label from the predictor columns.
y = df['survived']
X = df.drop(columns='survived')

# Hold out 20% of the rows for testing. The split is stratified on the label
# so both parts keep the same class ratio, and the seed is fixed so the split
# is reproducible.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [None]:

# Hyper-parameter grid for XGBoost: 2 values for each of 5 parameters gives
# 32 candidates, each scored with 5-fold cross-validation on the training split.
xgb_params = {
    'n_estimators': [100, 200],
    'max_depth': [3, 5],
    'learning_rate': [0.05, 0.1],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0]
}

# `use_label_encoder` is intentionally not passed: the flag is deprecated and
# recent XGBoost releases ignore it while emitting a "parameters not used"
# warning on every fit, which floods the output during a grid search.
xgb = XGBClassifier(eval_metric='logloss', random_state=42)

# Pick the candidate with the best cross-validated F1, using all CPU cores.
# GridSearchCV refits the winner on the whole training split by default.
xgb_grid = GridSearchCV(xgb, xgb_params, scoring='f1', cv=5, n_jobs=-1)
xgb_grid.fit(X_train, y_train)

# Predict the held-out test split with the refitted best model.
xgb_pred = xgb_grid.best_estimator_.predict(X_test)

# Test-set metrics, collected later in the comparison table.
xgb_acc = accuracy_score(y_test, xgb_pred)
xgb_prec = precision_score(y_test, xgb_pred)
xgb_rec = recall_score(y_test, xgb_pred)
xgb_f1 = f1_score(y_test, xgb_pred)


In [None]:

# Hyper-parameter grid for LightGBM: 2 values for each of 4 parameters gives
# 16 candidates, each scored with 5-fold cross-validation on the training split.
lgbm_params = {
    'n_estimators': [100, 200],
    'learning_rate': [0.05, 0.1],
    'num_leaves': [31, 50],
    'subsample': [0.8, 1.0]
}

# LightGBM applies `subsample` (row bagging) only when `subsample_freq` is
# greater than 0. Its default of 0 disables bagging, so the 0.8 and 1.0 grid
# entries would train identical models and half the search would be wasted.
# subsample_freq=1 re-samples the rows at every boosting iteration so the
# tuned `subsample` value actually takes effect.
lgbm = LGBMClassifier(random_state=42, subsample_freq=1)

# Pick the candidate with the best cross-validated F1, using all CPU cores.
# GridSearchCV refits the winner on the whole training split by default.
lgbm_grid = GridSearchCV(lgbm, lgbm_params, scoring='f1', cv=5, n_jobs=-1)
lgbm_grid.fit(X_train, y_train)

# Predict the held-out test split with the refitted best model.
lgbm_pred = lgbm_grid.best_estimator_.predict(X_test)

# Test-set metrics, collected later in the comparison table.
lgbm_acc = accuracy_score(y_test, lgbm_pred)
lgbm_prec = precision_score(y_test, lgbm_pred)
lgbm_rec = recall_score(y_test, lgbm_pred)
lgbm_f1 = f1_score(y_test, lgbm_pred)


In [None]:

# Side-by-side comparison of the two tuned models on the test split: one row
# per model, one column per metric. Left as the cell's final expression so the
# table is displayed.
pd.DataFrame(
    [
        ('XGBoost', xgb_acc, xgb_prec, xgb_rec, xgb_f1),
        ('LightGBM', lgbm_acc, lgbm_prec, lgbm_rec, lgbm_f1),
    ],
    columns=['Model', 'Accuracy', 'Precision', 'Recall', 'F1 Score'],
)
