# Titanic Survival Prediction

This Jupyter Notebook covers the steps to predict Titanic survival using different machine learning models.

In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import accuracy_score

# Load the dataset
train_data = pd.read_csv("dataset/train.csv")
test_data = pd.read_csv("dataset/test.csv")

# Data Preprocessing
features = ["Pclass", "Sex", "Age", "SibSp", "Parch"]
target = "Survived"

def preprocess_data(df):
    df["Age"].fillna(df["Age"].mean(), inplace=True)
    df["Sex"] = df["Sex"].map({"male": 0, "female": 1})
    df["FamilySize"] = df["SibSp"] + df["Parch"]
    return df[features]

X_train = preprocess_data(train_data)
y_train = train_data[target]
X_test = preprocess_data(test_data)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Apply SMOTE to balance the data
smote = SMOTE(random_state=1)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Split the resampled data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train_resampled, y_train_resampled, test_size=0.2, random_state=1)

# Neural Network Model
model = keras.Sequential([
    layers.Dense(128, input_dim=X_train.shape[1], activation="relu"),
    layers.Dense(64, activation="relu"),
    layers.Dense(32, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=64, verbose=1)

# Evaluate the model on the validation set
_, accuracy = model.evaluate(X_val, y_val)
print(f"Model Accuracy: {accuracy}")

# Test the model on the test data
test_predictions = (model.predict(X_test) > 0.7).astype(int).flatten()

# Save the test predictions to a CSV file
output = pd.DataFrame({"PassengerId": test_data["PassengerId"], "Survived": test_predictions})
output.to_csv("submission_keras_nn_with_smote.csv", index=False)
print("Test predictions saved to submission_keras_nn_with_smote.csv")

# You can now submit the "submission_keras_nn_with_smote.csv" file to the Kaggle competition.


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Model Accuracy: 0.800000011920929
Test predictions saved to submission_keras_nn_with_smote.csv
