In [9]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from keras.models import Sequential
from keras.layers import Dense
from scikeras.wrappers import KerasClassifier
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score

# Load the dataset
train_data = pd.read_csv("dataset/train.csv")
test_data = pd.read_csv("dataset/test.csv")

# Data Preprocessing
features = ["Pclass", "Sex", "Age", "SibSp", "Parch"]
target = "Survived"

def preprocess_data(df):
    df["Age"].fillna(df["Age"].mean(), inplace=True)
    df["Sex"] = df["Sex"].map({"male": 0, "female": 1})
    df["FamilySize"] = df["SibSp"] + df["Parch"]
    return df[features]

X_train = preprocess_data(train_data)
y_train = train_data[target]
X_test = preprocess_data(test_data)

# Apply SMOTE to balance the data
smote = SMOTE(random_state=1)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Split the resampled data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train_resampled, y_train_resampled, test_size=0.2, random_state=1)

# Define a Keras neural network model
def create_keras_model():
    model = Sequential()
    model.add(Dense(128, input_dim=X_train.shape[1], activation="relu"))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(32, activation="relu"))
    model.add(Dense(1, activation="sigmoid"))
    model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    return model

keras_classifier = KerasClassifier(build_fn=create_keras_model, epochs=60, batch_size=64, verbose=1)

# Ensemble Learning (Soft Voting) including Keras NN
models = [
    ("Random Forest", RandomForestClassifier(n_estimators=100, max_depth=5, random_state=1)),
    ("Gradient Boosting", GradientBoostingClassifier(n_estimators=100, random_state=1)),
    ("Decision Tree", DecisionTreeClassifier(criterion="entropy", max_depth=4, random_state=1)),
    ("SVM (RBF Kernel)", SVC(kernel="rbf", C=1.0, gamma=0.2, probability=True, random_state=1)),
    ("Logistic Regression", LogisticRegression(random_state=1)),
    ("Keras Neural Network", keras_classifier),
]

ensemble = VotingClassifier(models, voting="soft")

# Train the ensemble model
ensemble.fit(X_train, y_train)

# Evaluate the ensemble model on the validation set
ensemble_accuracy = accuracy_score(y_val, ensemble.predict(X_val))
print(f"Ensemble Model Accuracy: {ensemble_accuracy}")

# Get probability estimates for test data
test_probabilities = ensemble.predict_proba(X_test)[:, 1]

# Apply a threshold to convert probabilities to binary predictions
print(test_probabilities)
threshold = 0.7
test_predictions = (test_probabilities > threshold).astype(int)

# Save the test predictions to a CSV file
output = pd.DataFrame({"PassengerId": test_data["PassengerId"], "Survived": test_predictions})
output.to_csv("submission_ensemble_with_keras.csv", index=False)
print("Test predictions saved to submission_ensemble_with_keras.csv")


Epoch 1/60


  X, y = self._initialize(X, y)


Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Ensemble Model Accuracy: 0.8181818181818182
[0.12632083 0.28652292 0.27601193 0.24565091 0.66573157 0.31562383
 0.72088737 0.21230969 0.70605165 0.15030069 0.21527284 0.38789667
 0.95610957 0.29149618 0.8907462  0.89082341 0.22200157 0.17933256
 0.66990883 0.32424963 0.43378983 0.48564116 0.94776241