# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense

# Step 1: Read in Titanic.csv and preprocess the data
df = pd.read_csv('Titanic.csv')

# Encode passenger class as an ordinal integer. This has to happen BEFORE the
# train/test split, otherwise the split copies never receive the encoded
# column. The mapping is applied only when the column holds labels such as
# '1st'; a column that is already numeric is left untouched, because mapping
# it would turn every value into NaN.
class_mapping = {'1st': 1, '2nd': 2, '3rd': 3}
if not pd.api.types.is_numeric_dtype(df['Pclass']):
    df['Pclass'] = df['Pclass'].map(class_mapping)

# Convert categorical features to numerical using one-hot encoding
df = pd.get_dummies(df, columns=['Sex', 'Embarked'], drop_first=True)

# Split data into training and test sets (80% / 20%, fixed seed)
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Reset index for training and test sets. Reassigning (rather than mutating
# in place) gives each subset its own fresh DataFrame.
train_df = train_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

# Fill missing ages with the median age of the TRAINING set only, so that no
# information from the test set leaks into preprocessing.
# NOTE(review): assumes 'Age' may contain missing values (it does in the
# common Kaggle Titanic data) -- confirm for this CSV. This is a no-op when
# the column has no gaps.
age_median = train_df['Age'].median()
train_df = train_df.fillna({'Age': age_median})
test_df = test_df.fillna({'Age': age_median})

# Step 2: Fit a neural network
# The four input columns used by the network; the label is 'Survived'.
nn_features = ['Pclass', 'Sex_male', 'Age', 'SibSp']

# Training-set inputs and labels as NumPy arrays
X_train = train_df[nn_features].to_numpy()
y_train = train_df['Survived'].to_numpy()

# Test-set inputs and labels as NumPy arrays
X_test = test_df[nn_features].to_numpy()
y_test = test_df['Survived'].to_numpy()

# Standardise the inputs: the scaler is fitted on the training data only and
# the same fitted scaler is then applied to both sets.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fully connected network: 4 inputs -> 16 -> 8 -> 1, sigmoid activations
# throughout (the single sigmoid output is used with binary cross-entropy).
model = Sequential([
    Dense(units=16, activation='sigmoid', input_dim=4),
    Dense(units=8, activation='sigmoid'),
    Dense(units=1, activation='sigmoid'),
])

# Adam optimizer, binary cross-entropy loss, accuracy tracked as a metric
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train silently for 100 epochs with mini-batches of 32 samples
model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)

# Step 3: Evaluate the model
# Predict on test set.
# `Sequential.predict_classes` was removed from Keras (TensorFlow 2.6), so the
# class labels are derived from the sigmoid output directly: probabilities
# above 0.5 become class 1, everything else class 0. verbose=0 keeps the
# prediction step silent.
y_prob = model.predict(X_test, verbose=0)
y_pred = (y_prob > 0.5).astype('int32')
# Collapse the (n_samples, 1) network output to a flat label vector
y_pred = y_pred.flatten()

# Calculate out-of-sample accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Out-of-sample Accuracy: {:.2f}%".format(accuracy * 100))

# Step 4: Compare with Random Forest
# The forest reads the same four columns straight from the data frames,
# i.e. without the standard scaling applied to the network inputs.
rf_features = ['Pclass', 'Sex_male', 'Age', 'SibSp']
X_train_rf = train_df[rf_features].to_numpy()
X_test_rf = test_df[rf_features].to_numpy()

# Build a 100-tree forest with a fixed seed and fit it in one expression
# (`fit` returns the fitted estimator itself).
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train_rf, y_train)

# Class predictions for the held-out test rows
y_pred_rf = rf.predict(X_test_rf)

# Out-of-sample accuracy of the forest, reported as a percentage
accuracy_rf = accuracy_score(y_test, y_pred_rf)
rf_report = "Out-of-sample Accuracy (Random Forest): {:.2f}%".format(accuracy_rf * 100)
print(rf_report)
