In [None]:
import os

import pandas as pd
import tensorflow
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow.keras import utils
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential

In [None]:
# Load the Titanic training data and prepare it for the network.
titanic_df = pd.read_csv('train.csv')

# Preprocessing: drop identifier / free-text columns and fill missing values.
# The filled column is assigned back instead of calling
# `df[col].fillna(..., inplace=True)`: that chained in-place form is deprecated
# by pandas and has no effect on the frame when copy-on-write is enabled.
titanic_df = titanic_df.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)
titanic_df['Age'] = titanic_df['Age'].fillna(titanic_df['Age'].median())
titanic_df['Embarked'] = titanic_df['Embarked'].fillna('S')
#features = ["Pclass", "Sex", "SibSp", "Parch"]

# Convert categorical variables to integer codes.
# NOTE: pd.factorize numbers categories in order of first appearance, so the
# codes produced here depend on the row order of this file. Any other data fed
# to the trained model must be encoded with this same category -> code mapping.
titanic_df['Sex'] = pd.factorize(titanic_df['Sex'])[0]
titanic_df['Embarked'] = pd.factorize(titanic_df['Embarked'])[0]

# Scaler for the numeric columns. Scaling is currently disabled (the two lines
# below are commented out), so `scaler` is created but never fitted.
scaler = StandardScaler()
#numeric_cols = ['Age', 'Fare', 'Pclass']
#titanic_df[numeric_cols] = scaler.fit_transform(titanic_df[numeric_cols])

# Split data into train and hold-out sets: a reproducible 80% sample for
# training, the remaining rows for validation.
train_df = titanic_df.sample(frac=0.8, random_state=42)
test_df = titanic_df.drop(train_df.index)

# Input width is every column except the 'Survived' label; the network emits
# one probability per class (two classes).
input_shape = (train_df.shape[1] - 1,)
output_shape = 2



In [None]:
# Preview the first rows of the preprocessed frame rather than dumping all of it.
titanic_df.head()

In [None]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling are repeatable across notebook runs
# (42 matches the random_state used for the train/hold-out split).
tf.keras.utils.set_random_seed(42)

# Define the neural network architecture: two ReLU hidden layers followed by a
# softmax layer with one unit per class.
inputs = Input(shape=input_shape, dtype=tf.float32)
x = Dense(32, activation='relu')(inputs)
x = Dense(16, activation='relu')(x)
outputs = Dense(output_shape, activation='softmax')(x)
model = Model(inputs=inputs, outputs=outputs)

# Compile the model. categorical_crossentropy expects one-hot labels, which is
# why the labels are converted with to_categorical below.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Convert the 0/1 'Survived' labels to one-hot vectors of length output_shape
train_labels = tf.keras.utils.to_categorical(train_df['Survived'], num_classes=output_shape)
test_labels = tf.keras.utils.to_categorical(test_df['Survived'], num_classes=output_shape)

# Features are every remaining column once the label is removed
train_features = train_df.drop(['Survived'], axis=1)
test_features = test_df.drop(['Survived'], axis=1)

# Train the model, using the hold-out rows as validation data so that
# history.history also records val_loss / val_accuracy per epoch.
num_epochs = 100
history = model.fit(train_features, train_labels,
                    validation_data=(test_features, test_labels),
                    epochs=num_epochs, batch_size=70)


In [None]:
# Keras does not ship a confusion-matrix metric, so we'll use scikit-learn's
import numpy as np
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline


# Per-class probabilities for the hold-out rows; the predicted class is the
# column with the highest probability.
class_probabilities = model.predict(test_features)
predictions = class_probabilities.argmax(axis=1)
# Collapse the one-hot hold-out labels back to class indices for comparison
true_labels = test_labels.argmax(axis=1)


In [None]:
# Compute the confusion matrix once (rows = true class, columns = predicted
# class). Passing `labels` pins the layout to output_shape x output_shape even
# if one class never occurs in the predictions.
cm = confusion_matrix(true_labels, predictions, labels=list(range(output_shape)))

# Print the confusion matrix
print("Confusion Matrix:")
print(cm)


In [None]:
#Plot for training vs validation loss
%matplotlib inline
from matplotlib import pyplot as plt

epoch_nums = range(1,num_epochs+1)
training_loss = history.history["loss"]
validation_loss = history.history["val_loss"]
plt.plot(epoch_nums, training_loss)
plt.plot(epoch_nums, validation_loss)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['training', 'validation'], loc='upper right')
plt.show()

In [None]:
# Save the trained model
modelFileName = 'models/survivors-classifier.h5'
# Create the target directory first so saving does not fail on a fresh
# checkout where 'models/' does not exist yet.
os.makedirs(os.path.dirname(modelFileName), exist_ok=True)
model.save(modelFileName)
del model  # drop the in-memory model; it is reloaded from disk before inference
print('model saved as', modelFileName)

In [None]:
# Load the test dataset and keep the ids for the submission file
test_data = pd.read_csv('test.csv')
passenger_ids = test_data['PassengerId']

# Re-read the raw training file so the test set is encoded and imputed exactly
# like the data the model was trained on. Calling pd.factorize on the test set
# itself would number the categories by their order of appearance in *this*
# file, which can differ from the training file and silently swap the codes.
dropped_columns = ['PassengerId', 'Name', 'Ticket', 'Cabin']
train_reference = pd.read_csv('train.csv')
_, sex_categories = pd.factorize(train_reference['Sex'])
_, embarked_categories = pd.factorize(train_reference['Embarked'].fillna('S'))
feature_columns = train_reference.drop(columns=dropped_columns + ['Survived']).columns

# Preprocessing: drop unnecessary columns and handle missing values.
# Missing values are filled with training-set statistics. 'Fare' is imputed as
# well: the model cannot handle NaN inputs, and nothing guarantees the test
# file has no gaps there.
test_data = test_data.drop(dropped_columns, axis=1)
test_data['Age'] = test_data['Age'].fillna(train_reference['Age'].median())
test_data['Fare'] = test_data['Fare'].fillna(train_reference['Fare'].median())
test_data['Embarked'] = test_data['Embarked'].fillna('S')

# Convert categorical variables to the same integer codes used in training
# (a value never seen in training becomes -1, like pd.factorize's sentinel)
test_data['Sex'] = pd.Categorical(
    test_data['Sex'], categories=sex_categories).codes.astype('int64')
test_data['Embarked'] = pd.Categorical(
    test_data['Embarked'], categories=embarked_categories).codes.astype('int64')

# Present the columns in the same order as the training features
test_data = test_data[feature_columns]

# Load the trained TensorFlow model
model = tf.keras.models.load_model(modelFileName)

# Make predictions on the test data: the class with the highest probability wins
class_probabilities = model.predict(test_data)
predictions = np.argmax(class_probabilities, axis=1)

# Save the predicted labels to a CSV file
submission = pd.DataFrame({'PassengerId': passenger_ids, 'Survived': predictions})
submission.to_csv('submission.csv', index=False)

In [None]:
# Preview the first rows of the submission; the full table is in submission.csv
submission.head()