In [1]:
# Import required libraries

import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.regularizers import L2
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import logging
# Only let TensorFlow's Python logger emit records at ERROR level or above
logging.getLogger("tensorflow").setLevel(logging.ERROR)
# Set AutoGraph's logging verbosity to 0
tf.autograph.set_verbosity(0)

2024-04-21 11:49:43.573230: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the dataset from data.csv (path is relative to the working directory)
df = pd.read_csv('data.csv')

In [3]:
# Preview the first two rows of the raw data
df.head(2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C


In [4]:
# Data preprocessing

def get_data_preprocessed(dataframe):
    """Return a model-ready copy of the raw passenger table.

    Steps:
      1. Drop the 'PassengerId', 'Name', 'Ticket' and 'Cabin' columns.
      2. One-hot encode 'Sex' and 'Embarked', dropping the first level of each.
      3. Replace missing 'Age' values with the mean of the 'Age' column.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns named above.

    Returns
    -------
    pandas.DataFrame
        A new frame; the caller's frame is not modified.
    """
    # Drop columns that don't contribute much to the survival decision
    dataframe = dataframe.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])

    # One-hot encode the categorical columns
    dataframe = pd.get_dummies(dataframe, columns=['Sex', 'Embarked'], drop_first=True)

    # Fill missing ages by assigning the result back to the column.
    # Calling fillna(inplace=True) on a selected column is chained assignment:
    # it raises a FutureWarning on pandas 2.x and does not update the frame
    # once copy-on-write is active.
    dataframe['Age'] = dataframe['Age'].fillna(dataframe['Age'].mean())

    return dataframe

In [5]:
# Split the labelled data (loaded from data.csv) into training and
# cross-validation sets

X = get_data_preprocessed(df)
X = X.drop('Survived', axis=1).astype(float)
y = df[['Survived']].astype(float)

# A fixed random_state makes the split identical on every Restart & Run All;
# stratifying on the label keeps the class ratio the same in both subsets.
X_train, X_cv, y_train, y_cv = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y['Survived']
)

In [6]:
# Preview the first two rows of the feature matrix
X.head(2)

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Sex_male,Embarked_Q,Embarked_S
0,3.0,22.0,1.0,0.0,7.25,1.0,0.0,1.0
1,1.0,38.0,1.0,0.0,71.2833,0.0,0.0,0.0


In [None]:
# Prepare model using neural network

def prepare_model(X_train, y_train, epochs=100, learning_rate=0.001, verbose=1):
    """Build, compile and fit a small feed-forward binary classifier.

    Architecture: Input -> Dense(36, relu) -> Dense(18, relu) ->
    Dense(1, sigmoid). The output layer carries an L2(0.01) kernel
    regularizer. The model is trained with binary cross-entropy and Adam.

    Parameters
    ----------
    X_train : 2-D array-like
        Training features; the number of columns sets the input width.
    y_train : array-like
        Binary training labels.
    epochs : int, default 100
        Number of passes over the training data.
    learning_rate : float, default 0.001
        Learning rate passed to the Adam optimizer.
    verbose : int, default 1
        Verbosity forwarded to ``model.fit``.

    Returns
    -------
    The fitted Keras ``Sequential`` model.
    """
    # Derive the input width from the data rather than hard-coding 8, so the
    # model keeps working if preprocessing adds or removes a feature column.
    n_features = X_train.shape[1]

    model = Sequential([
        tf.keras.Input(shape=(n_features,)),
        Dense(units=36, activation='relu'),
        Dense(units=18, activation='relu'),
        Dense(units=1, activation='sigmoid', kernel_regularizer=L2(0.01))
    ], name='my_model'
    )

    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        # Report accuracy alongside the loss so training progress is readable
        metrics=['accuracy'],
    )

    model.fit(
        X_train, y_train,
        epochs=epochs,
        verbose=verbose
    )
    return model

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and
# batch shuffling are repeatable when this cell is re-run.
tf.keras.utils.set_random_seed(42)
model = prepare_model(X_train, y_train)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Score the cross-validation set with the trained model
y_pred_prob = model.predict(X_cv)

# True labels as an integer array
y_true = y_cv.to_numpy().astype(int)

# Hard class labels: a probability at or above 0.5 counts as the positive class
y_pred = (y_pred_prob >= 0.5).astype(int)

In [None]:
# Compute both evaluation artefacts from the thresholded predictions
conf_matrix = confusion_matrix(y_true, y_pred)
report = classification_report(y_true, y_pred)

# Print the confusion matrix first, then the more detailed per-class report
print("Confusion Matrix:", conf_matrix, sep="\n")
print("\nClassification Report:", report, sep="\n")