In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout




In [2]:
# Load Titanic dataset from Seaborn
titanic = sns.load_dataset('titanic')

titanic.head()


Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [3]:
# Select relevant features and drop rows with missing values
titanic = titanic[['survived', 'pclass', 'sex', 'age', 'fare', 'embark_town']].dropna()

# Encode categorical variables
titanic['sex'] = titanic['sex'].map({'male': 0, 'female': 1})
titanic = pd.get_dummies(titanic, columns=['embark_town'], drop_first=True)

# Split dataset into features (X) and target (y)
X = titanic.drop(columns=['survived'])
y = titanic['survived']

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [4]:
# Standardize numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [5]:
# Train Logistic Regression Model
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

# Predictions
y_pred_lr = log_reg.predict(X_test)


In [6]:
# Evaluate Logistic Regression
lr_metrics = {
    'Accuracy': accuracy_score(y_test, y_pred_lr),
    'Precision': precision_score(y_test, y_pred_lr),
    'Recall': recall_score(y_test, y_pred_lr),
    'F1 Score': f1_score(y_test, y_pred_lr),
    'Confusion Matrix': confusion_matrix(y_test, y_pred_lr)
}

# Print Logistic Regression results
for metric, value in lr_metrics.items():
    print(f"{metric}: {value}")


Accuracy: 0.7832167832167832
Precision: 0.7454545454545455
Recall: 0.7068965517241379
F1 Score: 0.7256637168141593
Confusion Matrix: [[71 14]
 [17 41]]


In [7]:
# Build ANN Model
ann = Sequential([
    Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')  # Sigmoid for binary classification
])




In [8]:
# Compile the model
ann.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
ann.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 32)                224       
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense_1 (Dense)             (None, 16)                528       
                                                                 
 dropout_1 (Dropout)         (None, 16)                0         
                                                                 
 dense_2 (Dense)             (None, 1)                 17        
                                                                 
Total params: 769 (3.00 KB)
Trainable params: 769 (3.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [9]:
# Train the model
history = ann.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test), verbose=1)

Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [10]:
# Predict class probabilities
y_pred_ann_prob = ann.predict(X_test)

# Convert probabilities to binary values (0 or 1)
y_pred_ann = (y_pred_ann_prob > 0.5).astype(int).flatten()



In [11]:
# Evaluate ANN Model
ann_metrics = {
    'Accuracy': accuracy_score(y_test, y_pred_ann),
    'Precision': precision_score(y_test, y_pred_ann),
    'Recall': recall_score(y_test, y_pred_ann),
    'F1 Score': f1_score(y_test, y_pred_ann),
    'Confusion Matrix': confusion_matrix(y_test, y_pred_ann)
}

# Print ANN results
for metric, value in ann_metrics.items():
    print(f"{metric}: {value}")


Accuracy: 0.7902097902097902
Precision: 0.7916666666666666
Recall: 0.6551724137931034
F1 Score: 0.7169811320754716
Confusion Matrix: [[75 10]
 [20 38]]


In [12]:
# Create a DataFrame to compare Logistic Regression and ANN metrics
import pandas as pd

comparison_df = pd.DataFrame({
    "Metric": ["Accuracy", "Precision", "Recall", "F1 Score"],
    "Logistic Regression": [lr_metrics["Accuracy"], lr_metrics["Precision"], lr_metrics["Recall"], lr_metrics["F1 Score"]],
    "ANN": [ann_metrics["Accuracy"], ann_metrics["Precision"], ann_metrics["Recall"], ann_metrics["F1 Score"]]
})

# Display the comparison table
print(comparison_df)


      Metric  Logistic Regression       ANN
0   Accuracy             0.783217  0.790210
1  Precision             0.745455  0.791667
2     Recall             0.706897  0.655172
3   F1 Score             0.725664  0.716981
