## Feed Forward Neural Network: Titanic Dataset

In this project, we'll once again use the Titanic dataset, but this time utilizing a simple feed-forward neural network with a single hidden layer.

We will begin by importing the necessary modules.

In [1]:
# pytorch
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset

# scaling
from sklearn.preprocessing import StandardScaler

#data science
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Next, we will load the data using the `pd.read_csv` function.

In [2]:
df_titanic = pd.read_csv('titanic.csv')
df_titanic.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


Next, we will refine the dataframe to include only the relevant features and the desired output for our deep learning model. Following this, we will save this modified dataframe as a CSV file, since our Deep Learning dataset class will require it in this format.

In [3]:
df_titanic['Sex'] = df_titanic['Sex'].replace(["female", "male"], [0, 1])
df_titanic['Embarked'] = df_titanic['Embarked'].replace(['S', 'C', 'Q'], [0, 1, 2])

df_titanic_upd = df_titanic[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked', 'Survived']]

df_titanic_upd.to_csv('titanic_updated.csv')

Now we are going to create our dataset class.

In [4]:
class CustomDataset(Dataset):
    def __init__(self, csv_file):
        df = pd.read_csv(csv_file).dropna(axis=0).reset_index(drop=True)
        self.features = df.iloc[:, :-1].values
        self.labels = df.iloc[:, -1].values
    
    # scaling
        self.scaler = StandardScaler()
        self.features = self.scaler.fit_transform(self.features)
    
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, idx):
        feature = torch.tensor(self.features[idx], dtype=torch.float)
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return feature, label

Next, we'll feed our newly created CSV file into the dataset class we've set up. Then, we'll utilize the DataLoader to prepare it for training.

In [5]:
csv = 'titanic_updated.csv'
dataset = CustomDataset(csv)

batch_size =64
data_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

Now, we'll construct a class for a simple feed-forward neural network with one hidden layer.

In [6]:
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.layer2 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.Sigmoid()
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.relu(out)
        out = self.layer2(out)
        return out

Now, we'll instantiate the model and specify the loss function.

In [7]:
input_size = dataset.features.shape[1]
num_classes = len(set(dataset.labels))
hidden_size = 120

# define the model
model = NeuralNet(input_size, hidden_size, num_classes)

# define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)   

and finally training loop

In [8]:
num_epochs = 250

for epoch in range(num_epochs):
    total_loss = 0
    correct_predictions = 0
    for features, labels in data_loader:
        
        # clear gradients
        optimizer.zero_grad()
        
        # forward pass
        outputs = model(features)
        
        # calculate loss
        loss = criterion(outputs, labels)
        
        # backward
        loss.backward()
        
        optimizer.step()
        
        total_loss = total_loss + loss.item()
        
        _, predicted = torch.max(outputs, 1)
        correct_predictions = correct_predictions + (predicted == labels).sum().item()
        
    average_loss = total_loss / len(data_loader)
    accuracy = correct_predictions / len(dataset) * 100
    
    if (epoch + 1) % 50 == 0:
                print(f"Epoch [{epoch+1}/{num_epochs}] - Average Loss: {average_loss:.4f} - Accuracy: {accuracy:.2f}%")

print("Training completed.")
        

Epoch [50/250] - Average Loss: 0.6405 - Accuracy: 60.67%
Epoch [100/250] - Average Loss: 0.6125 - Accuracy: 66.85%
Epoch [150/250] - Average Loss: 0.5983 - Accuracy: 70.93%
Epoch [200/250] - Average Loss: 0.5852 - Accuracy: 73.88%
Epoch [250/250] - Average Loss: 0.5573 - Accuracy: 75.00%
Training completed.


In [9]:
# Calculate accuracy and loss of prediction
test_loader = DataLoader(dataset=dataset, batch_size=len(dataset), shuffle=False)
with torch.no_grad():
    model.eval()
    for features, labels in test_loader:
        outputs = model(features)
        loss = criterion(outputs, labels)
        _, predicted = torch.max(outputs, 1)
        test_accuracy = (predicted == labels).sum().item() / len(dataset) * 100
        test_loss = loss.item()
    
    print(f"Test Loss: {test_loss:.4f} - Test Accuracy: {test_accuracy:.2f}%")

Test Loss: 0.5568 - Test Accuracy: 75.00%
