## **Read the data and visualize its general structure**

In [2]:
import pandas as pd
import torch

data_frame=pd.read_csv("titanic.csv")
data_frame.info()
data_frame.head()
"""
we consider 'Survived' feature as our target feature
"""

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


## **Remove non significant columns and rows with missing cells**

In [3]:
data_frame=data_frame.drop(columns=['PassengerId','Ticket','Cabin','Name']) #remove insignificant column
data_frame=data_frame.dropna() #remove rows with empty cells
data_frame.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,male,22.0,1,0,7.25,S
1,1,1,female,38.0,1,0,71.2833,C
2,1,3,female,26.0,0,0,7.925,S
3,1,1,female,35.0,1,0,53.1,S
4,0,3,male,35.0,0,0,8.05,S


## **Encode String columns with numbers (Label encoding)**

In [4]:
from sklearn import preprocessing as pprocess

le=pprocess.LabelEncoder() #encoder object

le.fit(data_frame['Sex'].unique()) #result ==> 0: female | 1: male
Sex_encoded=le.transform(data_frame['Sex'].values)

le.fit(data_frame['Embarked'].unique()) #result ==> 0: C | 1: Q |2: S
Embarked_encoded=le.transform(data_frame['Embarked'].values)

data_frame=data_frame.drop(columns=['Sex','Embarked']) #remove old columns
data_frame['Sex'],data_frame['Embarked']=Sex_encoded, Embarked_encoded #place encoded ones
data_frame.head()

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare,Sex,Embarked
0,0,3,22.0,1,0,7.25,1,2
1,1,1,38.0,1,0,71.2833,0,0
2,1,3,26.0,0,0,7.925,0,2
3,1,1,35.0,1,0,53.1,0,2
4,0,3,35.0,0,0,8.05,1,2


## **Split the dataset into training and test data (ratio: 80%/20%)**

In [5]:
from sklearn.model_selection import train_test_split

train,test= train_test_split(data_frame, test_size=0.2, stratify=data_frame['Survived'])
#stratify=data_frame['Survived'] means that the proportion of survivors will be the same in train set and test set

## **Good practice: make the data overwrite Pytorch Dataset class to simplify some operations**

In [6]:
from torch.utils.data import Dataset

class Titanic_train_dataset(Dataset): #All datasets that represent a map from keys to data samples should inherit from Dataset Class
    def __init__(self, train):
        self.train=train
        self.inputs=train.iloc[:,1:].values
        self.outputs=train.iloc[:,0].values #we consider 'Survived' as output

    def __len__(self):
          return len(self.inputs)

    def __getitem__(self, idx): #we must overwrite it
        return {'input': torch.tensor(self.inputs[idx]), 
                'output': torch.tensor(self.outputs[idx])
                } #return in tensor form to feed the network model

train= Titanic_train_dataset(train)

## **Separate the data into batches**

In [8]:
from torch.utils.data import DataLoader

BATCH_SIZE=16
batched_train=DataLoader(dataset = train, batch_size = BATCH_SIZE, shuffle =False) #automatically separate training data into batches

## **Neural network structure**

In [54]:
import torch.nn as nn
#import torch.nn.functional as F

class Network(nn.Module): 
      def __init__(self):
          # we define the network layer by layer
          # almost all Linear layers take as parameters: input size (nuumer of neurons on previous layer), output size (number of neurons on next layer)
        super().__init__()

        self.fc1=nn.Linear(7, 16) # Linear layers do W*X+b
        self.bn1=nn.BatchNorm1d(16)  #apparently, it speeds the training
        self.relu1=nn.ReLU() #activation function

        self.fc2=nn.Linear(16,8)
        self.bn2=nn.BatchNorm1d(8)
        self.relu2=nn.ReLU()

        self.fc3=nn.Linear(8,4)
        self.bn3=nn.BatchNorm1d(4)
        self.relu3=nn.ReLU()

        self.fc4=nn.Linear(4,1)
        self.sigmoid=nn.Sigmoid() #to give us a probability of having survived
  
      def forward(self,x): #forwarding the data from input layer to output layer
        x=self.fc1(x)
        x=self.bn1(x)
        x=self.relu1(x)

        x=self.fc2(x)
        x=self.bn2(x)
        x=self.relu2(x)

        x=self.fc3(x)
        x=self.bn3(x)
        x=self.relu3(x)

        x=self.fc4(x)
        x=self.sigmoid(x)

        return x

model=Network()

## **Defining one iteration of the training process**

In [55]:
def train(model, x, y, optimizer, criterion):

    model.zero_grad() # initialize the gradients to 0

    x=x.to(torch.float32) # needed format
    output = model(x) # forward propagation (outputs probability of having survived)

    output=output.reshape(-1) #to make the output shape go from [dim_x,1] to [dim_x]...To avoid an error relative to dimension in next step
    output=output.to(torch.float32)
    y=y.to(torch.float32)

    loss= criterion(output,y) # calculating the loss value
    loss.backward() # backward propagation to calculate gradients
    
    optimizer.step() # update parameters with gradients
    
    return loss, output

## **Defining hyperparameters and loss function method**

In [56]:
from torch.optim import Adam #The choice of optimizer algorithm is empirical (Adam works well here)

criterion = nn.MSELoss() #create object that has loss function method
EPOCHS = 200 #number of iterations
optm = Adam(model.parameters(), lr = 0.001) #optimization_method(model_parameters, learning_rate)

## **Optionnal: trying to use the GPU**

In [57]:
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("Cuda Device Available")
    print("Name of the Cuda Device: ", torch.cuda.get_device_name())
    print("GPU Computational Capablity: ", torch.cuda.get_device_capability())

Cuda Device Available
Name of the Cuda Device:  Tesla T4
GPU Computational Capablity:  (7, 5)


## **Launch the training process**

In [58]:
for epoch in range(EPOCHS):
    epoch_loss=0
    for batch in batched_train: #feed the network with all the batches during one epoch
        x,y= batch['input'],batch['output'] #get the current batch
        x,y= x.to(device),y.to(device) # move it to current device (gpu if available)
        model.to(device) #move the model to device
        loss, prediction= train(model ,x ,y ,optm , criterion)
        epoch_loss+= loss
    print("loss: ",loss.item())
    

loss:  0.26513558626174927
loss:  0.25787508487701416
loss:  0.2427477389574051
loss:  0.23950304090976715
loss:  0.23775076866149902
loss:  0.23063193261623383
loss:  0.2331971675157547
loss:  0.2277664691209793
loss:  0.22687676548957825
loss:  0.2280125617980957
loss:  0.22061045467853546
loss:  0.21470659971237183
loss:  0.21055357158184052
loss:  0.20841526985168457
loss:  0.20767368376255035
loss:  0.22008968889713287
loss:  0.20880325138568878
loss:  0.22980369627475739
loss:  0.22274358570575714
loss:  0.21794311702251434
loss:  0.22299793362617493
loss:  0.21635156869888306
loss:  0.21516045928001404
loss:  0.20748241245746613
loss:  0.211456298828125
loss:  0.2109559178352356
loss:  0.20875197649002075
loss:  0.2067839652299881
loss:  0.2244202345609665
loss:  0.20307739078998566
loss:  0.24503786861896515
loss:  0.21692419052124023
loss:  0.21669520437717438
loss:  0.23795635998249054
loss:  0.19991528987884521
loss:  0.2166547030210495
loss:  0.21252135932445526
loss:  0.20

## **Model Evaluation**

In [83]:
from sklearn.metrics import classification_report as cr
import numpy as np

x_test, y_test = test.iloc[:,1:].values, test.iloc[:,0].values #extract test data values (without index)
x_test, y_test = torch.tensor(x_test,dtype=torch.float32), torch.tensor(y_test,dtype=torch.float32) #convert to float32 tensor
x_test, y_test = x_test.to(device), y_test.to(device) #move to specified device (gpu here)

y_pred=model(x_test) #make prediction

#optionnal: moving to cpu after the prediction to avoid errors while calling only-cpu functions (classification_report)
y_pred=y_pred.cpu()
y_test=y_test.cpu()

#convert tensor to numpy
y_pred=y_pred.detach().numpy().reshape(-1)
y_test=y_test.detach().numpy()

y_pred=np.where(y_pred<0.5, 0, 1)

print(cr(y_true=y_test, y_pred=y_pred))




              precision    recall  f1-score   support

         0.0       0.82      0.88      0.85        85
         1.0       0.81      0.72      0.76        58

    accuracy                           0.82       143
   macro avg       0.82      0.80      0.81       143
weighted avg       0.82      0.82      0.82       143

