In [91]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

### First, we load the titatic dataset and convert all categorical variables to one-hot

In [92]:
data_frame = pd.read_csv('titanic.csv')
data_frame = data_frame.drop(['Name', 'Ticket', 'Cabin', 'SibSp', 'Parch'], axis=1)
data_frame = pd.get_dummies(data_frame, columns=['Sex', 'Embarked'])
data_frame = data_frame.fillna(data_frame.mean())
data_frame

Unnamed: 0,PassengerId,Survived,Pclass,Age,Fare,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
0,1,0,3,22.000000,7.2500,False,True,False,False,True
1,2,1,1,38.000000,71.2833,True,False,True,False,False
2,3,1,3,26.000000,7.9250,True,False,False,False,True
3,4,1,1,35.000000,53.1000,True,False,False,False,True
4,5,0,3,35.000000,8.0500,False,True,False,False,True
...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,27.000000,13.0000,False,True,False,False,True
887,888,1,1,19.000000,30.0000,True,False,False,False,True
888,889,0,3,29.699118,23.4500,True,False,False,False,True
889,890,1,1,26.000000,30.0000,False,True,True,False,False


### Next, we convert the dataset to a NumPy array and normalize it

In [93]:
labels = data_frame[['Survived']].to_numpy().astype(float)
observ = data_frame.drop(['PassengerId', 'Survived'], axis=1).to_numpy().astype(float)
X_train, X_test, y_train, y_test = train_test_split(observ, labels, test_size=0.25, random_state=42)

In [94]:
# We have to normalize Age and Fare, so columns 0,3
age_min, age_max = min(data_frame['Age']), max(data_frame['Age'])
fare_min, fare_max = min(data_frame['Fare']), max(data_frame['Fare'])
"""age_min, age_max = data_frame['Age'].min(), data_frame['Age'].min()
fare_min, fare_max = data_frame['Fare'].max(), data_frame['Fare'].max()"""
# Apply
X_train[:,0] = (X_train[:,0] - age_min) / (age_max - age_min + 1e-5)
X_test[:,0] = (X_test[:,0] - age_min) / (age_max - age_min + 1e-5)
X_train[:,3] = (X_train[:,3] - fare_min) / (fare_max - fare_min + 1e-5)
X_test[:,3] = (X_test[:,3] - fare_min) / (fare_max - fare_min + 1e-5)

### Now, let's create our sequential model with four linear layers

In [95]:
import torch, torch.nn as nn, torch.nn.functional as F
first_model = nn.Sequential(
    nn.Linear(len(observ[0]), 256),
    nn.ReLU(),
    nn.Dropout(),
    
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Dropout(),
    
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Dropout(),
    
    nn.Linear(64, 1),
    nn.Sigmoid()
)

### We can already apply it to our test dataset

In [96]:
first_model(torch.tensor(X_test[:20], dtype=torch.float))
first_model

Sequential(
  (0): Linear(in_features=8, out_features=256, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=256, out_features=128, bias=True)
  (4): ReLU()
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=128, out_features=64, bias=True)
  (7): ReLU()
  (8): Dropout(p=0.5, inplace=False)
  (9): Linear(in_features=64, out_features=1, bias=True)
  (10): Sigmoid()
)

### Now, let's fit our model

In [97]:
first_model.fit(X_train, y_train, X_test, y_test)

AttributeError: 'Sequential' object has no attribute 'fit'

### As you may have seen, this does not work. We have to implement the training ourselves!

In [113]:
class MyMLPModel(nn.Module):
    def __init__(self, input, *hidden_layers, lr=0.07, dropout=0.2):
        super().__init__() # <- Very important!
        self.lr = lr
        ## Build model
        n_neurons = [input] + list(hidden_layers)
        self.layers = []
        for i, o in zip(n_neurons[:-1], n_neurons[1:]):
            self.layers += [
                    nn.Linear(i, o),
                    nn.ReLU(),
                    nn.Dropout(dropout),
            ]
        self.layers += [
                nn.Linear(n_neurons[-1], 1),
                nn.Sigmoid()
        ]
        
        self.layers = nn.Sequential(*self.layers) # Create a sequential model
        
    def forward(self, X):
        return self.layers(X)
    
    def predict(self, X, th=0.5):
        X = torch.tensor(X, dtype=torch.float)
        with torch.no_grad():
            y_hat = self.forward(X)
        return (y_hat >= th).float()
    
    def train_step(self, X, y):
        y_hat = self.forward(X)
        return F.binary_cross_entropy(y_hat, y)
        
    def validation_step(self, X, y):
        with torch.no_grad():
            return self.train_step(X, y)
    
    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=self.lr)
        return optimizer
        
    def fit(self, X_train, y_train, X_valid, y_valid, epochs=10):
        ## Convert dataset
        X_train = torch.tensor(X_train, dtype=torch.float)
        y_train = torch.tensor(y_train, dtype=torch.float)
        
        X_valid = torch.tensor(X_valid, dtype=torch.float)
        y_valid = torch.tensor(y_valid, dtype=torch.float)
        
        ## Load Optimizer
        optimizer = self.configure_optimizers()

        for epoch in range(epochs):
            print(f'{epoch+1}/{epochs}:')
            # Training
            self.train() # Set model to training mode
            optimizer.zero_grad() # Sets all gradients to Zero
            loss = self.train_step(X_train, y_train) # Execute Forward pass and calculate Loss
            loss.backward() # Execute Backward pass
            optimizer.step() # Update weights
            self.eval() # Set model to validation mode
            
            # Validation
            loss_valid = self.validation_step(X_valid, y_valid)
            print(f'Training Loss: {loss.item():1.4f}', f'Validation Loss: {loss_valid.item():1.4f}')
            
        return self

### Create a model similar to the one before

In [114]:
second_model = MyMLPModel(X_train.shape[1], 32, 16, 8, 4)
second_model

MyMLPModel(
  (layers): Sequential(
    (0): Linear(in_features=8, out_features=32, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=32, out_features=16, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.2, inplace=False)
    (6): Linear(in_features=16, out_features=8, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.2, inplace=False)
    (9): Linear(in_features=8, out_features=4, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.2, inplace=False)
    (12): Linear(in_features=4, out_features=1, bias=True)
    (13): Sigmoid()
  )
)

In [115]:
second_model.predict(X_test[:20])

tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.]])

### Train it and calculate the test accuracy 

In [118]:
second_model = second_model.fit(X_train, y_train, X_test, y_test, epochs=1000)

1/15000:
Training Loss: 0.5087 Validation Loss: 0.4983
2/15000:
Training Loss: 0.5047 Validation Loss: 0.5080
3/15000:
Training Loss: 0.4989 Validation Loss: 0.4989
4/15000:
Training Loss: 0.5103 Validation Loss: 0.5014
5/15000:
Training Loss: 0.5073 Validation Loss: 0.5009
6/15000:
Training Loss: 0.4954 Validation Loss: 0.5008
7/15000:
Training Loss: 0.5047 Validation Loss: 0.5043
8/15000:
Training Loss: 0.5215 Validation Loss: 0.5291
9/15000:
Training Loss: 0.5345 Validation Loss: 0.5137
10/15000:
Training Loss: 0.5234 Validation Loss: 0.4984
11/15000:
Training Loss: 0.4929 Validation Loss: 0.5004
12/15000:
Training Loss: 0.5014 Validation Loss: 0.4984
13/15000:
Training Loss: 0.5010 Validation Loss: 0.4980
14/15000:
Training Loss: 0.5193 Validation Loss: 0.5014
15/15000:
Training Loss: 0.5112 Validation Loss: 0.5027
16/15000:
Training Loss: 0.5215 Validation Loss: 0.4948
17/15000:
Training Loss: 0.5220 Validation Loss: 0.4964
18/15000:
Training Loss: 0.5070 Validation Loss: 0.4963
1

In [119]:
from sklearn.metrics import accuracy_score
y_pred = second_model.predict(X_test)
accuracy_score(y_test, y_pred)

0.7443946188340808