In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

### First, we load the titatic dataset and convert all categorical variables to one-hot

In [2]:
data_frame = pd.read_csv('titanic.csv')
data_frame = data_frame.drop(['Name', 'Ticket', 'Cabin'], axis=1)
data_frame = pd.get_dummies(data_frame, columns=['Pclass', 'Sex', 'Embarked'])
data_frame = data_frame.fillna(data_frame.mean())
data_frame

Unnamed: 0,PassengerId,Survived,Age,SibSp,Parch,Fare,Pclass_1,Pclass_2,Pclass_3,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
0,1,0,22.000000,1,0,7.2500,False,False,True,False,True,False,False,True
1,2,1,38.000000,1,0,71.2833,True,False,False,True,False,True,False,False
2,3,1,26.000000,0,0,7.9250,False,False,True,True,False,False,False,True
3,4,1,35.000000,1,0,53.1000,True,False,False,True,False,False,False,True
4,5,0,35.000000,0,0,8.0500,False,False,True,False,True,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,27.000000,0,0,13.0000,False,True,False,False,True,False,False,True
887,888,1,19.000000,0,0,30.0000,True,False,False,True,False,False,False,True
888,889,0,29.699118,1,2,23.4500,False,False,True,True,False,False,False,True
889,890,1,26.000000,0,0,30.0000,True,False,False,False,True,True,False,False


### Next, we convert the dataset to a NumPy array and normalize it

In [7]:
labels = data_frame[['Survived']].to_numpy().astype(float)
observ = data_frame.drop(['PassengerId', 'Survived'], axis=1).to_numpy().astype(float)
X_train, X_test, y_train, y_test = train_test_split(observ, labels, test_size=0.25, random_state=42)

In [9]:
observ.shape

(891, 12)

In [10]:
# We have to normalize Age and Fare, so columns 0,3
age_min, age_max = X_train[:,0].min(), X_train[:,0].max()
fare_min, fare_max = X_train[:,3].min(), X_train[:,3].max()
# Apply
X_train[:,0] = (X_train[:,0] - age_min) / (age_max - age_min + 1e-5)
X_test[:,0] = (X_test[:,0] - age_min) / (age_max - age_min + 1e-5)
X_train[:,3] = (X_train[:,3] - fare_min) / (fare_max - fare_min + 1e-5)
X_test[:,3] = (X_test[:,3] - fare_min) / (fare_max - fare_min + 1e-5)

### Now, let's create our sequential model with four linear layers

In [21]:
import torch, torch.nn as nn, torch.nn.functional as F
first_model = [
    nn.Linear(X_train.shape[1], 256),
    nn.ReLU(),
    nn.Dropout(),
    
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Dropout(),
    
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Dropout(),
    
    nn.Linear(64, 1),
    nn.Sigmoid()
]

### We can already apply it to our test dataset

In [22]:
inp = torch.tensor(X_test[:20], dtype=torch.float)
for layer in first_model:
    inp = layer(inp)
#first_model(torch.tensor(X_test[:20], dtype=torch.float))

In [23]:
inp

tensor([[0.4971],
        [0.4768],
        [0.4897],
        [0.4774],
        [0.4728],
        [0.4863],
        [0.4612],
        [0.4299],
        [0.4836],
        [0.5055],
        [0.4854],
        [0.4808],
        [0.4626],
        [0.4943],
        [0.4635],
        [0.4824],
        [0.4464],
        [0.4984],
        [0.4715],
        [0.4826]], grad_fn=<SigmoidBackward0>)

### Now, let's fit our model

In [None]:
first_model.fit(X_train, y_train, X_test, y_test)

### As you may have seen, this does not work. We have to implement the training ourselves!

In [24]:
class MyMLPModel(nn.Module):
    def __init__(self, input, *hidden_layers, lr=0.1, dropout=0.2):
        super().__init__() # <- Very important!
        self.lr = lr
        ## Build model
        n_neurons = [input] + list(hidden_layers)
        self.layers = []
        for i, o in zip(n_neurons[:-1], n_neurons[1:]):
            self.layers += [
                nn.Linear(i, o),
                nn.ReLU(),
                nn.Dropout(dropout),
            ]
        self.layers += [
            nn.Linear(hidden_layers[-1], 1), # Output layer with no final activation
            nn.Sigmoid(), # Final activation function
        ]
        
        self.layers = nn.Sequential(*self.layers) # Create a sequential model
        
    def forward(self, X):
        return self.layers(X)
    
    def predict(self, X, th=0.5):
        X = torch.tensor(X, dtype=torch.float)
        with torch.no_grad():
            y_hat = self(X)
        return (y_hat >= th).float()
    
    def train_step(self, X, y):
        y_hat = self(X)
        return F.binary_cross_entropy(y_hat, y)
        
    def validation_step(self, X, y):
        with torch.no_grad():
            return self.train_step(X, y)
    
    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=self.lr)
        return optimizer
        
    def fit(self, X_train, y_train, X_valid, y_valid, epochs=10):
        ## Convert dataset
        X_train = torch.tensor(X_train, dtype=torch.float)
        y_train = torch.tensor(y_train, dtype=torch.float)
        
        X_valid = torch.tensor(X_valid, dtype=torch.float)
        y_valid = torch.tensor(y_valid, dtype=torch.float)
        
        ## Load Optimizer
        optimizer = self.configure_optimizers()
        
        for epoch in range(epochs):
            print(f'{epoch+1}/{epochs}:')
            # Training
            self.train() # Set model to training mode
            optimizer.zero_grad(set_to_none=True) # Sets all gradients to Zero (Default is to None) 
            loss = self.train_step(X_train, y_train) # Execute Forward pass and calculate Loss
            loss.backward() # Execute Backward pass
            optimizer.step() # Update weights
            self.eval() # Set model to validation mode
            
            # Validation
            loss_valid = self.validation_step(X_valid, y_valid)
            print(f'Training Loss: {loss.item():1.4f}', f'Validation Loss: {loss_valid.item():1.4f}')
            
        return self

### Create a model similar to the one before

In [30]:
second_model = MyMLPModel(X_train.shape[1], 32, 16, 8, 4)
second_model

MyMLPModel(
  (layers): Sequential(
    (0): Linear(in_features=12, out_features=64, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=64, out_features=32, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.2, inplace=False)
    (6): Linear(in_features=32, out_features=16, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.2, inplace=False)
    (9): Linear(in_features=16, out_features=8, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.2, inplace=False)
    (12): Linear(in_features=8, out_features=4, bias=True)
    (13): ReLU()
    (14): Dropout(p=0.2, inplace=False)
    (15): Linear(in_features=4, out_features=1, bias=True)
    (16): Sigmoid()
  )
)

In [31]:
second_model.predict(X_test[:20])

tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]])

### Train it and calculate the test accuracy 

In [32]:
second_model = second_model.fit(X_train, y_train, X_test, y_test, epochs=1000)

1/1000:
Training Loss: 0.6773 Validation Loss: 0.6799
2/1000:
Training Loss: 0.6764 Validation Loss: 0.6793
3/1000:
Training Loss: 0.6756 Validation Loss: 0.6788
4/1000:
Training Loss: 0.6749 Validation Loss: 0.6783
5/1000:
Training Loss: 0.6743 Validation Loss: 0.6778
6/1000:
Training Loss: 0.6741 Validation Loss: 0.6774
7/1000:
Training Loss: 0.6732 Validation Loss: 0.6770
8/1000:
Training Loss: 0.6726 Validation Loss: 0.6767
9/1000:
Training Loss: 0.6725 Validation Loss: 0.6763
10/1000:
Training Loss: 0.6716 Validation Loss: 0.6760
11/1000:
Training Loss: 0.6711 Validation Loss: 0.6757
12/1000:
Training Loss: 0.6707 Validation Loss: 0.6754
13/1000:
Training Loss: 0.6701 Validation Loss: 0.6752
14/1000:
Training Loss: 0.6695 Validation Loss: 0.6749
15/1000:
Training Loss: 0.6696 Validation Loss: 0.6747
16/1000:
Training Loss: 0.6693 Validation Loss: 0.6745
17/1000:
Training Loss: 0.6685 Validation Loss: 0.6743
18/1000:
Training Loss: 0.6683 Validation Loss: 0.6742
19/1000:
Training L

Training Loss: 0.6638 Validation Loss: 0.6733
206/1000:
Training Loss: 0.6639 Validation Loss: 0.6733
207/1000:
Training Loss: 0.6642 Validation Loss: 0.6733
208/1000:
Training Loss: 0.6638 Validation Loss: 0.6733
209/1000:
Training Loss: 0.6634 Validation Loss: 0.6733
210/1000:
Training Loss: 0.6633 Validation Loss: 0.6733
211/1000:
Training Loss: 0.6635 Validation Loss: 0.6733
212/1000:
Training Loss: 0.6630 Validation Loss: 0.6733
213/1000:
Training Loss: 0.6631 Validation Loss: 0.6733
214/1000:
Training Loss: 0.6633 Validation Loss: 0.6733
215/1000:
Training Loss: 0.6632 Validation Loss: 0.6733
216/1000:
Training Loss: 0.6633 Validation Loss: 0.6733
217/1000:
Training Loss: 0.6635 Validation Loss: 0.6733
218/1000:
Training Loss: 0.6634 Validation Loss: 0.6733
219/1000:
Training Loss: 0.6631 Validation Loss: 0.6733
220/1000:
Training Loss: 0.6638 Validation Loss: 0.6733
221/1000:
Training Loss: 0.6640 Validation Loss: 0.6733
222/1000:
Training Loss: 0.6634 Validation Loss: 0.6733
22

Training Loss: 0.6630 Validation Loss: 0.6730
466/1000:
Training Loss: 0.6630 Validation Loss: 0.6730
467/1000:
Training Loss: 0.6636 Validation Loss: 0.6730
468/1000:
Training Loss: 0.6633 Validation Loss: 0.6730
469/1000:
Training Loss: 0.6630 Validation Loss: 0.6730
470/1000:
Training Loss: 0.6628 Validation Loss: 0.6730
471/1000:
Training Loss: 0.6635 Validation Loss: 0.6730
472/1000:
Training Loss: 0.6624 Validation Loss: 0.6730
473/1000:
Training Loss: 0.6633 Validation Loss: 0.6730
474/1000:
Training Loss: 0.6627 Validation Loss: 0.6730
475/1000:
Training Loss: 0.6631 Validation Loss: 0.6730
476/1000:
Training Loss: 0.6631 Validation Loss: 0.6730
477/1000:
Training Loss: 0.6628 Validation Loss: 0.6730
478/1000:
Training Loss: 0.6638 Validation Loss: 0.6730
479/1000:
Training Loss: 0.6632 Validation Loss: 0.6730
480/1000:
Training Loss: 0.6634 Validation Loss: 0.6730
481/1000:
Training Loss: 0.6636 Validation Loss: 0.6730
482/1000:
Training Loss: 0.6631 Validation Loss: 0.6730
48

Training Loss: 0.6631 Validation Loss: 0.6721
729/1000:
Training Loss: 0.6624 Validation Loss: 0.6721
730/1000:
Training Loss: 0.6630 Validation Loss: 0.6721
731/1000:
Training Loss: 0.6622 Validation Loss: 0.6721
732/1000:
Training Loss: 0.6623 Validation Loss: 0.6721
733/1000:
Training Loss: 0.6621 Validation Loss: 0.6720
734/1000:
Training Loss: 0.6623 Validation Loss: 0.6720
735/1000:
Training Loss: 0.6621 Validation Loss: 0.6720
736/1000:
Training Loss: 0.6624 Validation Loss: 0.6720
737/1000:
Training Loss: 0.6625 Validation Loss: 0.6720
738/1000:
Training Loss: 0.6626 Validation Loss: 0.6720
739/1000:
Training Loss: 0.6629 Validation Loss: 0.6720
740/1000:
Training Loss: 0.6610 Validation Loss: 0.6720
741/1000:
Training Loss: 0.6625 Validation Loss: 0.6720
742/1000:
Training Loss: 0.6624 Validation Loss: 0.6720
743/1000:
Training Loss: 0.6624 Validation Loss: 0.6720
744/1000:
Training Loss: 0.6621 Validation Loss: 0.6720
745/1000:
Training Loss: 0.6611 Validation Loss: 0.6720
74

Training Loss: 0.6542 Validation Loss: 0.6604
992/1000:
Training Loss: 0.6544 Validation Loss: 0.6602
993/1000:
Training Loss: 0.6525 Validation Loss: 0.6600
994/1000:
Training Loss: 0.6524 Validation Loss: 0.6599
995/1000:
Training Loss: 0.6518 Validation Loss: 0.6597
996/1000:
Training Loss: 0.6520 Validation Loss: 0.6595
997/1000:
Training Loss: 0.6543 Validation Loss: 0.6593
998/1000:
Training Loss: 0.6527 Validation Loss: 0.6592
999/1000:
Training Loss: 0.6525 Validation Loss: 0.6590
1000/1000:
Training Loss: 0.6551 Validation Loss: 0.6588


In [33]:
from sklearn.metrics import accuracy_score
y_pred = second_model.predict(X_test)
accuracy_score(y_test, y_pred)

0.600896860986547

In [29]:
y_pred

tensor([[0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
      

In [37]:
torch.save(second_model, 'my_model.pth')

In [36]:
second_model.load_state_dict()

TypeError: Module.load_state_dict() missing 1 required positional argument: 'state_dict'

In [39]:
torch.load('my_model.pth')

MyMLPModel(
  (layers): Sequential(
    (0): Linear(in_features=12, out_features=64, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=64, out_features=32, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.2, inplace=False)
    (6): Linear(in_features=32, out_features=16, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.2, inplace=False)
    (9): Linear(in_features=16, out_features=8, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.2, inplace=False)
    (12): Linear(in_features=8, out_features=4, bias=True)
    (13): ReLU()
    (14): Dropout(p=0.2, inplace=False)
    (15): Linear(in_features=4, out_features=1, bias=True)
    (16): Sigmoid()
  )
)