In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

### First, we load the titatic dataset and convert all categorical variables to one-hot

In [2]:
data_frame = pd.read_csv('titanic.csv')
data_frame = data_frame.drop(['Name', 'Ticket', 'Cabin'], axis=1)
data_frame = pd.get_dummies(data_frame, columns=['Pclass', 'Sex', 'Embarked'])
data_frame = data_frame.fillna(data_frame.mean())
data_frame

Unnamed: 0,PassengerId,Survived,Age,SibSp,Parch,Fare,Pclass_1,Pclass_2,Pclass_3,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
0,1,0,22.000000,1,0,7.2500,False,False,True,False,True,False,False,True
1,2,1,38.000000,1,0,71.2833,True,False,False,True,False,True,False,False
2,3,1,26.000000,0,0,7.9250,False,False,True,True,False,False,False,True
3,4,1,35.000000,1,0,53.1000,True,False,False,True,False,False,False,True
4,5,0,35.000000,0,0,8.0500,False,False,True,False,True,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,27.000000,0,0,13.0000,False,True,False,False,True,False,False,True
887,888,1,19.000000,0,0,30.0000,True,False,False,True,False,False,False,True
888,889,0,29.699118,1,2,23.4500,False,False,True,True,False,False,False,True
889,890,1,26.000000,0,0,30.0000,True,False,False,False,True,True,False,False


### Next, we convert the dataset to a NumPy array and normalize it

In [3]:
labels = data_frame[['Survived']].to_numpy().astype(float)
observ = data_frame.drop(['PassengerId', 'Survived'], axis=1).to_numpy().astype(float)
X_train, X_test, y_train, y_test = train_test_split(observ, labels, test_size=0.25, random_state=42)

In [4]:
observ.shape

(891, 12)

In [5]:
# We have to normalize Age and Fare, so columns 0,3
age_min, age_max = X_train[:,0].min(), X_train[:,0].max()
fare_min, fare_max = X_train[:,3].min(), X_train[:,3].max()
# Apply
X_train[:,0] = (X_train[:,0] - age_min) / (age_max - age_min + 1e-5)
X_test[:,0] = (X_test[:,0] - age_min) / (age_max - age_min + 1e-5)
X_train[:,3] = (X_train[:,3] - fare_min) / (fare_max - fare_min + 1e-5)
X_test[:,3] = (X_test[:,3] - fare_min) / (fare_max - fare_min + 1e-5)

### Now, let's create our sequential model with four linear layers

In [6]:
import torch, torch.nn as nn, torch.nn.functional as F
first_model = [
    nn.Linear(X_train.shape[1], 256),
    nn.ReLU(),
    nn.Dropout(),
    
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Dropout(),
    
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Dropout(),
    
    nn.Linear(64, 1),
    nn.Sigmoid()
]

### We can already apply it to our test dataset

In [11]:
inp = torch.tensor(X_test[:20], dtype=torch.float)
for layer in first_model:
    inp = layer(inp)
#first_model(torch.tensor(X_test[:20], dtype=torch.float))

In [12]:
inp

tensor([[0.4901],
        [0.4978],
        [0.4926],
        [0.4977],
        [0.4836],
        [0.5025],
        [0.5027],
        [0.5051],
        [0.5166],
        [0.5198],
        [0.4911],
        [0.4705],
        [0.5135],
        [0.5058],
        [0.5016],
        [0.4951],
        [0.4732],
        [0.4869],
        [0.4860],
        [0.4828]], grad_fn=<SigmoidBackward0>)

### Now, let's fit our model

In [13]:
first_model.fit(X_train, y_train, X_test, y_test)

AttributeError: 'list' object has no attribute 'fit'

### As you may have seen, this does not work. We have to implement the training ourselves!

In [14]:
class MyMLPModel(nn.Module):
    def __init__(self, input, *hidden_layers, lr=0.1, dropout=0.2):
        super().__init__() # <- Very important!
        self.lr = lr
        ## Build model
        n_neurons = [input] + list(hidden_layers)
        self.layers = []
        for i, o in zip(n_neurons[:-1], n_neurons[1:]):
            self.layers += [
                nn.Linear(i, o),
                nn.ReLU(),
                nn.Dropout(dropout),
            ]
        self.layers += [
            nn.Linear(hidden_layers[-1], 1), # Output layer with no final activation
            nn.Sigmoid(), # Final activation function
        ]
        
        self.layers = nn.Sequential(*self.layers) # Create a sequential model
        
    def forward(self, X):
        return self.layers(X)
    
    def predict(self, X, th=0.5):
        X = torch.tensor(X, dtype=torch.float)
        with torch.no_grad():
            y_hat = self(X)
        return (y_hat >= th).float()
    
    def train_step(self, X, y):
        y_hat = self(X)
        return F.binary_cross_entropy(y_hat, y)
        
    def validation_step(self, X, y):
        with torch.no_grad():
            return self.train_step(X, y)
    
    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=self.lr)
        return optimizer
        
    def fit(self, X_train, y_train, X_valid, y_valid, epochs=10):
        ## Convert dataset
        X_train = torch.tensor(X_train, dtype=torch.float)
        y_train = torch.tensor(y_train, dtype=torch.float)
        
        X_valid = torch.tensor(X_valid, dtype=torch.float)
        y_valid = torch.tensor(y_valid, dtype=torch.float)
        
        ## Load Optimizer
        optimizer = self.configure_optimizers()
        
        for epoch in range(epochs):
            print(f'{epoch+1}/{epochs}:')
            # Training
            self.train() # Set model to training mode
            optimizer.zero_grad(set_to_none=True) # Sets all gradients to Zero (Default is to None) 
            loss = self.train_step(X_train, y_train) # Execute Forward pass and calculate Loss
            loss.backward() # Execute Backward pass
            optimizer.step() # Update weights
            self.eval() # Set model to validation mode
            
            # Validation
            loss_valid = self.validation_step(X_valid, y_valid)
            print(f'Training Loss: {loss.item():1.4f}', f'Validation Loss: {loss_valid.item():1.4f}')
            
        return self

### Create a model similar to the one before

In [15]:
second_model = MyMLPModel(X_train.shape[1], 32, 16, 8, 4)
second_model

MyMLPModel(
  (layers): Sequential(
    (0): Linear(in_features=12, out_features=32, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=32, out_features=16, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.2, inplace=False)
    (6): Linear(in_features=16, out_features=8, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.2, inplace=False)
    (9): Linear(in_features=8, out_features=4, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.2, inplace=False)
    (12): Linear(in_features=4, out_features=1, bias=True)
    (13): Sigmoid()
  )
)

In [16]:
second_model.predict(X_test[:20])

tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]])

### Train it and calculate the test accuracy 

In [17]:
second_model = second_model.fit(X_train, y_train, X_test, y_test, epochs=1000)

1/1000:
Training Loss: 0.7043 Validation Loss: 0.7012
2/1000:
Training Loss: 0.7017 Validation Loss: 0.6991
3/1000:
Training Loss: 0.6997 Validation Loss: 0.6973
4/1000:
Training Loss: 0.6977 Validation Loss: 0.6955
5/1000:
Training Loss: 0.6953 Validation Loss: 0.6939
6/1000:
Training Loss: 0.6945 Validation Loss: 0.6923
7/1000:
Training Loss: 0.6911 Validation Loss: 0.6908
8/1000:
Training Loss: 0.6894 Validation Loss: 0.6894
9/1000:
Training Loss: 0.6886 Validation Loss: 0.6882
10/1000:
Training Loss: 0.6865 Validation Loss: 0.6870
11/1000:
Training Loss: 0.6853 Validation Loss: 0.6858
12/1000:
Training Loss: 0.6842 Validation Loss: 0.6848
13/1000:
Training Loss: 0.6818 Validation Loss: 0.6838
14/1000:
Training Loss: 0.6816 Validation Loss: 0.6829
15/1000:
Training Loss: 0.6808 Validation Loss: 0.6821
16/1000:
Training Loss: 0.6795 Validation Loss: 0.6813
17/1000:
Training Loss: 0.6783 Validation Loss: 0.6806
18/1000:
Training Loss: 0.6778 Validation Loss: 0.6799
19/1000:
Training L

In [18]:
from sklearn.metrics import accuracy_score
y_pred = second_model.predict(X_test)
accuracy_score(y_test, y_pred)

0.7892376681614349

In [19]:
y_pred

tensor([[0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
      

In [20]:
torch.save(second_model, 'my_model.pth')

In [22]:
#second_model.load_state_dict(state_dict='my_model.pth')

TypeError: Expected state_dict to be dict-like, got <class 'str'>.

In [None]:
#torch.load('my_model.pth')