# Titanic: Survival Model

Build and train a model to predict survival on the Titanic using a [cleaned and split dataset](https://huggingface.co/datasets/jamieoliver/titanic-2410), and upload the model to Hugging Face.

Based on https://github.com/fastai/course22/blob/master/clean/05-linear-model-and-neural-net-from-scratch.ipynb using the dataset from https://www.kaggle.com/competitions/titanic.

Plan
- [x] Download [cleaned and split dataset](https://huggingface.co/datasets/jamieoliver/titanic-2410) from Hugging Face
- [x] Prepare data for model
    - [x] Load training dataset as PyTorch tensors
    - [x] Normalise training dataset
- [x] Train model
    - [x] Set up coefficients
    - [x] Set up gradient descent step
    - [x] Run training loop
- [ ] Test model
- [ ] Upload model to Hugging Face

## Download Dataset from Hugging Face

In [1]:
from datasets import *

datasetDict = load_dataset('jamieoliver/titanic-2410')
datasetDict

DatasetDict({
    train: Dataset({
        features: ['PassengerId', 'Survived', 'Name', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'LogFare', 'Pclass_1', 'Pclass_2', 'Pclass_3', 'Sex_female', 'Sex_male', 'Embarked_C', 'Embarked_Q', 'Embarked_S'],
        num_rows: 712
    })
    validation: Dataset({
        features: ['PassengerId', 'Survived', 'Name', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'LogFare', 'Pclass_1', 'Pclass_2', 'Pclass_3', 'Sex_female', 'Sex_male', 'Embarked_C', 'Embarked_Q', 'Embarked_S'],
        num_rows: 179
    })
    test: Dataset({
        features: ['PassengerId', 'Survived', 'Name', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'LogFare', 'Pclass_1', 'Pclass_2', 'Pclass_3', 'Sex_female', 'Sex_male', 'Embarked_C', 'Embarked_Q', 'Embarked_S'],
        num_rows: 418
    })
})

## Prepare Data for Model

### Load Training Dataset as PyTorch Tensors

In [2]:
import torch
from torch import tensor

torch.set_printoptions(linewidth=120, edgeitems=10)

In [3]:
train_dataset = datasetDict['train']
train_dataset

Dataset({
    features: ['PassengerId', 'Survived', 'Name', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'LogFare', 'Pclass_1', 'Pclass_2', 'Pclass_3', 'Sex_female', 'Sex_male', 'Embarked_C', 'Embarked_Q', 'Embarked_S'],
    num_rows: 712
})

The dependent variable is the variable we are predicting, i.e. the `Survived` column.

In [4]:
# Pull the `Survived` column out of the training split as a 1-D float tensor (one value per passenger).
dependent_var = tensor(train_dataset.to_pandas()['Survived'].values, dtype=torch.float)
dependent_var

tensor([0., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1.,
        0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 1., 1., 0., 1., 0., 0., 1., 0., 0., 1., 1., 0., 0., 1.,
        0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1., 0., 0.,
        1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 1., 1., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0.,
        0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 1., 1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 1., 0., 1., 0., 0.,
        1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 1.,
        1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 1., 0., 1., 1., 0., 0.,
        1., 1., 1., 1., 1., 0., 1., 0., 0., 1., 1., 1., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0.,
        0., 1., 1., 0., 1., 0., 0., 0., 

In [5]:
dependent_var.shape

torch.Size([712])

The independent variables are the variables we will use to make the prediction. Note that we use a trick of multiplying the Pandas DataFrame by 1 to convert booleans to integers.

In [6]:
# Columns fed to the model as features; the remaining columns (e.g. Name, Ticket, Cabin) are not used.
independent_cols = ['Age', 'SibSp', 'Parch', 'LogFare', 'Pclass_1', 'Pclass_2', 'Pclass_3', 'Sex_female', 'Sex_male',
                    'Embarked_C', 'Embarked_Q', 'Embarked_S']

# Select the feature columns first and only then multiply by 1 (booleans -> integers), so the
# multiplication is never applied to the unused non-feature columns of the DataFrame.
independent_vars = tensor((train_dataset.to_pandas()[independent_cols]*1).values, dtype=torch.float)
independent_vars

tensor([[22.0000,  0.0000,  0.0000,  2.0949,  0.0000,  0.0000,  1.0000,  0.0000,  1.0000,  0.0000,  0.0000,  1.0000],
        [24.0000,  2.0000,  0.0000,  4.3108,  0.0000,  1.0000,  0.0000,  0.0000,  1.0000,  0.0000,  0.0000,  1.0000],
        [24.0000,  0.0000,  0.0000,  5.4316,  1.0000,  0.0000,  0.0000,  0.0000,  1.0000,  1.0000,  0.0000,  0.0000],
        [32.0000,  0.0000,  0.0000,  4.3476,  1.0000,  0.0000,  0.0000,  1.0000,  0.0000,  1.0000,  0.0000,  0.0000],
        [30.0000,  0.0000,  0.0000,  2.1102,  0.0000,  0.0000,  1.0000,  0.0000,  1.0000,  0.0000,  0.0000,  1.0000],
        [18.0000,  0.0000,  2.0000,  2.6391,  0.0000,  1.0000,  0.0000,  1.0000,  0.0000,  0.0000,  0.0000,  1.0000],
        [47.0000,  1.0000,  1.0000,  3.9807,  1.0000,  0.0000,  0.0000,  1.0000,  0.0000,  0.0000,  0.0000,  1.0000],
        [34.0000,  0.0000,  0.0000,  2.4423,  0.0000,  1.0000,  0.0000,  1.0000,  0.0000,  0.0000,  0.0000,  1.0000],
        [39.0000,  1.0000,  1.0000,  4.7175,  1.0000,  0

In [7]:
independent_vars.shape

torch.Size([712, 12])

### Normalise Training Dataset

In [8]:
# Per-column maximum taken over all rows (dim=0); `indices` holds the row positions of those
# maxima and is not used in this cell.
max_vals,indices = independent_vars.max(dim=0)
# Scale every column in place by its own maximum (broadcast across rows), so each column's
# maximum maps to 1.
# NOTE(review): a column whose maximum is 0 would yield NaN/inf here; none is evident in the output shown.
independent_vars /= max_vals
independent_vars

tensor([[0.2973, 0.0000, 0.0000, 0.3357, 0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000],
        [0.3243, 0.2500, 0.0000, 0.6907, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000],
        [0.3243, 0.0000, 0.0000, 0.8703, 1.0000, 0.0000, 0.0000, 0.0000, 1.0000, 1.0000, 0.0000, 0.0000],
        [0.4324, 0.0000, 0.0000, 0.6966, 1.0000, 0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.0000],
        [0.4054, 0.0000, 0.0000, 0.3381, 0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000],
        [0.2432, 0.0000, 0.3333, 0.4229, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 1.0000],
        [0.6351, 0.1250, 0.1667, 0.6378, 1.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 1.0000],
        [0.4595, 0.0000, 0.0000, 0.3913, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 1.0000],
        [0.5270, 0.1250, 0.1667, 0.7559, 1.0000, 0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.0000],
        [0.6757, 0.0000, 0.1667, 0.8838, 1.000

## Train Model

### Set Up Coefficients

Initialise random coefficients as a column vector.

In [9]:
# One coefficient per independent-variable column.
num_coeffs = independent_vars.shape[1]
# Fix the RNG seed so the random starting coefficients are reproducible.
torch.manual_seed(42)
# torch.rand samples uniformly from [0, 1); subtracting 0.5 centres the starting values on zero.
coeffs = torch.rand(num_coeffs, 1) - 0.5
coeffs

tensor([[ 0.3823],
        [ 0.4150],
        [-0.1171],
        [ 0.4593],
        [-0.1096],
        [ 0.1009],
        [-0.2434],
        [ 0.2936],
        [ 0.4408],
        [-0.3668],
        [ 0.4346],
        [ 0.0936]])

Reshape the dependent variable into a column vector by adding a trailing dimension, so that it lines up with the column of predictions.

In [10]:
# Indexing with None appends an axis: shape (712,) becomes (712, 1).
# NOTE: this reassigns dependent_var from itself, so re-running the cell adds yet another axis.
dependent_var = dependent_var[:,None]
dependent_var[:10]

tensor([[0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]])

In [11]:
predictions = independent_vars@coeffs
predictions[:10]

tensor([[0.5588],
        [1.1802],
        [0.4881],
        [0.3025],
        [0.6012],
        [0.7363],
        [0.8458],
        [0.8435],
        [0.3983],
        [0.4620]])

In [12]:
# Mean absolute error between the predictions and the actual outcomes.
loss = torch.abs(predictions - dependent_var).mean()
loss

tensor(0.5760)

In [13]:
def calc_predictions(coeffs, independent_vars):
    """Return the linear model's raw output for every row.

    Args:
        coeffs: Coefficient tensor that `independent_vars` is matrix-multiplied by.
        independent_vars: Tensor of input features, one row per example.

    Returns:
        The matrix product `independent_vars @ coeffs`.
    """
    return torch.matmul(independent_vars, coeffs)

def calc_loss(coeffs, independent_vars, dependent_var):
    """Return the mean absolute error of the model's predictions.

    Args:
        coeffs: Coefficient tensor passed through to `calc_predictions`.
        independent_vars: Tensor of input features passed through to `calc_predictions`.
        dependent_var: Tensor of target values the predictions are compared against.

    Returns:
        A scalar tensor: the mean of the absolute differences between
        predictions and targets.
    """
    errors = calc_predictions(coeffs, independent_vars) - dependent_var
    return torch.abs(errors).mean()
    

### Set Up Gradient Descent Step

In [14]:
coeffs.requires_grad_()

tensor([[ 0.3823],
        [ 0.4150],
        [-0.1171],
        [ 0.4593],
        [-0.1096],
        [ 0.1009],
        [-0.2434],
        [ 0.2936],
        [ 0.4408],
        [-0.3668],
        [ 0.4346],
        [ 0.0936]], requires_grad=True)

In [15]:
loss = calc_loss(coeffs, independent_vars, dependent_var)
loss

tensor(0.5760, grad_fn=<MeanBackward0>)

In [16]:
loss.backward()
coeffs.grad

tensor([[ 0.1188],
        [ 0.0230],
        [ 0.0054],
        [ 0.0838],
        [-0.0407],
        [ 0.0295],
        [ 0.2865],
        [-0.1587],
        [ 0.4340],
        [-0.0351],
        [ 0.0393],
        [ 0.2711]])

In [17]:
loss = calc_loss(coeffs, independent_vars, dependent_var)
# NOTE(review): when run straight after the previous cell, coeffs.grad still holds that cell's
# gradient (it has not been zeroed), so this backward() accumulates onto it and the step below
# uses the summed gradient rather than a single one.
loss.backward()
# Turn off autograd tracking so the in-place update itself is not recorded.
with torch.no_grad():
    # Gradient-descent step with a learning rate of 0.1.
    coeffs.sub_(coeffs.grad * 0.1)
    # Clear the gradient so the next backward() starts from zero.
    coeffs.grad.zero_()
    # Loss after the update, for comparison with the value before it.
    print(calc_loss(coeffs, independent_vars, dependent_var))

tensor(0.4996)


### Run Training Loop

In [18]:
def update_coeffs(coeffs, learning_rate):
    """Apply one in-place gradient-descent step, then clear the gradient.

    Intended to be called inside `torch.no_grad()`, as `one_epoch` does.

    Args:
        coeffs: Coefficient tensor whose `.grad` has already been populated.
        learning_rate: Multiplier applied to the gradient before subtracting it.
    """
    step = coeffs.grad * learning_rate
    coeffs.sub_(step)
    # Reset so the next backward() does not add to this gradient.
    coeffs.grad.zero_()

In [19]:
def one_epoch(coeffs, learning_rate):
    """Run a single gradient-descent update over the whole training set.

    Computes the loss on the module-level `independent_vars` / `dependent_var`,
    back-propagates it, updates `coeffs` in place, and prints the loss that was
    measured before the update.

    Args:
        coeffs: Coefficient tensor to update in place.
        learning_rate: Step size forwarded to `update_coeffs`.
    """
    loss = calc_loss(coeffs, independent_vars, dependent_var)
    loss.backward()
    # The update must not be tracked by autograd.
    with torch.no_grad():
        update_coeffs(coeffs, learning_rate)
    print(f'{loss:.3f}', end='; ')

In [20]:
def init_coeffs():
    """Return fresh random coefficients with gradient tracking enabled.

    Returns:
        A `(num_coeffs, 1)` tensor of `torch.rand` samples shifted down by 0.5,
        with `requires_grad` switched on.
    """
    centred = torch.rand(num_coeffs, 1) - 0.5
    return centred.requires_grad_()

In [21]:
def train_model(epochs=30, learning_rate=0.01, seed=442):
    """Train the linear model's coefficients by gradient descent.

    Args:
        epochs: Number of times `one_epoch` is run.
        learning_rate: Step size forwarded to `one_epoch` for every update.
        seed: Value passed to `torch.manual_seed` before the coefficients are
            initialised, so runs are reproducible. Pass `None` to leave the
            global RNG state untouched.

    Returns:
        The coefficient tensor created by `init_coeffs`, after `epochs` updates.
    """
    if seed is not None:
        torch.manual_seed(seed)
    coeffs = init_coeffs()
    for _ in range(epochs):
        one_epoch(coeffs, learning_rate)

    return coeffs

In [22]:
coeffs = train_model(epochs=20, learning_rate=0.2)

0.708; 0.660; 0.621; 0.584; 0.548; 0.513; 0.477; 0.442; 0.407; 0.374; 0.342; 0.315; 0.293; 0.277; 0.266; 0.260; 0.270; 0.292; 0.273; 0.295; 

In [23]:
def show_coeffs():
    """Return a dict mapping each independent column name to its coefficient.

    Reads the module-level `independent_cols` and `coeffs`. The coefficients are
    read through `detach()`, so the returned values carry no gradient tracking
    while `coeffs` itself keeps its `requires_grad` setting and can still be
    trained afterwards.

    Returns:
        dict of column name -> one-element tensor holding that column's coefficient.
    """
    return dict(zip(independent_cols, coeffs.detach()))

show_coeffs()

{'Age': tensor([-0.2781]),
 'SibSp': tensor([0.0818]),
 'Parch': tensor([0.2151]),
 'LogFare': tensor([7.2036e-05]),
 'Pclass_1': tensor([0.1255]),
 'Pclass_2': tensor([0.0336]),
 'Pclass_3': tensor([-0.0669]),
 'Sex_female': tensor([0.8136]),
 'Sex_male': tensor([-0.0375]),
 'Embarked_C': tensor([0.0867]),
 'Embarked_Q': tensor([0.1575]),
 'Embarked_S': tensor([0.0721])}