## Introduction

In [None]:
import os
from pathlib import Path

# A non-empty KAGGLE_KERNEL_RUN_TYPE is taken to mean we are running on Kaggle,
# in which case the data is read from ../input/titanic.
iskaggle = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', '')
if iskaggle:
    path = Path('../input/titanic')
else:
    path = Path('titanic')
    if not path.exists():
        # Imported lazily: only needed for the one-off local download.
        import zipfile
        import kaggle

        kaggle.api.competition_download_cli(str(path))
        # The archive is expected at `<path>.zip`; the context manager closes
        # the file handle as soon as extraction finishes.
        with zipfile.ZipFile(f'{path}.zip') as archive:
            archive.extractall(path)

In [None]:
import torch, numpy as np, pandas as pd
# Use a 140-character display width for pandas, torch and numpy output.
pd.set_option('display.width', 140)
torch.set_printoptions(edgeitems=7, sci_mode=False, linewidth=140)
np.set_printoptions(linewidth=140)

## Cleaning the data

In [None]:
df = pd.read_csv(path/'train.csv')
df

In [None]:
df.isna().sum()

In [None]:
# Most frequent value of every column: the first row of the table returned by
# `df.mode()`. Used afterwards to fill in missing values.
modes = df.mode().iloc[0]
modes

In [None]:
df.fillna(modes, inplace=True)

In [None]:
df.isna().sum()

In [None]:
import numpy as np

df.describe(include=(np.number))

In [None]:
df['Fare'].hist();

In [None]:
df['LogFare'] = np.log(df['Fare']+1)

In [None]:
df['LogFare'].hist();

In [None]:
pclasses = sorted(df.Pclass.unique())
pclasses

In [None]:
df.describe(include=[object])

In [None]:
df = pd.get_dummies(df, columns=["Sex","Pclass","Embarked"])
df.columns

In [None]:
added_cols = ['Sex_male', 'Sex_female', 'Pclass_1', 'Pclass_2', 'Pclass_3', 'Embarked_C', 'Embarked_Q', 'Embarked_S']
df[added_cols].head()

In [None]:
from torch import tensor

t_dep = tensor(df.Survived)

In [None]:
# Independent variables: four numeric columns plus the dummy columns in `added_cols`.
indep_cols = ['Age', 'SibSp', 'Parch', 'LogFare'] + added_cols
# Workaround: cast boolean columns to uint8 before building the tensor.
# Per the original note, bool columns are otherwise treated as object dtype
# and the conversion below raises an error.
for n in df:
    if pd.api.types.is_bool_dtype(df[n]):
        df[n] = df[n].astype('uint8')
t_indep = tensor(df[indep_cols].values, dtype=torch.float)
t_indep

In [None]:
t_indep.shape

## Setting up a linear model

In [None]:
torch.manual_seed(442)

n_coeff = t_indep.shape[1]
coeffs = torch.rand(n_coeff)-0.5
coeffs

In [None]:
t_indep*coeffs

In [None]:
# Normalise each column by dividing by its column-wise maximum (max over dim 0).
# `vals` is reused later to scale the test set the same way; `indices` (the
# positions of the maxima) is not used again in the visible code.
vals,indices = t_indep.max(dim=0)
t_indep = t_indep / vals

In [None]:
t_indep*coeffs

In [None]:
preds = (t_indep*coeffs).sum(axis=1)

In [None]:
preds[:10]

In [None]:
loss = torch.abs(preds-t_dep).mean()
loss

In [None]:
def calc_preds(coeffs, indeps):
    """Linear prediction per row: sum over columns of `indeps * coeffs`."""
    weighted = indeps * coeffs
    return weighted.sum(axis=1)


def calc_loss(coeffs, indeps, deps):
    """Mean absolute error between `calc_preds(coeffs, indeps)` and `deps`."""
    errors = calc_preds(coeffs, indeps) - deps
    return errors.abs().mean()

## Doing a gradient descent step

In [None]:
coeffs.requires_grad_()

In [None]:
loss = calc_loss(coeffs, t_indep, t_dep)
loss

In [None]:
loss.backward()

In [None]:
coeffs.grad

In [None]:
# Calling backward() again without zeroing adds the new gradients to the ones
# already stored in coeffs.grad.
loss = calc_loss(coeffs, t_indep, t_dep)
loss.backward()
coeffs.grad

In [None]:
# One manual gradient-descent step with a learning rate of 0.1.
loss = calc_loss(coeffs, t_indep, t_dep)
loss.backward()
# NOTE(review): if the earlier cells ran in order, coeffs.grad still holds the
# gradients from the previous backward() calls (they were never zeroed), so
# this step uses the accumulated sum — confirm this is intended.
with torch.no_grad():
    # In-place update and gradient reset, done with autograd tracking disabled.
    coeffs.sub_(coeffs.grad * 0.1)
    coeffs.grad.zero_()
    # Loss after the update, for comparison with the value computed above.
    print(calc_loss(coeffs, t_indep, t_dep))

## Training the linear model

In [None]:
from fastai.data.transforms import RandomSplitter
trn_split,val_split=RandomSplitter(seed=42)(df)

In [None]:
trn_indep,val_indep = t_indep[trn_split],t_indep[val_split]
trn_dep,val_dep = t_dep[trn_split],t_dep[val_split]
len(trn_indep),len(val_indep)

In [None]:
def update_coeffs(coeffs, lr):
    """Take one gradient-descent step on `coeffs` in place, then zero its gradient.

    `coeffs.grad` must already be populated by a backward pass. The in-place
    update has to run with autograd tracking disabled (e.g. inside
    `torch.no_grad()`).
    """
    step = coeffs.grad * lr
    coeffs -= step
    coeffs.grad.zero_()

In [None]:
def one_epoch(coeffs, lr):
    """Run one full-batch training step on the training split and print the loss.

    The printed value is the loss computed before the update, formatted to
    three decimals and followed by "; " so successive epochs share one line.
    """
    loss = calc_loss(coeffs, trn_indep, trn_dep)
    loss.backward()
    # The parameter update itself must not be tracked by autograd.
    with torch.no_grad():
        update_coeffs(coeffs, lr)
    print(f"{loss:.3f}", end="; ")

In [None]:
def init_coeffs():
    """Return `n_coeff` random coefficients in [-0.5, 0.5) with gradients enabled."""
    weights = torch.rand(n_coeff) - 0.5
    weights.requires_grad_()
    return weights

In [None]:
def train_model(epochs=30, lr=0.01, seed=442):
    """Train freshly initialised coefficients with gradient descent.

    Args:
        epochs: number of `one_epoch` passes to run.
        lr: learning rate forwarded to `one_epoch`.
        seed: value passed to `torch.manual_seed` before initialisation so that
            runs are repeatable. The default matches the previously
            hard-coded seed, so existing calls behave exactly as before.

    Returns:
        The object produced by `init_coeffs()` after training.
    """
    torch.manual_seed(seed)
    coeffs = init_coeffs()
    for _ in range(epochs):
        one_epoch(coeffs, lr=lr)
    return coeffs

In [None]:
coeffs = train_model(18, lr=0.2)

In [None]:
def show_coeffs():
    """Map each name in `indep_cols` to the matching entry of the global `coeffs`.

    Side effect: gradient tracking is switched off on `coeffs` in place.
    """
    plain = coeffs.requires_grad_(False)
    return {name: weight for name, weight in zip(indep_cols, plain)}
show_coeffs()

## Measuring accuracy

In [None]:
preds = calc_preds(coeffs, val_indep)

In [None]:
results = val_dep.bool()==(preds>0.5)
results[:16]

In [None]:
results.float().mean()

In [None]:
def acc(coeffs):
    """Fraction of validation rows whose prediction, thresholded at 0.5, equals `val_dep`."""
    actual = val_dep.bool()
    predicted = calc_preds(coeffs, val_indep) > 0.5
    return (actual == predicted).float().mean()
acc(coeffs)

## Using sigmoid

In [None]:
preds[:28]

In [None]:
import sympy
sympy.plot("1/(1+exp(-x))", xlim=(-5,5));

In [None]:
def calc_preds(coeffs, indeps):
    """Row-wise weighted sum of `indeps`, squashed through a sigmoid."""
    activations = (indeps * coeffs).sum(axis=1)
    return torch.sigmoid(activations)

In [None]:
coeffs = train_model(lr=100)

In [None]:
acc(coeffs)

In [None]:
show_coeffs()

## Submitting to Kaggle

In [None]:
tst_df = pd.read_csv(path/'test.csv')

In [None]:
tst_df['Fare'] = tst_df.Fare.fillna(0)

In [None]:
# Apply the same preprocessing steps to the test set as were applied to the
# training data: fill gaps with the training modes, add LogFare, one-hot encode.
tst_df.fillna(modes, inplace=True)
tst_df['LogFare'] = np.log(tst_df['Fare']+1)
tst_df = pd.get_dummies(tst_df, columns=["Sex","Pclass","Embarked"])
# Workaround: cast boolean dummy columns to uint8 before building the tensor
# (per the original note, bool columns are otherwise treated as object dtype).
for n in tst_df:
    if pd.api.types.is_bool_dtype(tst_df[n]):
        tst_df[n] = tst_df[n].astype('uint8')
tst_indep = tensor(tst_df[indep_cols].values, dtype=torch.float)
# Scale with the maxima computed from the training data, not the test set's own.
tst_indep = tst_indep / vals

In [None]:
# calc_preds takes (coeffs, indeps). The previously swapped order only worked
# because an element-wise product is commutative; it fails with a shape error
# once calc_preds is defined with a matrix product.
tst_df['Survived'] = (calc_preds(coeffs, tst_indep)>0.5).int()

In [None]:
sub_df = tst_df[['PassengerId','Survived']]
sub_df.to_csv('sub.csv', index=False)

In [None]:
!head sub.csv

## Using matrix product

In [None]:
(val_indep*coeffs).sum(axis=1)

In [None]:
val_indep@coeffs

In [None]:
def calc_preds(coeffs, indeps):
    """Sigmoid of the matrix product `indeps @ coeffs`."""
    logits = torch.matmul(indeps, coeffs)
    return logits.sigmoid()

In [None]:
def init_coeffs():
    """Return a random (n_coeff, 1) column of coefficients in [0, 0.1) with gradients enabled."""
    column = torch.rand(n_coeff, 1) * 0.1
    return column.requires_grad_()

In [None]:
# Add a trailing unit axis so the targets are column-shaped, matching the
# (n, 1) output that `indeps @ coeffs` yields with (n_coeff, 1) coefficients.
trn_dep = trn_dep[:,None]
val_dep = val_dep[:,None]

In [None]:
coeffs = train_model(lr=100)

In [None]:
acc(coeffs)

## A neural network

In [None]:
def init_coeffs(n_hidden=20):
    """Create random parameters for a network with one hidden layer.

    Returns a tuple `(layer1, layer2, const)`, each with gradients enabled:
    an (n_coeff, n_hidden) matrix, an (n_hidden, 1) matrix and a scalar.
    """
    # The draw order is kept fixed so results under a given seed are unchanged.
    hidden_weights = (torch.rand(n_coeff, n_hidden) - 0.5) / n_hidden
    output_weights = torch.rand(n_hidden, 1) - 0.3
    bias = torch.rand(1)[0]
    params = (hidden_weights, output_weights, bias)
    for p in params:
        p.requires_grad_()
    return params

In [None]:
import torch.nn.functional as F

def calc_preds(coeffs, indeps):
    """Forward pass: ReLU hidden layer, linear output plus constant, then sigmoid.

    `coeffs` is the `(layer1, layer2, const)` triple.
    """
    hidden_w, output_w, bias = coeffs
    hidden = F.relu(indeps @ hidden_w)
    return torch.sigmoid(hidden @ output_w + bias)

In [None]:
def update_coeffs(coeffs, lr):
    """Apply one in-place gradient-descent step to every tensor in `coeffs`.

    Each tensor's gradient is zeroed right after it has been applied. Must run
    with autograd tracking disabled (e.g. inside `torch.no_grad()`).
    """
    for param in coeffs:
        param -= param.grad * lr
        param.grad.zero_()

In [None]:
coeffs = train_model(lr=1.4)

In [None]:
coeffs = train_model(lr=20)

In [None]:
acc(coeffs)

## Deep learning

In [None]:
def init_coeffs(hiddens=(10, 10)):
    """Create random weights and constants for a fully connected network.

    Args:
        hiddens: size of each hidden layer, in order. Defaults to two layers
            of 10 units, matching the previously hard-coded configuration, so
            calling `init_coeffs()` with no arguments behaves as before.

    Returns:
        `(layers, consts)`: a list of weight matrices, one per consecutive
        pair of sizes in `[n_coeff, *hiddens, 1]`, and a list of scalar
        constants of the same length. All tensors have gradients enabled.
    """
    sizes = [n_coeff, *hiddens, 1]
    # All weight matrices are drawn before any constant, which preserves the
    # original random-number order under a fixed seed.
    layers = [(torch.rand(n_in, n_out)-0.3)/n_out*4 for n_in, n_out in zip(sizes, sizes[1:])]
    consts = [(torch.rand(1)[0]-0.5)*0.1 for _ in layers]
    for l in layers+consts: l.requires_grad_()
    return layers,consts

In [None]:
import torch.nn.functional as F

def calc_preds(coeffs, indeps):
    """Forward pass through every layer in `coeffs`, ending in a sigmoid.

    `coeffs` is a `(layers, consts)` pair. Each layer applies
    `x @ weights + const`; ReLU follows every layer except the last.
    """
    layers, consts = coeffs
    last = len(layers) - 1
    out = indeps
    for idx, weights in enumerate(layers):
        out = out @ weights + consts[idx]
        if idx < last:
            out = F.relu(out)
    return torch.sigmoid(out)

In [None]:
def update_coeffs(coeffs, lr):
    """Apply one in-place gradient-descent step to all weights and constants.

    `coeffs` is a `(layers, consts)` pair of lists. Each tensor's gradient is
    zeroed after use. Must run with autograd tracking disabled.
    """
    layers, consts = coeffs
    params = layers + consts
    for param in params:
        param -= param.grad * lr
        param.grad.zero_()

In [None]:
coeffs = train_model(lr=4)

In [None]:
acc(coeffs)

## Final thoughts