<a href="https://colab.research.google.com/github/aminojagh/fast-ai/blob/main/NB4-NeuralNets-with-and-without-frameworks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Neural Nets Without Frameworks

## Initial Setup

In [None]:
import os
from pathlib import Path
import torch, numpy as np, pandas as pd
from torch import tensor
from fastai.data.transforms import RandomSplitter
import torch.nn.functional as F

In [None]:
# Local directory that holds the Kaggle Titanic CSV files read later (train.csv / test.csv).
path = Path('titanic')
# exist_ok avoids the separate exists() check (and its check-then-create race) and keeps the
# cell safe to re-run; parents=True keeps this working if `path` is ever changed to a nested one.
path.mkdir(parents=True, exist_ok=True)
# Then download the Titanic dataset from Kaggle manually and upload it to this path.

In [None]:
# Use a 140-character line width for pandas, numpy and torch output.
pd.options.display.width = 140
np.set_printoptions(linewidth=140)
torch.set_printoptions(edgeitems=7, linewidth=140, sci_mode=False)

## Cleaning the data

In [None]:
# Load the training data; train.csv must already be present under `path`.
df = pd.read_csv(path/'train.csv')

In [None]:
# df.isna().sum()

In [None]:
# First row of the per-column mode table, used as the fill value for missing entries.
modes = df.mode().iloc[0]
# Fill missing values column by column with those modes.
df = df.fillna(modes)

In [None]:
# df.isna().sum()

In [None]:
# df.describe(include=(np.number))

In [None]:
# df['Fare'].hist();

In [None]:
# log1p(x) computes log(1 + x); it is the same transform add_features() uses further down,
# so both halves of the notebook derive LogFare identically.
df['LogFare'] = np.log1p(df['Fare'])

In [None]:
# df['LogFare'].hist();

In [None]:
# pclasses = sorted(df.Pclass.unique())
# pclasses

In [None]:
# df.describe(include=[object])

In [None]:
# One-hot encode the categorical columns; the resulting indicator columns are listed in added_cols.
df = pd.get_dummies(df, columns=["Sex", "Pclass", "Embarked"])
added_cols = [
    'Sex_male', 'Sex_female',
    'Pclass_1', 'Pclass_2', 'Pclass_3',
    'Embarked_C', 'Embarked_Q', 'Embarked_S',
]
# Full list of model inputs: the numeric columns followed by the indicator columns.
indep_cols = ['Age', 'SibSp', 'Parch', 'LogFare', *added_cols]

## Setting up a linear model

In [None]:
def init_coeffs(n_coeff):
    """Return `n_coeff` random coefficients in [-0.5, 0.5) with gradient tracking enabled."""
    centred = torch.rand(n_coeff) - 0.5
    return centred.requires_grad_()

In [None]:
def update_coeffs(coeffs, lr):
    """Apply one gradient-descent step to `coeffs` in place, then reset its gradient to zero.

    The caller in this notebook (`one_epoch`) invokes this inside `torch.no_grad()`.
    """
    step = coeffs.grad * lr
    coeffs -= step
    coeffs.grad.zero_()

In [None]:
def calc_preds(coeffs, indeps):
    """Sigmoid of the per-row weighted sum of `indeps` (elementwise product with `coeffs`, summed over axis 1)."""
    weighted = indeps * coeffs
    return torch.sigmoid(weighted.sum(axis=1))

def calc_loss(coeffs, indeps, deps):
    """Mean absolute error between the predictions for `indeps` and the targets `deps`."""
    errors = calc_preds(coeffs, indeps) - deps
    return errors.abs().mean()

In [None]:
def one_epoch(coeffs, lr, indep, dep):
    """Run one full-batch training step and print the loss computed before the update.

    Computes the loss, backpropagates, applies `update_coeffs` with autograd disabled,
    and prints the loss to three decimals followed by "; " (no newline).
    """
    loss = calc_loss(coeffs, indep, dep)
    loss.backward()
    # The parameter update itself must not be tracked by autograd.
    with torch.no_grad():
        update_coeffs(coeffs, lr)
    print(f"{loss:.3f}", end="; ")

In [None]:
def train_model(n_coeff, indep, dep, epochs=30, lr=0.01):
    """Initialise coefficients and train them for `epochs` full-batch steps.

    The torch RNG is seeded with 442 before initialisation so each call starts from
    the same values. Uses whichever `init_coeffs` / `one_epoch` are currently defined
    in the notebook. Returns the trained coefficients.
    """
    torch.manual_seed(442)
    coeffs = init_coeffs(n_coeff)
    for _ in range(epochs):
        one_epoch(coeffs, lr, indep, dep)
    return coeffs

## Training the linear model

In [None]:
# Independent variables as a float32 matrix; dependent variable taken from the Survived column.
t_indep = torch.tensor(df[indep_cols].to_numpy().astype(float), dtype=torch.float32)
t_dep = torch.tensor(df['Survived'])
t_indep.shape, t_dep.shape

In [None]:
# Number of coefficients to learn = number of columns in the independent-variable matrix.
n_coeff = t_indep.shape[1]

In [None]:
# Per-column maxima (and the row index where each occurs); every column is then divided by its maximum.
vals, indices = torch.max(t_indep, dim=0)
t_indep = t_indep / vals

In [None]:
# Seeded (seed=42) split of df's rows into training and validation index lists.
trn_split,val_split=RandomSplitter(seed=42)(df)
# Index the feature matrix and the targets with the same splits so rows stay aligned.
trn_indep,val_indep = t_indep[trn_split],t_indep[val_split]
trn_dep,val_dep = t_dep[trn_split],t_dep[val_split]
# Display the size of each split.
len(trn_indep),len(val_indep)

In [None]:
# Train the linear model: 18 epochs at learning rate 0.2.
coeffs = train_model(n_coeff, trn_indep, trn_dep, epochs=18, lr=0.2)

In [None]:
def show_coeffs():
    """Map each independent-column name to its trained coefficient, for display.

    Reads the notebook globals `indep_cols` and `coeffs`.
    """
    # Gradient tracking is switched off only so the displayed values print as plain numbers
    # instead of carrying autograd information. detach() does that on a view of the data,
    # whereas requires_grad_(False) would permanently disable gradients on the global
    # `coeffs` and make any further training of it fail at backward().
    return dict(zip(indep_cols, coeffs.detach()))

show_coeffs()

## Measuring accuracy

In [None]:
def acc(coeffs):
    """Fraction of validation rows whose prediction, thresholded at 0.5, equals the label.

    Reads the notebook globals `val_indep` and `val_dep` and the current `calc_preds`.
    """
    predicted = calc_preds(coeffs, val_indep) > 0.5
    hits = val_dep.bool() == predicted
    return hits.float().mean()

acc(coeffs)

## Submitting to Kaggle

In [None]:
# tst_df = pd.read_csv(path/'test.csv')

# tst_df['Fare'] = tst_df.Fare.fillna(0)

# tst_df.fillna(modes, inplace=True)
# tst_df['LogFare'] = np.log(tst_df['Fare']+1)
# tst_df = pd.get_dummies(tst_df, columns=["Sex","Pclass","Embarked"])

# tst_indep = tensor(tst_df[indep_cols].values.astype(float), dtype=torch.float)
# tst_indep = tst_indep / vals

# tst_df['Survived'] = (calc_preds(coeffs, tst_indep)>0.5).int()

# sub_df = tst_df[['PassengerId','Survived']]
# sub_df.to_csv('sub.csv', index=False)

In [None]:
# !head sub.csv

## Using matrix product

In [None]:
def calc_preds(coeffs, indeps):
    """Sigmoid of the matrix product `indeps @ coeffs`."""
    logits = indeps @ coeffs
    return torch.sigmoid(logits)

def init_coeffs(n_coeff):
    """Return an (n_coeff, 1) column of random coefficients in [0, 0.1) with gradient tracking enabled."""
    scaled = torch.rand(n_coeff, 1) * 0.1
    return scaled.requires_grad_()

# Add a trailing unit axis so the targets are 2D column vectors, matching the (rows, 1)
# shape produced by `indeps@coeffs`. Guarded on ndim so that re-running this cell does not
# keep appending axes (an unguarded `x = x[:,None]` would yield (rows, 1, 1) on a second run).
if trn_dep.ndim == 1: trn_dep = trn_dep[:,None]
if val_dep.ndim == 1: val_dep = val_dep[:,None]

In [None]:
# Note that the number of epochs and the learning rate differ from the earlier run.
coeffs = train_model(n_coeff, trn_indep, trn_dep, epochs=30, lr=100)
acc(coeffs)

## Setting up a 1-layer NN

In [None]:
def init_coeffs(n_coeff, n_hidden=20):
    """Create the parameters of a network with one hidden layer.

    `n_coeff` is the number of inputs and `n_hidden` the hidden-layer width. Taking
    `n_coeff` as the first parameter matches the other `init_coeffs` definitions and the
    call in `train_model` (`init_coeffs(n_coeff)`); previously that positional argument was
    bound to `n_hidden`, so the hidden width silently became the number of inputs.

    Returns a tuple `(layer1, layer2, const)`, all with gradient tracking enabled:
    layer1 is (n_coeff, n_hidden), layer2 is (n_hidden, 1) and const is a scalar.
    """
    # Centre around zero and scale down by the hidden width.
    layer1 = (torch.rand(n_coeff, n_hidden)-0.5)/n_hidden
    # The 0.3 offset is a heuristic.
    layer2 = torch.rand(n_hidden, 1)-0.3
    const = torch.rand(1)[0]
    return layer1.requires_grad_(),layer2.requires_grad_(),const.requires_grad_()

def calc_preds(coeffs, indeps):
    """Forward pass of the one-hidden-layer network.

    `coeffs` unpacks to (hidden weights, output weights, scalar bias). The hidden
    activations go through ReLU; the output goes through a sigmoid.
    """
    hidden_w, out_w, bias = coeffs
    hidden = F.relu(indeps @ hidden_w)
    logits = hidden @ out_w + bias
    return torch.sigmoid(logits)

def update_coeffs(coeffs, lr):
    """Apply one gradient-descent step in place to every tensor in `coeffs`, zeroing each gradient."""
    for param in coeffs:
        param -= param.grad * lr
        param.grad.zero_()

In [None]:
# Train the one-hidden-layer network: 10 epochs at learning rate 10.
coeffs = train_model(n_coeff, trn_indep, trn_dep, epochs=10, lr=10)
acc(coeffs)

## Setting up a deeper NN

In [None]:
def init_coeffs(n_coeff, hiddens=(10, 10)):  # <-- set this to the size of each hidden layer you want
    """Create weights and scalar biases for a network with arbitrary hidden layers.

    `n_coeff` is the number of inputs; `hiddens` is any sequence (list or tuple) of
    hidden-layer widths. The default is an immutable tuple rather than a shared list.

    Returns `(layers, consts)`: `layers[i]` has shape (sizes[i], sizes[i+1]) where
    sizes = [n_coeff, *hiddens, 1], and `consts` holds one scalar per layer. Every
    tensor has gradient tracking enabled.
    """
    sizes = [n_coeff, *hiddens, 1]
    n = len(sizes)
    # The fixed numbers that adjust the initial values (0.3, 4, 0.5, 0.1) are heuristics.
    layers = [(torch.rand(sizes[i], sizes[i+1])-0.3)/sizes[i+1]*4 for i in range(n-1)]
    consts = [(torch.rand(1)[0]-0.5)*0.1 for _ in range(n-1)]
    for l in layers+consts: l.requires_grad_()
    return layers,consts

def calc_preds(coeffs, indeps):
    """Forward pass of the multi-layer network.

    `coeffs` unpacks to `(layers, consts)`. Each layer computes `out @ weights + const`;
    ReLU follows every layer except the last, and the final output goes through a sigmoid.
    """
    layers, consts = coeffs
    last = len(layers) - 1
    out = indeps
    for idx, weights in enumerate(layers):
        out = out @ weights + consts[idx]
        if idx < last:
            out = F.relu(out)
    return torch.sigmoid(out)

def update_coeffs(coeffs, lr):
    """Apply one gradient-descent step in place to every weight matrix and bias, zeroing each gradient."""
    weights, biases = coeffs
    for param in weights + biases:
        param -= param.grad * lr
        param.grad.zero_()

In [None]:
# A much smaller learning rate works for the deeper network.
coeffs = train_model(n_coeff, trn_indep, trn_dep, epochs=20, lr=1)
acc(coeffs)

# Using a Framework

## Initial Setup

In [None]:
from fastai.tabular.all import (
    set_seed, RandomSplitter,
    TabularPandas,
    CategoryBlock,
    Categorify, FillMissing, Normalize,
    tabular_learner,
    slide, valley,
    accuracy
)

In [None]:
# Show floats with two decimals in pandas output, and seed the RNGs for reproducibility.
pd.set_option('display.float_format', '{:.2f}'.format)
set_seed(42)

## Prep the data

In [None]:
def add_features(df):
    """Add engineered feature columns to `df` in place and return the same frame.

    Columns added:
      LogFare    - log(1 + Fare)
      Deck       - first letter of Cabin, grouped into "ABC", "DE" or "FG" (anything else is NaN)
      Family     - SibSp + Parch, i.e. relatives aboard, not counting the passenger
      Alone      - True when Family is 0
      TicketFreq - number of rows sharing the same Ticket value
      Title      - honorific parsed from Name, kept only for Mr/Miss/Mrs/Master (else NaN)
    """
    df['LogFare'] = np.log1p(df['Fare'])
    deck_groups = dict(A="ABC", B="ABC", C="ABC", D="DE", E="DE", F="FG", G="FG")
    df['Deck'] = df.Cabin.str[0].map(deck_groups)
    df['Family'] = df.SibSp+df.Parch
    # Family excludes the passenger, so travelling alone means Family == 0 (not 1).
    df['Alone'] = df.Family==0
    df['TicketFreq'] = df.groupby('Ticket')['Ticket'].transform('count')
    # Name looks like "Surname, Title. Given names": keep what sits between ", " and ".".
    df['Title'] = df.Name.str.split(', ', expand=True)[1].str.split('.', expand=True)[0]
    # Assign the mapped titles themselves. Appending .value_counts() here would assign a
    # Series indexed by title, which aligns against the row index and leaves the column all-NaN.
    df['Title'] = df.Title.map(dict(Mr="Mr",Miss="Miss",Mrs="Mrs",Master="Master"))
    return df

In [None]:
# Reload the raw training data and add the engineered features in one step.
df = add_features(pd.read_csv(path/'train.csv'))

In [None]:
# Seeded (seed=42) train/validation split of df's rows.
splits = RandomSplitter(seed=42)(df)
# Build the fastai tabular dataset and its dataloaders:
#   - `procs` lists the preprocessing steps handed to TabularPandas;
#   - `cat_names` / `cont_names` name the categorical and continuous input columns;
#   - `y_names` with `y_block=CategoryBlock()` declares Survived as a categorical target.
# NOTE(review): `Alone` is a boolean column listed under cont_names — confirm that is intended.
dls = TabularPandas(
    df,
    splits=splits,
    procs = [Categorify, FillMissing, Normalize],
    cat_names=["Sex","Pclass","Embarked","Deck", "Title"],
    cont_names=['Age', 'SibSp', 'Parch', 'LogFare', 'Alone', 'TicketFreq', 'Family'],
    y_names="Survived",
    y_block = CategoryBlock(),
).dataloaders()

## Train the model

In [None]:
# Tabular learner on `dls` with layers=[10,10], reporting accuracy as its metric.
learn = tabular_learner(dls, metrics=accuracy, layers=[10,10])

In [None]:
# Run the learning-rate finder, asking for suggestions from the `slide` and `valley` functions.
learn.lr_find(suggest_funcs=(slide, valley))

In [None]:
# Train for 16 epochs with lr=0.05.
# NOTE(review): ensemble() below trains with lr=0.03 — confirm the difference is intentional.
learn.fit(16, lr=0.05)

## Submit to Kaggle

In [None]:
# Load the test data, fill missing fares with 0, then add the same engineered features as training.
tst_df = pd.read_csv(path/'test.csv')
tst_df['Fare'] = tst_df['Fare'].fillna(0)
tst_df = add_features(tst_df)

# Test dataloader built from the learner's own dataloaders.
tst_dl = learn.dls.test_dl(tst_df)

# preds,_ = learn.get_preds(dl=tst_dl)

# tst_df['Survived'] = (preds[:,1]>0.5).int()
# sub_df = tst_df[['PassengerId','Survived']]
# sub_df.to_csv('sub.csv', index=False)

In [None]:
# !head sub.csv

## Ensembling

In [None]:
def ensemble():
    """Train a fresh tabular learner on `dls` and return its predictions for `tst_dl`.

    Reads the notebook globals `dls` and `tst_dl`. Only the first element of
    `get_preds`'s result (the predictions) is returned.
    """
    member = tabular_learner(dls, metrics=accuracy, layers=[10,10])
    # Silence the progress bar and logging while fitting.
    with member.no_bar(), member.no_logging():
        member.fit(16, lr=0.03)
    outputs = member.get_preds(dl=tst_dl)
    return outputs[0]

# Despite its name, `learns` holds one prediction tensor per independently trained model.
learns = [ensemble() for _ in range(5)]
# Average the predictions across the five models.
ens_preds = torch.stack(learns).mean(dim=0)

# Column 1 of the predictions is thresholded at 0.5 to produce the Survived label.
tst_df['Survived'] = (ens_preds[:, 1] > 0.5).int()
sub_df = tst_df[['PassengerId', 'Survived']]
sub_df.to_csv('ens_sub.csv', index=False)