In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

%matplotlib inline
sns.set_style('darkgrid')

In [2]:
# Load the train and test splits. header=None makes pandas treat every row as
# data and label the columns with integers (column 187 is used as the target below).
mit_train = pd.read_csv('./data/mitbih_train.csv', header=None)
mit_test = pd.read_csv('./data/mitbih_test.csv', header=None)

In [3]:
# Split each frame into features and target: integer-labeled columns 0..186
# (label-based .loc slicing is inclusive) are the inputs, column 187 is the class label.
X_train, y_train = mit_train.loc[:, :186], mit_train[187]
X_test, y_test = mit_test.loc[:, :186], mit_test[187]

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

In [5]:
# Convert the pandas objects to torch tensors (sharing memory with the numpy arrays).
# Objects that are already tensors are passed through untouched so that re-running
# this cell is a no-op instead of failing on `Tensor.values` (a bound method, not an array).
X_train, y_train, X_test, y_test = (
    part if torch.is_tensor(part) else torch.from_numpy(part.values)
    for part in (X_train, y_train, X_test, y_test)
)

In [6]:
# Convert to 3D tensor: (samples, length) -> (samples, 1, length), i.e. add the
# single input channel that the first Conv1d layer in get_model expects.
# The dim() guard keeps the cell idempotent: re-running it does not insert
# yet another size-1 axis into an already 3-D tensor.
if X_train.dim() == 2:
    X_train = X_train.unsqueeze(1)
if X_test.dim() == 2:
    X_test = X_test.unsqueeze(1)

In [7]:
# Mini-batch size used for training
bs = 128

# Pair inputs with labels for each split
train_ds = TensorDataset(X_train, y_train)
test_ds = TensorDataset(X_test, y_test)

# Training batches are reshuffled every epoch; evaluation batches keep their
# order and are twice as large.
train_dl = DataLoader(dataset=train_ds, batch_size=bs, shuffle=True)
test_dl = DataLoader(dataset=test_ds, batch_size=2 * bs)

In [8]:
import time
from models import norm, ResBlock, ODEfunc, ODENet, Flatten, count_parameters

In [9]:
# Helpers adapted from https://pytorch.org/tutorials/beginner/nn_tutorial.html

def get_model(is_odenet=True, dim=64, adam=False, **kwargs):
    """
    Build a 1-D convolutional classifier (ODENet or ResNet) and its optimizer.

    Parameters
    ----------
    is_odenet : bool
        If True the feature extractor is a single ODENet wrapping ODEfunc(dim);
        otherwise it is a stack of six ResBlock(dim) modules.
    dim : int
        Number of channels used throughout the network.
    adam : bool
        If True use Adam with its default settings; otherwise use SGD with
        lr=0.1 and momentum=0.9.
    **kwargs
        Forwarded to ODENet only; ignored when is_odenet is False.

    Returns
    -------
    (model, opt) : tuple
        The nn.Sequential model (5 output logits) and the optimizer bound to
        its parameters.
    """
    # Stem: one stride-1 convolution followed by two stride-2 convolutions.
    stem = [
        nn.Conv1d(1, dim, kernel_size=3, stride=1),
        norm(dim),
        nn.ReLU(inplace=True),
        nn.Conv1d(dim, dim, kernel_size=4, stride=2, padding=1),
        norm(dim),
        nn.ReLU(inplace=True),
        nn.Conv1d(dim, dim, kernel_size=4, stride=2, padding=1),
    ]

    # Feature extractor: the only part that differs between the two models.
    if is_odenet:
        body = [ODENet(ODEfunc(dim), **kwargs)]
    else:
        body = [ResBlock(dim) for _ in range(6)]

    # Head: normalize, pool over the length axis, and map to 5 logits.
    head = [
        norm(dim),
        nn.ReLU(inplace=True),
        nn.AdaptiveAvgPool1d(1),
        Flatten(),
        nn.Linear(dim, 5),
    ]

    model = nn.Sequential(*stem, *body, *head)

    if adam:
        opt = optim.Adam(model.parameters())
    else:
        opt = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

    return model, opt


def loss_batch(model, loss_func, xb, yb, opt=None):
    """
    Compute the loss on one batch and, if an optimizer is given, take one step.

    Inputs are cast to float and targets to long before the forward pass.
    When `opt` is None the function only evaluates the loss (no backward pass).

    Returns a tuple (loss value as a Python float, number of samples in the batch).
    """
    predictions = model(xb.float())
    targets = yb.long()
    batch_loss = loss_func(predictions, targets)

    training = opt is not None
    if training:
        batch_loss.backward()
        opt.step()
        # Clear gradients so they do not accumulate into the next batch.
        opt.zero_grad()

    loss_value = batch_loss.item()
    return loss_value, len(xb)


def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    """
    Train neural network model and report the validation loss after each epoch.

    Parameters
    ----------
    epochs : int
        Number of passes over `train_dl`.
    model : nn.Module
        Model to optimize; switched between train and eval mode each epoch.
    loss_func : callable
        Called as loss_func(predictions, targets) by `loss_batch`.
    opt : optimizer
        Optimizer stepped once per training batch.
    train_dl, valid_dl : iterables of (xb, yb) batches
        `train_dl` must support len(); `valid_dl` is evaluated under no_grad.

    Progress and elapsed time are printed on a single, carriage-return
    refreshed line; the sample-weighted mean validation loss is printed
    after each epoch. Nothing is returned.
    """
    num_batches = len(train_dl)

    for epoch in range(epochs):
        print(f"Training... epoch {epoch + 1}")

        model.train()   # Set model to training mode
        start = time.time()
        for batch_count, (xb, yb) in enumerate(train_dl, start=1):
            loss_batch(model, loss_func, xb, yb, opt)

            # Report *after* the update so that 100% means the last batch is
            # done and the final elapsed time covers the whole epoch.
            percent = round(batch_count / num_batches * 100, 1)
            elapsed = round((time.time() - start) / 60, 1)
            print(f"    Percent trained: {percent}%  Time elapsed: {elapsed} min", end='\r')

        model.eval()    # Set model to validation mode
        with torch.no_grad():
            results = [loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]

        if not results:
            # zip(*[]) cannot be unpacked into two names; report instead of crashing.
            print("\n    val loss: n/a (validation loader is empty)\n")
            continue

        losses, nums = zip(*results)
        # Weight each batch loss by its size: the last batch may be smaller.
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)

        print(f"\n    val loss: {round(val_loss, 2)}\n")

In [10]:
# ODENet with the SGD optimizer (adam=False); rtol/atol are not get_model
# parameters, so they are forwarded to ODENet through **kwargs.
odenet, odeopt = get_model(adam=False, rtol=1e-3, atol=1e-3)

In [11]:
# ResNet counterpart (six ResBlocks instead of the ODENet), same SGD optimizer settings.
resnet, resopt = get_model(is_odenet=False, adam=False)

In [12]:
# Train the ResNet for 5 epochs; note the test split doubles as the validation set.
fit(
    epochs=5,
    model=resnet,
    loss_func=F.cross_entropy,
    opt=resopt,
    train_dl=train_dl,
    valid_dl=test_dl,
)

Training... epoch 1
    Percent trained: 100.0%  Time elapsed: 11.0 min
    val loss: 0.36

Training... epoch 2
    Percent trained: 100.0%  Time elapsed: 10.8 min
    val loss: 0.79

Training... epoch 3
    Percent trained: 100.0%  Time elapsed: 10.4 min
    val loss: 0.23

Training... epoch 4
    Percent trained: 100.0%  Time elapsed: 10.1 min
    val loss: 0.13

Training... epoch 5
    Percent trained: 100.0%  Time elapsed: 10.1 min
    val loss: 0.1



In [13]:
# Train the ODENet for 5 epochs; note the test split doubles as the validation set.
fit(
    epochs=5,
    model=odenet,
    loss_func=F.cross_entropy,
    opt=odeopt,
    train_dl=train_dl,
    valid_dl=test_dl,
)

Training... epoch 1
    Percent trained: 100.0%  Time elapsed: 61.9 min
    val loss: 0.23

Training... epoch 2
    Percent trained: 100.0%  Time elapsed: 71.2 min
    val loss: 0.14

Training... epoch 3
    Percent trained: 100.0%  Time elapsed: 73.6 min
    val loss: 0.12

Training... epoch 4
    Percent trained: 100.0%  Time elapsed: 80.5 min
    val loss: 0.09

Training... epoch 5
    Percent trained: 100.0%  Time elapsed: 97.4 min
    val loss: 0.09



In [14]:
def accuracy(model, X_test, y_test):
    """
    Return the fraction of samples in `X_test` that `model` classifies correctly.

    The model is put in eval mode and run on the whole of `X_test` in a single
    forward pass without gradient tracking. The predicted class is the index of
    the largest softmax probability along dim 1; it is compared element-wise
    with `y_test` and the mean of the matches is returned (a numpy float).
    """
    model.eval()
    with torch.no_grad():
        scores = model(X_test.float())

    probabilities = F.softmax(scores, dim=1)
    predicted = probabilities.argmax(dim=1).numpy()
    actual = y_test.numpy()
    return (predicted == actual).mean()

In [15]:
# Test-set accuracy of both trained models, rounded to three decimals.
# Each call runs the full test tensor through the model in one forward pass.
print(f"ResNet accuracy: {round(accuracy(resnet, X_test, y_test), 3)}")
print(f"ODENet accuracy: {round(accuracy(odenet, X_test, y_test), 3)}")

ResNet accuracy: 0.974
ODENet accuracy: 0.976


In [17]:
# Baseline: relative frequency of each class label (column 187) in the test set,
# ordered by label. The largest share is the accuracy obtained by always
# predicting that class.
round(mit_test[187].value_counts(normalize=True).sort_index(), 2)

0.0    0.83
1.0    0.03
2.0    0.07
3.0    0.01
4.0    0.07
Name: 187, dtype: float64

Both models perform well on the test set, with accuracies above 97%. This is well above the baseline accuracy of 83% obtained by always predicting the majority class (class 0), so both models generalize well. The ResNet trained in under an hour, while the ODENet took more than six hours (summing the per-epoch training times logged above). However, a benefit of the ODENet can be seen below: it has almost exactly one third as many parameters as the ResNet and yet performed slightly better. This relates to the fact that Neural ODEs use constant memory (although with a high memory overhead due to the adjoint method).

In [18]:
# Compare model sizes using the count_parameters helper imported from models.
print("Number of tunable parameters in...")
print(f"    ResNet: {count_parameters(resnet)}")
print(f"    ODENet: {count_parameters(odenet)}")

Number of tunable parameters in...
    ResNet: 182853
    ODENet: 59333
