In [1]:
!black *.py

[1mAll done! ✨ 🍰 ✨[0m
5 files left unchanged.[0m


In [2]:
import functools

import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

import nbfigtulz as ftl
import numpy as np
import torch

from data import generate_data
import loss as loss_fct
import utils

# Setup

Delete remnants from previous runs and start with a fresh `img/` directory:

In [3]:
!rm -rf img && mkdir img/

Fix the seed used by `PyTorch` and the random number generator of our own algorithms which create the synthetic data sample.

In [4]:
# One seed for the whole notebook: it is handed to torch's global RNG here and
# passed as `seed=SEED` to `generate_data` further down.
SEED = 42
# The trailing semicolon keeps the notebook from echoing the call's return value.
torch.manual_seed(SEED);

In order to see a drop in the epistemic uncertainty the size of the data sample shouldn't be much larger than 300. (Data samples below 300 suffer from some serious under-fitting.)

In [5]:
# Number of synthetic data points requested from `generate_data`.
N_DATA = 300

Helper function which creates and compiles a 2D EDL model.

In [6]:
def get_model(debug=False, **kwargs):
    """Build an ``EDL`` model and, unless ``debug`` is set, script it.

    Parameters
    ----------
    debug : bool
        If true, the plain ``EDL`` instance is returned instead of its
        ``torch.jit.script`` version.
    **kwargs
        Passed through unchanged to the ``EDL`` constructor.

    Returns
    -------
    The ``EDL`` instance when ``debug`` is true, otherwise the result of
    ``torch.jit.script`` applied to that instance.
    """
    from edl import EDL

    network = EDL(**kwargs)
    return network if debug else torch.jit.script(network)

# Synthetic data sample

Let's create a new data sample. The data points follow a spiral distribution in $(t,x,y)$,
$$x = r \cos t \,, \\ y = r \sin t \,,$$
with $r \sim \mathcal{N}(\mu=1, \sigma=0.1)$ and $t \in [0, 2\pi]$ distributed according to a v-shaped distribution,
$$f(t) \propto \begin{cases} 1 - t/\pi & \text{if } t \le \pi \,, \\ t/\pi - 1 & \text{else.} \end{cases}$$

In [7]:
# Draw the synthetic sample. The cells below read column 0 as t and
# columns 1 and 2 as x and y.
data = generate_data(N_DATA, std=.1, seed=SEED)
# Quick sanity check of the sample's shape.
print(data.shape)

torch.Size([300, 3])


In [8]:
@ftl.with_context
def make_fig(x, y, z):
    """Scatter the sample in the x-y plane, coloured by ``z``, and save it as 'data_xy'.

    Parameters
    ----------
    x, y
        Coordinates of the points.
    z
        Values mapped to the 'copper' colour map.

    Returns
    -------
    Whatever ``ftl.save_fig`` returns for the saved figure.
    """
    axis_limits = (-1.4, 1.4)
    axis_ticks = [-1, 0, 1]

    fig = plt.figure()
    ax = fig.add_subplot()
    ax.scatter(x, y, c=z, cmap='copper', alpha=.2)

    ax.set_xlim(*axis_limits)
    ax.set_ylim(*axis_limits)
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')

    ax.set_xticks(axis_ticks)
    ax.set_yticks(axis_ticks)

    ax.grid()

    # Only the height of the SMALL preset is used; it is passed for both
    # entries of `resize` (presumably to get a square figure).
    _width, side = ftl.fig_size.SMALL.get_size()
    return ftl.save_fig(fig, 'data_xy', resize=(side, side))


make_fig(x=data[:, 1], y=data[:, 2], z=data[:, 0])

img/data_xy.png
img/data_xy.pgf


data_xy.png

In [9]:
@ftl.with_context
def make_fig(x, y, z):
    """Draw the sample as a 3D scatter plot (x, y, t) and save it as 'data'.

    Parameters
    ----------
    x, y
        Horizontal coordinates of the points.
    z
        Vertical coordinate (labelled $t$); also used for the point colours.

    Returns
    -------
    Whatever ``ftl.save_fig`` returns for the saved figure.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.view_init(10, 300)
    ax.set_box_aspect((5, 5, 5))

    # The fourth positional parameter of Axes3D.scatter is `zdir`, not a
    # marker/format string as in `plot`. The former stray '.' argument was
    # therefore removed; the default zdir='z' is what was effectively used.
    ax.scatter(x, y, z, alpha=.15, c=z, cmap='copper')

    ax.set_xlim(-1.4, 1.4)
    ax.set_ylim(-1.4, 1.4)
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')
    ax.set_zlabel('$t$')

    ax.set_xticks([-1, 0, 1])
    ax.set_yticks([-1, 0, 1])

    # Label the t axis in multiples of pi.
    ax.set_zticks([0, np.pi, 2 * np.pi])
    ax.set_zticklabels(['$0$', r'$\pi$', r'$2\pi$'])

    ax.grid(False)

    return ftl.save_fig(fig, 'data', tight_layout=False)


make_fig(data[:, 1], data[:, 2], data[:, 0])

img/data.png
img/data.pgf


data.png

Trim white borders using [`convert`](https://imagemagick.org/script/convert.php). One can safely remove this line if the tool is not installed.

In [10]:
!convert img/data.png -flatten -fuzz 1% -trim +repage img/data.png

# EDL

We now train our simple model for 1500 epochs using the Adam optimizer and a step-wise learning-rate schedule.

In [11]:
# Split the sample into train / test loaders and build a fresh model.
train_dl, test_dl = utils.get_dataloader(data, batch_size=100, p_test=.1)
model = get_model()

# Per-epoch loss histories, filled by the loop below.
loss_train, loss_test = [], []
n_epochs = 1500
lr_schedule = [.01, .001, .0001]

optimizer = torch.optim.Adam(model.parameters())

for epoch in range(n_epochs):
    # Piecewise-constant schedule: the run is cut into len(lr_schedule)
    # equally long stages, each using the next learning rate.
    stage = (epoch * len(lr_schedule)) // n_epochs
    for group in optimizer.param_groups:
        group['lr'] = lr_schedule[stage]

    epoch_loss = utils.fit_epoch(
        model,
        loss_fct.student_nll_mean,
        optimizer,
        train_dl=train_dl,
        test_dl=test_dl,
    )
    loss_train.append(epoch_loss[0])
    loss_test.append(epoch_loss[1])

In [12]:
@ftl.with_context
def make_fig(*, loss_train, loss_test):
    """Plot the training and test loss histories and save the figure as 'loss'.

    Parameters
    ----------
    loss_train, loss_test
        Sequences of loss values, one entry per epoch.

    Returns
    -------
    Whatever ``ftl.save_fig`` returns for the saved figure.
    """
    fig, ax = plt.subplots()
    # Train first, then test, so the default colour cycle assigns C0 / C1.
    for label, history in (('Train', loss_train), ('Test', loss_test)):
        ax.plot(history, linewidth=2, label=label, alpha=.5)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.legend()
    return ftl.save_fig(fig, 'loss')


make_fig(loss_train=loss_train, loss_test=loss_test)

img/loss.png
img/loss.pgf


loss.png

After training we end up with a slightly over-trained model which is OK if we want to witness some non-zero epistemic uncertainty.

Next, we train 100 models and extract the parameters of the evidential distribution for each of them. To do so we query the trained models for different values of $t$ and calculate $\mu$, $\Sigma_0$ and $\nu$ at each point.

In [13]:
def get_params(model, x_min=0., x_max=2. * np.pi, n_points=500):
    """Evaluate ``model`` on an evenly spaced grid of t values.

    Parameters
    ----------
    model
        Callable network; it is switched to eval mode and called on the grid.
    x_min, x_max : float
        End points of the grid.
    n_points : int
        Number of grid points (default 500, the previously hard-coded value).

    Returns
    -------
    t : torch.Tensor
        The grid the model was evaluated on.
    params : dict
        NumPy arrays under the keys 'mu' (output columns 0-1), 'sigma'
        (``L @ L^T`` with ``L`` built from output columns 2-4) and 'nu'
        (output column 5).
    loss : numpy.ndarray
        Result of ``loss_fct.student_nll`` for the model output against the
        points ``(cos t, sin t)``.
    """
    model.eval()
    with torch.no_grad():
        t = torch.linspace(x_min, x_max, n_points)
        # Reference points on the unit circle, used as the target of the loss.
        gt = torch.stack((torch.cos(t), torch.sin(t)), 1)
        y = model(t).cpu()

        mu = y[:, :2]

        # Lower-triangular 2x2 factor per grid point; the upper-right entry
        # stays zero, so sigma = L L^T is symmetric by construction.
        L = torch.zeros(t.shape[0], 2, 2)
        L[:, 0, 0] = y[:, 2]
        L[:, 1, 0] = y[:, 3]
        L[:, 1, 1] = y[:, 4]
        sigma = L @ L.transpose(1, 2)

        nu = y[:, 5]

        loss = loss_fct.student_nll(y, gt)

    # Everything derived from `y` already lives on the CPU at this point.
    params = {'mu': mu.numpy(), 'sigma': sigma.numpy(), 'nu': nu.numpy()}
    return t, params, loss.cpu().numpy()

In [14]:
def train_edl(*, n_repeat=1, data, verbose=False):
    """Train ``n_repeat`` fresh models on ``data`` and collect their parameters.

    Each repetition builds a new model via ``get_model``, trains it for 1500
    epochs with Adam and a three-stage learning-rate schedule, and then
    evaluates it with ``get_params``.

    Parameters
    ----------
    n_repeat : int
        Number of independently trained models.
    data
        Training sample; it is wrapped in a shuffling ``DataLoader`` with
        batch size 100.
    verbose : bool
        If true, print a progress line per repetition.

    Returns
    -------
    x
        Grid returned by the last ``get_params`` call (``None`` if
        ``n_repeat`` is 0).
    ms, losses
        Lists with one parameter dict / loss array per model; for
        ``n_repeat == 1`` the single elements are returned instead of lists.
    """
    n_epochs = 1500
    lr_schedule = [.01, .001, .0001]

    x = None
    ms = []
    losses = []

    for run in range(n_repeat):
        if verbose:
            print(f'{run} / {n_repeat}')

        dl = torch.utils.data.DataLoader(data, batch_size=100, shuffle=True)

        model = get_model()
        optimizer = torch.optim.Adam(model.parameters())

        # Separate counter for the epochs; the original reused `i` here and
        # thereby shadowed the repetition counter.
        for epoch in range(n_epochs):
            lr = lr_schedule[(epoch * len(lr_schedule)) // n_epochs]
            for group in optimizer.param_groups:
                group['lr'] = lr

            # Train on the loader built from the `data` argument. Previously
            # the notebook-global `train_dl` was passed here, so `data` was
            # silently ignored and `dl` was never used.
            # NOTE(review): assumes utils.fit_epoch accepts a plain DataLoader
            # over the data tensor - confirm against utils.get_dataloader.
            utils.fit_epoch(model,
                            loss_fct.student_nll_mean,
                            optimizer,
                            train_dl=dl,
                            test_dl=None)

        x, m, loss = get_params(model)
        ms.append(m)
        losses.append(loss)

    if n_repeat == 1:
        return x, ms[0], losses[0]

    return x, ms, losses


x, ms, loss = train_edl(n_repeat=100, data=generate_data(N_DATA, std=.1, seed=SEED))

The container `ms` wraps all parameters of the 100 learned evidential distributions:

In [15]:
# Number of parameter sets, the keys stored per set, and the shape of one 'mu' array.
len(ms), ms[0].keys(), ms[0]['mu'].shape

(100, dict_keys(['mu', 'sigma', 'nu']), (500, 2))

... let's write a small helper function to extract a single parameter by name / key:

In [16]:
def get_from_ms(ms, *, key):
    """Collect the entry stored under ``key`` from every element of ``ms``.

    Parameters
    ----------
    ms
        Iterable of mappings (one per model).
    key
        Key to look up in each mapping.

    Returns
    -------
    list
        The looked-up values, in the order of ``ms``.
    """
    picked = []
    for params in ms:
        picked.append(params[key])
    return picked

Next, we generate visualizations of $\mu$, $\Sigma_0$ and $\nu$...

In [17]:
@ftl.with_context
def make_fig(mus):
    fig = plt.figure()
    ax = fig.add_subplot()
    
    alpha = 10. / len(mus)
    for mu in mus:
        n = len(mu[:, 0])
        colors = plt.cm.copper(np.linspace(0, 1, 100))
        dx = n // len(colors)
        for i, j in enumerate(range(0, n, dx)):
            first = j
            last = min(j + dx, n) - 1
            c = (*colors[i][0:3].tolist(), alpha)
            ax.plot(mu[first:last, 0], mu[first:last, 1], color=c)

    ax.set_xlim(-1.4, 1.4)
    ax.set_ylim(-1.4, 1.4)
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')
    
    ax.set_xticks([-1, 0, 1])
    ax.set_yticks([-1, 0, 1])
    
    ax.grid()
    
    w, h = ftl.fig_size.SMALL.get_size()
    return ftl.save_fig(fig, 'model_xy', resize=(h, h))
    

xy_fig = make_fig(get_from_ms(ms, key='mu'))

img/model_xy.png
img/model_xy.pgf


In [18]:
def fmt_t_axis(ax):
    """Format the x-axis of ``ax`` for the variable t on [0, 2*pi].

    Sets the limits to [0, 2*pi], places ticks at multiples of pi/2 with
    matching LaTeX labels, and labels the axis '$t$'.
    """
    ticks = [factor * np.pi for factor in (0, .5, 1, 1.5, 2)]
    labels = ['$0$', r'$0.5 \, \pi$', r'$\pi$', r'$1.5 \, \pi$', r'$2\pi$']

    ax.set_xlim(ticks[0], ticks[-1])
    ax.set_xticks(ticks)
    ax.set_xticklabels(labels)
    ax.set_xlabel(r'$t$')

In [19]:
@ftl.with_context
def make_fig(x, nus):
    fig = plt.figure()
    ax = fig.add_subplot()
    
    alpha = 5. / len(nus)
    for nu in nus:
        ax.plot(x, nu, color='C0', alpha=alpha)
    
    fmt_t_axis(ax)
    ax.set_ylabel(r'$\nu$')
    ax.grid()
    
    w, h = ftl.fig_size.SMALL.get_size()
    return ftl.save_fig(fig, 'nu', resize=(h, h))
    

nu_fig = make_fig(x, get_from_ms(ms, key='nu'))

img/nu.png
img/nu.pgf


In [20]:
@ftl.with_context
def make_fig(x, sigmas):
    fig = plt.figure()
    ax = fig.add_subplot()
    
    alpha = 5. / len(sigmas)
    for sigma in sigmas:
        ax.plot(x, sigma[:, 0, 0], color='C0', alpha=alpha)      
        ax.plot(x, sigma[:, 1, 1], color='C1', alpha=alpha)

    fmt_t_axis(ax)
    ax.set_ylim(0, .025)
    ax.set_ylabel(r'$\mathrm{cov}(x, y)$')
    ax.grid()
    
    ax.legend([
        Line2D([0,], [0,], color='C0', alpha=.7),
        Line2D([0,], [0,], color='C1', alpha=.7),
    ], [
        r'$\sigma^2_x$', r'$\sigma^2_y$'    
    ])
    
    return ftl.save_fig(fig, 'cov')
    

cov_fig = make_fig(x, get_from_ms(ms, key='sigma'))

img/cov.png
img/cov.pgf


In [21]:
@ftl.with_context
def make_fig(x, sigmas):
    fig = plt.figure()
    ax = fig.add_subplot()
    
    alpha = 5. / len(sigmas)
    for sigma in sigmas:
        o11 = sigma[:, 0, 0]
        o22 = sigma[:, 1, 1]
        o12 = sigma[:, 1, 0]
        ax.plot(x, o12 / np.sqrt(o11 * o22), color='C0', alpha=alpha)        
    
    fmt_t_axis(ax)
    ax.set_ylabel(r'$\mathrm{corr}(x, y)$')
    ax.grid()
    
    return ftl.save_fig(fig, 'corr')
    

corr_fig = make_fig(x, get_from_ms(ms, key='sigma'))

img/corr.png
img/corr.pgf


In [22]:
# Hand the four figure objects returned by the cells above to nbfigtulz's img_grid.
ftl.img_grid([xy_fig, nu_fig, cov_fig, corr_fig])

In [23]:
@ftl.with_context
def make_fig(x, losses):
    fig = plt.figure()
    ax = fig.add_subplot()
    
    alpha = 5. / len(losses)
    for loss in losses:
        ax.plot(x, loss, color='C0', alpha=alpha)     
   
    fmt_t_axis(ax)
    ax.set_ylabel(r'Loss')
    ax.grid()
    
    return ftl.save_fig(fig, 'loss')
    

make_fig(x, loss)

img/loss.png
img/loss.pgf


loss.png