# Quick-start

This document helps you get up and running with `alr` quickly.
It should give you a general idea of how to get started with
this package.


In [0]:
import numpy as np
import torch
import torch.utils.data as torchdata

from torch.nn import functional as F
from torch import nn

from alr import MCDropout
from alr.acquisition import RandomAcquisition, BALD
from alr.utils import stratified_partition, eval_fwd_exp
from alr.data import DataManager, UnlabelledDataset
from alr.data.datasets import Dataset

# Seed both random number generators up front so that repeated runs of the
# notebook start from the same state.
torch.manual_seed(42)
np.random.seed(42)

# Prefer the first GPU when one is visible; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Keyword arguments shared by the training and test DataLoaders below.
data_loader_params = {
    "pin_memory": True,
    "num_workers": 2,
    "batch_size": 32,
}

First, we load and prepare our data.
Note that we partition the training set into labelled and unlabelled sets
using `stratified_partition`, which keeps the classes balanced in the labelled training set:

In [3]:
# Fetch the MNIST train/test splits, then carve a 20-example labelled set out
# of the training split; everything left over becomes the pool.
train, test = Dataset.MNIST.get()
train, pool = stratified_partition(
    train,
    Dataset.MNIST.about.n_class,
    size=20,
)
# Wrap the pool for use as the unlabelled set
# (presumably this hides the labels -- confirm against `alr.data`).
pool = UnlabelledDataset(pool)
# Display the sizes of the three datasets.
(len(train), len(test), len(pool))

(20, 10000, 59980)

`MCDropout` lets us define a Bayesian NN. It provides an implementation
for `stochastic_forward` which we will use in the next section for the
acquisition function.

> Notice that, in the printed model below, the dropout layers have been replaced by their `Persistent` versions.

In [4]:
# instantiate a regular model and an optimiser.
class Net(nn.Module):
    """Small convolutional classifier with dropout after every hidden layer.

    The layer sizes assume single-channel 28x28 inputs: two 5x5 convolutions,
    each followed by 2x2 max-pooling, reduce an image to 64 feature maps of
    size 4x4, which feed two fully connected layers producing 10 outputs.
    """

    def __init__(self):
        super().__init__()
        # Shape comments are (channels, height, width) for a 1x28x28 input.
        self.conv1 = nn.Conv2d(1, 32, 5)
        # 32 24 24
        self.dropout1 = nn.Dropout2d()
        # maxpool --
        # 32 12 12
        self.conv2 = nn.Conv2d(32, 64, 5)
        # 64 8 8
        self.dropout2 = nn.Dropout2d()
        # maxpool --
        # 64 4 4
        self.fc1 = nn.Linear(64 * 4 * 4, 128)
        self.dropout3 = nn.Dropout()
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: image tensor whose trailing dimensions are (1, 28, 28).

        Returns:
            Tensor of shape (batch, 10) holding log-softmax scores.

        Raises:
            ValueError: if the convolutional stack does not yield 64x4x4
                features, i.e. the input images are not 28x28.
        """
        x = F.max_pool2d(self.dropout1(F.relu(self.conv1(x))), 2)
        x = F.max_pool2d(self.dropout2(F.relu(self.conv2(x))), 2)
        # `view(-1, ...)` would happily fold a wrongly sized input into the
        # batch dimension and return the wrong number of predictions, so
        # check the feature shape explicitly before flattening.
        if tuple(x.shape[-3:]) != (64, 4, 4):
            raise ValueError(
                "expected 64x4x4 features after the convolutions "
                f"(28x28 inputs), got {tuple(x.shape[-3:])}"
            )
        x = x.view(-1, 64 * 4 * 4)
        x = self.fc2(self.dropout3(F.relu(self.fc1(x))))
        return F.log_softmax(x, dim=1)

# Wrap the plain network in MCDropout
# (`forward=20` is presumably the number of stochastic forward passes -- confirm
# against the MCDropout documentation) and move it to the chosen device.
model = MCDropout(Net(), forward=20)
model = model.to(device)

# Attach the loss and optimiser used later by `fit` / `evaluate`. NLLLoss pairs
# with the log-softmax output of `Net`.
model.compile(
    optimiser=torch.optim.Adam(model.parameters()),
    criterion=nn.NLLLoss(),
)

# Display the wrapped model.
model

MCDropout(
  (base_model): Net(
    (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
    (dropout1): PersistentDropout2d(p=0.5, inplace=False)
    (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
    (dropout2): PersistentDropout2d(p=0.5, inplace=False)
    (fc1): Linear(in_features=1024, out_features=128, bias=True)
    (dropout3): PersistentDropout(p=0.5, inplace=False)
    (fc2): Linear(in_features=128, out_features=10, bias=True)
  )
)

Now, we can instantiate an acquisition function
and an associated `DataManager` instance:

In [0]:
# BALD acquisition function built on the model's predictions. The loader
# keyword arguments are passed through to BALD
# (presumably for iterating over the pool while scoring -- confirm).
bald = BALD(
    eval_fwd_exp(model),
    device=device,
    batch_size=1024,
    pin_memory=True,
    num_workers=2,
)

# The data manager ties together the labelled set, the unlabelled pool and
# the acquisition function.
dm = DataManager(train, pool, bald)

Finally, the vanilla acquisition loop looks like:

In [6]:
ITERS = 24
EPOCHS = 50
# number of points moved from the pool to the labelled set per round
ACQUIRE_SIZE = 10
# maps labelled-set size -> test accuracy obtained with that many labels
accs = {}

# The test set never changes, so its loader is built once rather than being
# re-created in every iteration.
test_loader = torchdata.DataLoader(test, **data_loader_params)

for i in range(ITERS):
    print(f"==== Iteration {i + 1} ({(i + 1) / ITERS:.2%}), "
          f"training size: {dm.n_labelled} ====")
    # reset weights to original values when the model was first created
    model.reset_weights()
    # fit = train; the loader is rebuilt each round because the labelled set
    # has changed since the previous round
    result = model.fit(
        train_loader=torchdata.DataLoader(
            dm.labelled, shuffle=True, **data_loader_params
        ),
        train_acc=True, epochs=EPOCHS, device=device, quiet=True
    )
    # evaluate the model to obtain its test accuracy
    test_acc, test_loss = model.evaluate(
        data=test_loader, device=device, quiet=True
    )
    # display results
    # (presumably `reduce('last')` keeps only the final epoch's metrics -- confirm)
    result.reduce('last', inplace=True)
    accs[dm.n_labelled] = test_acc
    print(f"train_acc = {result.train_acc:.2f}, "
          f"train_loss = {result.train_loss:.2f}, "
          f"test_acc = {test_acc:.2f}, "
          f"test_loss = {test_loss:.2f}")
    # Acquire the next batch from the unlabelled pool. Skipped after the
    # final round: those points would never be trained on or evaluated, so
    # scoring the whole pool again would be wasted work.
    if i < ITERS - 1:
        dm.acquire(b=ACQUIRE_SIZE)
accs


==== Iteration 1 (4.17%), training size: 20 ====
train_acc = 1.00, train_loss = 0.24, test_acc = 0.63, test_loss = 1.88
==== Iteration 2 (8.33%), training size: 30 ====


	nonzero(Tensor input, *, Tensor out)
Consider using one of the following signatures instead:
	nonzero(Tensor input, *, bool as_tuple)


train_acc = 1.00, train_loss = 0.12, test_acc = 0.67, test_loss = 1.60
==== Iteration 3 (12.50%), training size: 40 ====
train_acc = 1.00, train_loss = 0.09, test_acc = 0.74, test_loss = 1.33
==== Iteration 4 (16.67%), training size: 50 ====
train_acc = 1.00, train_loss = 0.47, test_acc = 0.79, test_loss = 1.09
==== Iteration 5 (20.83%), training size: 60 ====
train_acc = 1.00, train_loss = 0.34, test_acc = 0.80, test_loss = 1.04
==== Iteration 6 (25.00%), training size: 70 ====
train_acc = 1.00, train_loss = 0.18, test_acc = 0.77, test_loss = 1.09
==== Iteration 7 (29.17%), training size: 80 ====
train_acc = 1.00, train_loss = 0.38, test_acc = 0.80, test_loss = 1.01
==== Iteration 8 (33.33%), training size: 90 ====
train_acc = 1.00, train_loss = 0.34, test_acc = 0.83, test_loss = 0.89
==== Iteration 9 (37.50%), training size: 100 ====
train_acc = 0.99, train_loss = 0.70, test_acc = 0.81, test_loss = 1.00
==== Iteration 10 (41.67%), training size: 110 ====
train_acc = 1.00, train_loss 

{20: 0.6266,
 30: 0.6722,
 40: 0.7433,
 50: 0.7899,
 60: 0.7966,
 70: 0.7696,
 80: 0.8032,
 90: 0.835,
 100: 0.8087,
 110: 0.8538,
 120: 0.8807,
 130: 0.8692,
 140: 0.8975,
 150: 0.9246,
 160: 0.9333,
 170: 0.9326,
 180: 0.9161,
 190: 0.9301,
 200: 0.934,
 210: 0.9295,
 220: 0.9349,
 230: 0.9354,
 240: 0.9411,
 250: 0.9501}