In [1]:
import numpy as np
import torch
from modAL.models import ActiveLearner
from skorch import NeuralNetClassifier
from torch import nn
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor


In [2]:
# build class for the skorch API
class Torch_Model(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Two unpadded 3x3 convolutions followed by a 2x2 max-pool turn a
    ``(N, 1, 28, 28)`` batch into ``(N, 64, 12, 12)`` feature maps. These are
    flattened per sample and passed through two fully connected layers that
    emit 10 raw class scores (logits) per sample; no softmax is applied here.
    """

    def __init__(
        self,
    ):
        super().__init__()
        # feature extractor: 28x28 -> 26x26 -> 24x24 -> (pool) 12x12, 64 channels
        self.convs = nn.Sequential(
            nn.Conv2d(1, 32, 3),
            nn.ReLU(),
            nn.Conv2d(32, 64, 3),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.25),
        )
        # classification head working on the flattened 12 * 12 * 64 features
        self.fcs = nn.Sequential(
            nn.Linear(12 * 12 * 64, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 10),
        )

    def forward(self, x):
        """Return a ``(N, 10)`` tensor of logits for a ``(N, 1, 28, 28)`` batch.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not produce
                12 * 12 * 64 features per sample (raised by the first
                linear layer).
        """
        features = self.convs(x)
        # Flatten everything except the batch dimension. The previous
        # ``view(-1, 12 * 12 * 64)`` let the batch dimension absorb any size
        # mismatch, so a wrongly sized input could silently come back with a
        # different number of rows than it went in with.
        flat = features.flatten(start_dim=1)
        return self.fcs(flat)



In [3]:
# create the classifier
# run on the GPU when torch can see one, otherwise stay on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# skorch wrapper that gives the torch module a scikit-learn style interface;
# max_epochs is not passed, so skorch's own default number of epochs applies
classifier = NeuralNetClassifier(
    Torch_Model,
    criterion=nn.CrossEntropyLoss,
    optimizer=torch.optim.Adam,
    train_split=None,  # fit on all data handed to the net, no validation split
    verbose=1,  # print the per-epoch training table
    device=device,
)

"""
Data wrangling
1. Reading data from torchvision
2. Assembling initial training data for ActiveLearner
3. Generating the pool
"""

# MNIST is fetched into the current directory when it is not already there;
# ToTensor turns every image into a float tensor.
mnist_data = MNIST(".", download=True, transform=ToTensor())
# A single batch spanning the whole dataset, so one next() call yields every
# sample. The batch size is taken from the dataset instead of being hard-coded,
# and the shuffle uses its own seeded generator so that the order of X / y
# (and therefore the train/test split made from it) is the same on every run.
dataloader = DataLoader(
    mnist_data,
    shuffle=True,
    batch_size=len(mnist_data),
    generator=torch.Generator().manual_seed(0),
)
X, y = next(iter(dataloader))


In [4]:
# read training data
# the first 50,000 shuffled samples form the training split, the remaining
# 10,000 are held out for the final evaluation; images are kept in
# (samples, channels, height, width) layout
X_train = X[:50000].reshape(50000, 1, 28, 28)
y_train = y[:50000]
X_test = X[50000:].reshape(10000, 1, 28, 28)
y_test = y[50000:]


In [5]:
# assemble initial data
n_initial = 1000
# Draw n_initial distinct row indices from the training split. A locally
# seeded generator is used so the same indices are drawn on every run without
# touching NumPy's global random state.
initial_rng = np.random.default_rng(0)
initial_idx = initial_rng.choice(len(X_train), size=n_initial, replace=False)
# the labelled set the learner starts from
X_initial = X_train[initial_idx]
y_initial = y_train[initial_idx]


In [6]:
# generate the pool
# the pool is the training split minus the rows already drawn for the
# initial labelled set (same indices removed from samples and labels)
X_pool, y_pool = (
    np.delete(split, initial_idx, axis=0) for split in (X_train, y_train)
)

"""
Training the ActiveLearner
"""


'\nTraining the ActiveLearner\n'

In [7]:
# initialize ActiveLearner
# the skorch classifier is the estimator; the initial labelled set is handed
# over as its first training data (the training log below this cell comes
# from that first fit)
learner = ActiveLearner(
    estimator=classifier, X_training=X_initial, y_training=y_initial
)


  epoch    train_loss     dur
-------  ------------  ------
      1        2.5579  1.9494
      2        1.8733  0.0489
      3        1.0252  0.0440
      4        0.7877  0.0467
      5        0.6321  0.0491
      6        0.5159  0.0487
      7        0.4544  0.0473
      8        0.3942  0.0462
      9        0.4038  0.0418
     10        0.3502  0.0422


In [8]:
# the active learning loop
n_queries = 10
for query_round in range(n_queries):
    # let the learner pick the 100 pool samples it wants labelled next
    query_idx, query_instance = learner.query(X_pool, n_instances=100)
    queried_X, queried_y = X_pool[query_idx], y_pool[query_idx]
    # only_new=False: the queried batch is added to the data the learner
    # already holds rather than replacing it
    learner.teach(queried_X, queried_y, only_new=False)
    # drop the freshly labelled rows so they cannot be queried again
    X_pool, y_pool = (
        np.delete(remaining, query_idx, axis=0) for remaining in (X_pool, y_pool)
    )


Re-initializing module.
Re-initializing optimizer.
  epoch    train_loss     dur
-------  ------------  ------
      1        2.5581  0.0528
      2        1.5429  0.0559
      3        0.8327  0.0507
      4        0.6260  0.0562
      5        0.5081  0.0492
      6        0.4362  0.0542
      7        0.3577  0.0499
      8        0.2902  0.0726
      9        0.2707  0.0553
     10        0.2688  0.0579
Re-initializing module.
Re-initializing optimizer.
  epoch    train_loss     dur
-------  ------------  ------
      1        2.7067  0.0487
      2        1.7498  0.0523
      3        1.2897  0.0509
      4        0.8644  0.0576
      5        0.6289  0.0595
      6        0.5019  0.0604
      7        0.3924  0.0563
      8        0.3378  0.0757
      9        0.2724  0.0574
     10        0.2897  0.0563
Re-init

In [9]:
# the final accuracy score
# measured on the held-out test samples, which were never part of the pool
test_accuracy = learner.score(X_test, y_test)
print(test_accuracy)


0.9477
