## Active learning
This is an example of how to benchmark an algorithm with active learning.


In [None]:
from copy import deepcopy
from collections import defaultdict

import torch
from torch import nn
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

from alpaca.model.mlp import MLP 
from alpaca.dataloader.builder import build_dataset
from alpaca.dataloader.utils import loader
from alpaca.active_learning.simple_update import update_set

# Run on the GPU when PyTorch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
# Benchmark settings.
start_size = 500  # size of the initial training split (train_size of the first split)
step_size = 10    # passed to update_set on every step (presumably samples acquired per step)
steps = 50        # number of train / evaluate / update rounds run for each method
batch_size = 128  # NOTE(review): not referenced by the other cells shown here
# Acquisition methods compared in the benchmark (forwarded as `method` to update_set).
methods = [
    'mc_dropout',
    'random',
    'max_entropy',
]

In [None]:
# Load dataset
# NOTE: despite the variable name `mnist`, the dataset requested here is
# 'year_prediction_msd'.
mnist = build_dataset('year_prediction_msd', val_split=1_000)
x_set, y_set = mnist.dataset('train')

# Split the train set: `start_size` samples form the initial training set and
# the remainder becomes the pool that the active-learning loop draws from.
# No random_state is given, so the split differs between runs.
initial_split = train_test_split(x_set, y_set, train_size=start_size)
x_train_init, x_pool_init, y_train_init, y_pool_init = initial_split

# Validation data used to score the model after every step.
x_val, y_val = mnist.dataset('val')
val_loader = loader(x_val, y_val)

In [None]:
# Loss used for both the training objective and the validation metric.
criterion = nn.MSELoss()
# Sizes handed to MLP: the feature width of x_val followed by 256, 128, 64 and 1.
layers = (x_val.shape[-1], 256, 128, 64, 1)


def val_loss(model, x_val, y_val):
    """Return the value of `criterion` for `model` on the given data.

    Args:
        model: Module to evaluate; it is called on the whole `x_val` at once.
        x_val: Validation inputs, convertible by ``torch.DoubleTensor``.
        y_val: Validation targets, convertible by ``torch.DoubleTensor``.

    Returns:
        The loss as a plain Python float.

    The forward pass runs under ``torch.no_grad()`` so no autograd graph is
    built for data that is never back-propagated through, and the model is
    switched to eval mode for the duration of the call. The previous
    train/eval mode is restored afterwards, so the training loop is not
    affected.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            inputs = torch.DoubleTensor(x_val).to(device)
            targets = torch.DoubleTensor(y_val).to(device)
            return criterion(model(inputs), targets).item()
    finally:
        # Put the model back into whatever mode the caller had it in.
        model.train(was_training)
    

In [None]:
# Number of passes over the current training set on every step.
epochs_per_step = 3

# Validation loss per method, one entry appended per step.
results = defaultdict(list)
for method in methods:
    print(method)
    # Each method gets a newly constructed model and its own copies of the
    # same initial train/pool split.
    model = MLP(layers)
    model.to(device)
    x_train, y_train = deepcopy(x_train_init), deepcopy(y_train_init)
    x_pool, y_pool = deepcopy(x_pool_init), deepcopy(y_pool_init)

    # One optimizer per method; it is reused across all steps.
    optimizer = torch.optim.Adam(model.parameters())

    # Iteratively train the model, taking more data from the pool on each step
    for step in range(steps):
        print(step + 1, end=' ')
        # NOTE(review): `batch_size` is defined in the settings cell but is
        # not passed to loader here - confirm whether it should be.
        train_loader = loader(x_train, y_train)
        for _ in range(epochs_per_step):
            for x_batch, y_batch in train_loader:  # one pass over the training set
                predictions = model(x_batch.to(device))
                loss = criterion(predictions, y_batch.to(device))
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        results[method].append(val_loss(model, x_val, y_val))

        # update sets (presumably moves samples chosen by `method` from the
        # pool into the training set - see update_set)
        x_pool, x_train, y_pool, y_train = update_set(
            x_pool, x_train, y_pool, y_train, step_size, model=model, method=method, task='regression')

    # End the progress line so the next method name starts on its own line.
    print()


In [None]:
# Number of initial steps left out of the plot.
skip_steps = 3

plt.figure()
plt.xlabel('Steps')
plt.ylabel('MSE loss')
for method in methods:
    losses = results[method][skip_steps:]
    # Plot against the real 1-based step numbers (matching the progress
    # output of the training loop) so that dropping the first points does not
    # shift the curve to the left on the 'Steps' axis.
    step_numbers = list(range(skip_steps + 1, skip_steps + 1 + len(losses)))
    plt.plot(step_numbers, losses, label=method)
plt.legend()
