# Task
Uncertainty Modeling + Interactive Learning:

* **Implement** a CNN classifier with dropout layers and modify it so that the dropout layers can be left on during inference.  

* Then **compare** the performance of a couple of uncertainty-based query strategies from the modAL active learning library against entropy-based uncertainty sampling based on dropout-enabled inference.

## First attempt:

Code from: https://github.com/modAL-python/modAL

Clone the modAL repository and install the active learning (AL) library from GitHub

In [None]:
!git clone https://github.com/modAL-python/modAL

%pip install git+https://github.com/modAL-python/modAL.git

In [2]:
import modAL

### 1. Implement a CNN classifier with dropout layers and modify it so that the dropout layers can be left on during inference.

In [None]:
%pip install torch
%pip install torchvision
%pip install torchsummary

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.models as models
from torchvision import datasets
from torchvision import transforms as T
from torchsummary import summary


%pip install wandb -q
import wandb

Log in to wandb (Weights & Biases) and initialize the experiment

In [None]:
!wandb login

In [None]:
# Start a Weights & Biases run named "run1" inside the "ML4NLP2" project
wandb.init(project="ML4NLP2", name="run1")

#### Define model

In [2]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler


# Device configuration: pick the CUDA device when torch reports one as
# available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Define the data_loader and the preprocessing transforms

In [None]:
def data_loader(data_dir,
                batch_size,
                random_seed=42,
                valid_size=0.1,
                shuffle=True,
                test=False):
    """Build CIFAR-100 data loaders.

    Every image is resized to 227x227, converted to a tensor and normalized
    per channel.

    Args:
        data_dir: Directory the dataset is read from / downloaded to.
        batch_size: Number of samples per batch.
        random_seed: Seed given to ``np.random.seed`` before the indices of
            the training set are shuffled (only used when ``shuffle`` is
            true and ``test`` is false). Note that this seeds NumPy's
            global random state.
        valid_size: Fraction of the training set held out for validation.
        shuffle: In train mode, whether the indices are shuffled before the
            train/validation split. In test mode, forwarded to the test
            ``DataLoader``.
        test: When true, only the loader over the test split is returned.

    Returns:
        A single ``DataLoader`` over the CIFAR-100 test split when ``test``
        is true, otherwise a ``(train_loader, valid_loader)`` tuple whose
        samplers draw from disjoint index subsets of the CIFAR-100 training
        split.
    """
    # NOTE(review): these look like the commonly quoted CIFAR-10 channel
    # statistics -- confirm they are the intended values for CIFAR-100.
    normalize = transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )

    # define transforms
    transform = transforms.Compose([
        transforms.Resize((227, 227)),
        transforms.ToTensor(),
        normalize,
    ])

    if test:
        dataset = datasets.CIFAR100(
            root=data_dir, train=False,
            download=True, transform=transform,
        )

        # Named test_loader so the local does not shadow this function.
        test_loader = torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=shuffle
        )

        return test_loader

    # load the dataset
    train_dataset = datasets.CIFAR100(
        root=data_dir, train=True,
        download=True, transform=transform,
    )

    # The validation samples must come from the same dataset as the training
    # samples (the previous CIFAR10 instance had a different label space).
    # Both splits use the same transform, so one dataset object is shared and
    # the two samplers below keep the splits disjoint.
    valid_dataset = train_dataset

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    # First `split` indices go to validation, the remainder to training.
    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler)

    return (train_loader, valid_loader)


# CIFAR100 dataset: the train/validation pair first, then the test loader,
# all read from ./data in batches of 64
train_loader, valid_loader = data_loader('./data', 64)

test_loader = data_loader('./data', 64, test=True)

In [None]:
# Read the KMNIST training images and labels from their .npz archives.
# np.load returns an NpzFile that keeps the archive open; using it as a
# context manager closes the file handle once 'arr_0' has been read.
with np.load('/Users/gabrieledele/kmnist/kmnist-train-imgs.npz') as imgs_archive:
    img_train = imgs_archive['arr_0']
with np.load('/Users/gabrieledele/kmnist/kmnist-train-labels.npz') as labels_archive:
    label_train = labels_archive['arr_0']

# Statistics over every value of the image array (all images, all pixels)
mean = img_train.mean()
std = img_train.std()

print(mean)
print(std)

In [None]:
class VGG16_Dropout(nn.Module):
    """VGG16-style CNN classifier whose dropout can stay active at inference.

    The convolutional stack is five groups of 3x3 convolutions (2, 2, 3, 3
    and 3 layers), each group followed by a 2x2 max-pool. The classifier is
    three linear layers with a ``Dropout(0.5)`` after each of the first two.

    Args:
        num_classes: Size of the output layer.
        dropout_enabled: When true, an additional functional dropout is
            applied to the flattened feature vector before the classifier.
            The two ``nn.Dropout`` layers inside the classifier are part of
            the network regardless of this flag.
        mc_dropout: Keyword-only. When true, every dropout that is active in
            training mode is also kept active in eval mode, so repeated
            forward passes on the same input give different outputs.
            Defaults to false, i.e. dropout is switched off by ``eval()``.
        in_channels: Keyword-only. Number of channels of the input images
            (1 by default).
    """

    # Output channels of each convolution in order; "M" marks a 2x2 max-pool.
    _LAYOUT = (
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, "M",
        512, 512, 512, "M",
        512, 512, 512, "M",
    )

    def __init__(self, num_classes=10, dropout_enabled=True, *,
                 mc_dropout=False, in_channels=1):
        super().__init__()
        self.dropout_enabled = dropout_enabled
        self.mc_dropout = mc_dropout

        self.features = self._build_features(in_channels)
        # Collapse whatever spatial size is left to 1x1 so the first linear
        # layer always receives 512 features; on a 1x1 map this is a no-op.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 1 * 1, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),  # Adjust dropout probability as needed
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),  # Adjust dropout probability as needed
            nn.Linear(4096, num_classes)
        )

    @classmethod
    def _build_features(cls, in_channels):
        """Assemble the conv/ReLU/max-pool stack described by ``_LAYOUT``."""
        layers = []
        channels = in_channels
        for spec in cls._LAYOUT:
            if spec == "M":
                # ceil_mode keeps odd-sized maps from being floored to zero:
                # a 28x28 input goes 28 -> 14 -> 7 -> 4 -> 2 -> 1, while
                # power-of-two sizes are pooled exactly as without it.
                layers.append(
                    nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True))
            else:
                layers.append(
                    nn.Conv2d(channels, spec, kernel_size=3, padding=1))
                layers.append(nn.ReLU(inplace=True))
                channels = spec
        return nn.Sequential(*layers)

    def train(self, mode=True):
        """Set train/eval mode, keeping dropout layers on if ``mc_dropout``.

        ``eval()`` is routed through this method with ``mode=False``; when
        ``mc_dropout`` is set, the ``nn.Dropout`` submodules are put back
        into training mode afterwards so they keep sampling masks.
        """
        super().train(mode)
        if self.mc_dropout and not mode:
            for module in self.modules():
                if isinstance(module, nn.Dropout):
                    module.train(True)
        return self

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)

        # Feature-level dropout: active in training mode, and also in eval
        # mode when mc_dropout is requested.
        if self.dropout_enabled:
            x = nn.functional.dropout(
                x, training=self.training or self.mc_dropout)

        x = self.classifier(x)
        return x

# Build the 10-class network with the dropout flag switched on
vgg16_dropout_model = VGG16_Dropout(10, True)

# Show the layer-by-layer structure of the model
print(vgg16_dropout_model)

In [10]:
def get_cost_function():
    """Return a new cross-entropy loss module."""
    return torch.nn.CrossEntropyLoss()

def get_optimizer(net, lr, wd, momentum):
    """Return an SGD optimizer over the parameters of ``net``.

    ``lr`` is the learning rate, ``wd`` the weight decay and ``momentum``
    the momentum factor handed to ``torch.optim.SGD``.
    """
    sgd_settings = {"lr": lr, "weight_decay": wd, "momentum": momentum}
    return torch.optim.SGD(net.parameters(), **sgd_settings)

modAL expects a scikit-learn-style estimator, so the PyTorch CNN has to be wrapped (here with skorch's `NeuralNetClassifier`) before it can be used for active learning

In [11]:
from skorch import NeuralNetClassifier

# Wrap the CNN in a skorch classifier; run on the GPU when CUDA is available
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

classifier = NeuralNetClassifier(
    VGG16_Dropout,
    criterion=nn.CrossEntropyLoss,
    optimizer=torch.optim.SGD,
    train_split=None,  # no internal validation split
    verbose=1,
    device=device,
)

Get data

In [None]:
import numpy as np
import torchvision
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
from torchvision.datasets import KMNIST


# KMNIST(...) returns only one split per instance (the training split by
# default), so slicing a single loaded batch at index 60000 left the test
# tensors empty and the reshape to 10000 images failed. Load the training
# and test splits explicitly instead.
kmnist_data = KMNIST(root='./data', train=True, download=True, transform=ToTensor())
kmnist_test_data = KMNIST(root='./data', train=False, download=True, transform=ToTensor())

# Pull each split into memory as one batch; only the training data is shuffled
dataloader = DataLoader(kmnist_data, shuffle=True, batch_size=len(kmnist_data))
X, y = next(iter(dataloader))
test_dataloader = DataLoader(kmnist_test_data, shuffle=False,
                             batch_size=len(kmnist_test_data))
X_test, y_test = next(iter(test_dataloader))

# read training data
X_train, y_train = X, y
# -1 lets the batch dimension follow the actual split size
X_train = X_train.reshape(-1, 1, 28, 28)
X_test = X_test.reshape(-1, 1, 28, 28)

# assemble initial data
n_initial = 1000
initial_idx = np.random.choice(range(len(X_train)), size=n_initial, replace=False)
X_initial = X_train[initial_idx]
y_initial = y_train[initial_idx]

# generate the pool
# remove the initial data from the training dataset and keep the first 6000
# remaining samples (np.delete returns NumPy arrays, not torch tensors)
X_pool = np.delete(X_train, initial_idx, axis=0)[:6000]
y_pool = np.delete(y_train, initial_idx, axis=0)[:6000]