In [1]:
# Load library
import sys
import os
PROJ_DIR = os.path.realpath(os.path.dirname(os.path.dirname(os.path.abspath(''))))
sys.path.append(os.path.join(PROJ_DIR,'src'))

import xai_faithfulness_experiments_lib_edits as ff

In [7]:
DATASET_NAME = 'mnist'
MODEL_NAME = 'ood-mean_softmax'

# Load dataset
import torch
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'Using {device}')
import torchvision

batch_size = 256

MNIST_PATH = os.path.join(PROJ_DIR, 'data', 'mnist')

train_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST(MNIST_PATH, train=True, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(
  torchvision.datasets.MNIST(MNIST_PATH, train=False, download=True,
                             transform=torchvision.transforms.Compose([
                               torchvision.transforms.ToTensor(),
                               torchvision.transforms.Normalize(
                                 (0.1307,), (0.3081,))
                             ])),
  batch_size=batch_size, shuffle=True)

examples = enumerate(test_loader)
batch_idx, (example_data, example_targets) = next(examples)

Using cuda:0


In [3]:
import torch
import numpy as np
class RandomMasker(torch.nn.Module):
    def __init__(self, masking_value:torch.tensor):
        super().__init__()
        self._masking_value = masking_value
    def forward(self, x): # Assumes inputs are (batch_size, num_vars)
        selection_levels = torch.rand((x.shape[0], 1)).to(device) # A different selection level for each element of the batch
        while len(selection_levels.shape) < len(x.shape):
            selection_levels = selection_levels.unsqueeze(dim=-1)
        selected_pixels = torch.le(torch.rand(x.shape).to(device), selection_levels) # A different selection level for each element of the batch
        return x * selected_pixels + self._masking_value * ~selected_pixels

# Zeros or mean
masking_value = np.zeros(example_data.shape[1:])
masker = RandomMasker(torch.tensor(masking_value).float().to(device)).to(device)

Using cuda:0


In [6]:
# Declare classifier
network = ff.MNISTClassifier().to(device)

In [7]:
MODEL_LR = 1e-3
loss = torch.nn.BCELoss()
optimizer = torch.optim.Adam(network.parameters(), lr=MODEL_LR)#, weight_decay=1e-3)
MODEL_EPOCHS = 10

_, (x_test, y_test) = next(enumerate(test_loader))
x_test = x_test.to(device)
y_test = y_test.to(device)

if MODEL_NAME == 'untrained':
    MODEL_EPOCHS = 0 # Untrained
for epoch in range(MODEL_EPOCHS):
    for batch_idx, (x_train, y_train) in enumerate(train_loader):
        x_train = x_train.to(device)
        y_train = y_train.to(device)
        if 'ood' in MODEL_NAME:
            x_train_tensor_masked = masker(x_train) # A different set of RandomMasks for each batch
        else:
            x_train_tensor_masked = x_train # Unmasked
        optimizer.zero_grad()
        
        preds = network(x_train_tensor_masked)
        label_onehot = torch.zeros(y_train.shape[0], MODEL_LABEL_NUM).to(device)
        label_onehot.scatter_(1, y_train.unsqueeze(1), 1)
        loss_value = loss(preds, label_onehot)
        loss_value.backward()        
        optimizer.step()

        train_accuracy = (preds.argmax(dim=1) == y_train).float().mean() 

        test_preds = network.forward(x_test)        
        test_accuracy = (test_preds.argmax(dim=1) == y_test).float().mean() 
        print(f'Epoch {epoch + 1}/{MODEL_EPOCHS} - Loss: {loss_value.item():.4f} - Train accuracy: {train_accuracy:.2f} - Test accuracy: {test_accuracy:.2f}')
        #if test_accuracy > 0.6: # Undertrained
        #    break

Epoch 1/10 - Loss: 0.3305 - Train accuracy: 0.11 - Test accuracy: 0.14
Epoch 1/10 - Loss: 0.3246 - Train accuracy: 0.13 - Test accuracy: 0.15
Epoch 1/10 - Loss: 0.3264 - Train accuracy: 0.13 - Test accuracy: 0.14
Epoch 1/10 - Loss: 0.3229 - Train accuracy: 0.16 - Test accuracy: 0.20
Epoch 1/10 - Loss: 0.3238 - Train accuracy: 0.10 - Test accuracy: 0.18
Epoch 1/10 - Loss: 0.3212 - Train accuracy: 0.15 - Test accuracy: 0.18
Epoch 1/10 - Loss: 0.3212 - Train accuracy: 0.17 - Test accuracy: 0.20
Epoch 1/10 - Loss: 0.3167 - Train accuracy: 0.26 - Test accuracy: 0.20
Epoch 1/10 - Loss: 0.3165 - Train accuracy: 0.23 - Test accuracy: 0.23
Epoch 1/10 - Loss: 0.3181 - Train accuracy: 0.25 - Test accuracy: 0.23
Epoch 1/10 - Loss: 0.3134 - Train accuracy: 0.28 - Test accuracy: 0.23
Epoch 1/10 - Loss: 0.3131 - Train accuracy: 0.22 - Test accuracy: 0.24
Epoch 1/10 - Loss: 0.3085 - Train accuracy: 0.29 - Test accuracy: 0.27
Epoch 1/10 - Loss: 0.3078 - Train accuracy: 0.23 - Test accuracy: 0.30
Epoch 

In [None]:
# Save model
torch.save(network.state_dict(), os.path.join(PROJ_DIR,'assets','models',f'{DATASET_NAME}-{MODEL_NAME}-mlp.pth'))

In [8]:
network = ff.load_pretrained_mnist_model(os.path.join(PROJ_DIR,'assets','models',f'{DATASET_NAME}-{MODEL_NAME}-mlp.pth'))

In [9]:
network.eval()
test_accuracy = 0
num_batches = 0
for batch_idx, (x_test, y_test) in enumerate(train_loader):
    x_test = x_test.to(device)
    y_test = y_test.to(device)
    num_batches += 1
    test_preds = network.forward(x_test)        
    test_accuracy += (test_preds.argmax(dim=1) == y_test).float().mean()
print((test_accuracy.item()) / num_batches)

0.9302581787109375


In [10]:
import json
MODELS_PATH = os.path.join(PROJ_DIR,'assets','models')
with open(os.path.join(MODELS_PATH, 'model-accuracies.json')) as fIn:
    models = json.load(fIn)
models[f'{DATASET_NAME}-{MODEL_NAME}'] = test_accuracy.item()
with open(os.path.join(MODELS_PATH, 'model-accuracies.json'), 'w') as fOut:
    json.dump(models, fOut)