In [2]:
from laplace.baselaplace import FullLaplace
from laplace.curvature.backpack import BackPackGGN
import numpy as np
import torch

from laplace import Laplace, marglik_training
import torch
from torchvision import datasets, transforms
import torch.utils.data as data_utils
import matplotlib.pyplot as plt
import torchvision


In [3]:
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = True

In [4]:
import torch.nn as nn


In [5]:
import torch.nn.functional as F


In [6]:
import torch.distributions as dists
from netcal.metrics import ECE



In [7]:
config = {
    "num_classes":10,
    "kernel_size": 5,
    "channels":1,
    "filter_1_out" :16,
    "filter_2_out" :32,
    "padding" :0,
    "stride" :1, 
    "pool":2,
    "learning_rate": 0.001,
    "epochs": 20,
    "batch_size": 64,
    "crop_size":128
}


In [8]:
device = torch.device('cpu')

In [9]:


def compute_conv_dim(dim_size, kernel_size, padding, stride):
  # (I-F)+2*P/S +1
    return int((dim_size - kernel_size + 2 * padding) / stride + 1)

def compute_pool_dim(dim_size, kernel_size, stride):
  #(I-F)/S +1
  return int((dim_size - kernel_size) / stride + 1)

In [10]:
train_loader = torch.utils.data.DataLoader(
            datasets.MNIST(root='.', train=True, download=True,
                        transform=transforms.Compose([
                            transforms.ToTensor(),
                            transforms.Normalize((0.1307,), (0.3081,))
                        ])), batch_size=config["batch_size"], shuffle=True, num_workers=2)

In [11]:
test_loader = torch.utils.data.DataLoader(
            datasets.MNIST(root='.', train=False, transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,))
            ])), batch_size=config["batch_size"], shuffle=True, num_workers=2)

In [12]:
train_features, train_labels = next(iter(train_loader))
height = train_features.shape[2]
width = train_features.shape[3]
print(height,width)


28 28


In [13]:
class Net(nn.Module):
    def __init__(
        self,
        num_classes,
        channels,
        filter_1_out,
        filter_2_out,
        kernel_size,
        padding,
        stride,
        height,
        width,
        pool,
        parameterize,
    ):
        super(Net, self).__init__()
        self.num_classes = (num_classes,)
        self.channels = (channels,)
        self.filter_1_out = (filter_1_out,)
        self.filter_2_out = (filter_2_out,)
        self.kernel_size = (kernel_size,)
        self.padding = (padding,)
        self.stride = (stride,)
        self.height = (height,)
        self.width = (width,)
        self.pool = (pool,)
        self.parameterize = parameterize

        self.conv1 = nn.Conv2d(channels, filter_1_out, kernel_size)
        # evaluating image dimensions after first connvolution
        self.conv1_out_height = compute_conv_dim(
            height, kernel_size, padding, stride
        )
        self.conv1_out_width = compute_conv_dim(
            width, kernel_size, padding, stride
        )

        # first pooling
        self.pool1 = nn.MaxPool2d(pool, pool)
        # evaluating image dimensions after first pooling
        self.conv2_out_height = compute_pool_dim(
            self.conv1_out_height, pool, pool
        )
        self.conv2_out_width = compute_pool_dim(
            self.conv1_out_width, pool, pool
        )

        # Second Convolution
        self.conv2 = nn.Conv2d(filter_1_out, filter_2_out, kernel_size)
        # evaluating image dimensions after second convolution
        self.conv3_out_height = compute_conv_dim(
            self.conv2_out_height, kernel_size, padding, stride
        )
        self.conv3_out_width = compute_conv_dim(
            self.conv2_out_width, kernel_size, padding, stride
        )
        self.conv2_drop = nn.Dropout2d()

        # Second pooling
        self.pool2 = nn.MaxPool2d(pool, pool)
        # evaluating image dimensions after second pooling
        self.conv4_out_height = compute_pool_dim(
            self.conv3_out_height, pool, pool
        )
        self.conv4_out_width = compute_pool_dim(
            self.conv3_out_width, pool, pool
        )

        self.fc1 = nn.Linear(
            filter_2_out * self.conv4_out_height * self.conv4_out_width, 50
        )
        self.fc2 = nn.Linear(50, num_classes)

        

    def forward(self, x):

        
        # convolutional layer 1
        x = F.relu(self.pool1(self.conv1(x)))

        # convolutional layer 2
        x = F.relu(self.pool2(self.conv2_drop(self.conv2(x))))

        x = x.view(
            -1, self.filter_2_out[0] * self.conv4_out_height * self.conv4_out_width
        )

        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)

        return x

In [14]:
model = Net(
        config["num_classes"],
        config["channels"],
        config["filter_1_out"],
        config["filter_2_out"],
        config["kernel_size"],
        config["padding"],
        config["stride"],
        height,
        width,
        config["pool"],parameterize=False).to(device).eval()

In [15]:
# model_path = '/Users/georgioszefkilis/Bayesian_Deep_Learning/models/best_checkpoint.pth'
model_path = '/Users/georgioszefkilis/Bayesian_Deep_Learning/saved_models/colab_best_Vanilla_MNIST_20.pth'
checkpoint = torch.load(model_path, map_location=device)
    # initialize state_dict from checkpoint to model
model.load_state_dict(checkpoint["state_dict"])

<All keys matched successfully>

In [15]:
targets = torch.cat([y for x, y in test_loader], dim=0).cpu()


In [17]:
targets.shape

torch.Size([10000])

In [16]:
def predict(trained_model,laplace=False):
    with torch.no_grad():

        correct = 0
        

        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            if laplace:
                output = trained_model(data)
            else:
                output = torch.softmax(trained_model(data),dim=-1)
            
            acc_map = (output.argmax(-1) == target).float().mean()
            ece_map = ECE(bins=15).measure(output.numpy(), target.numpy())
            nll_map = -dists.Categorical(output).log_prob(target).mean()

            # ps = torch.exp(output)

            # # Keep track of how many are correctly classified
            # top_p, top_class = ps.topk(1, dim=1)
            # equals = top_class == target.view(*top_class.shape)
            # correct += equals.type(torch.FloatTensor).sum().item()
            _,predicted = torch.max(output.data,1)
            correct+=(predicted==target).sum().item()

        print(
            "\nTest set: Accuracy: {}/{} ({:.0f}%)\n".format(
                correct,
                len(test_loader.dataset),
                100.0 * correct / len(test_loader.dataset),
            )
        )
        print(f"Acc.: {acc_map:.1%}; ECE: {ece_map:.1%}; NLL: {nll_map:.3}")

    accuracy = correct / len(test_loader.dataset)
    return accuracy,acc_map,ece_map,nll_map

In [39]:
@torch.no_grad()
def predict_example(dataloader, model, laplace=False):
    py = []
    target = []
    for x, t in dataloader:
        x,t = x.to(device),t.to(device)
        target.append(t)
        if laplace:
            py.append(model(x))
        else:
            py.append(torch.softmax(model(x), dim=-1))
    images = torch.cat(py).cpu()
    labels =torch.cat(target, dim=0).cpu()
    return images,labels

# Last layer implementation

## Without Laplace

In [23]:
accuracy,acc_map,ece_map,nll_map = predict(model, laplace=False)



Test set: Accuracy: 9913/10000 (99%)

Acc.: 100.0%; ECE: 1.7%; NLL: 0.0202


In [43]:
probs_map,targets = predict_example(test_loader, model, laplace=False)
#targets = torch.cat([y for x, y in test_loader], dim=0).cpu()

acc_map = (probs_map.argmax(-1) == targets).float().mean()
print(acc_map)
ece_map = ECE(bins=15).measure(probs_map.numpy(), targets.numpy())
nll_map = -dists.Categorical(probs_map).log_prob(targets).mean()

print(f"[MAP] Acc.: {acc_map:.1%}; ECE: {ece_map:.1%}; NLL: {nll_map:.3}")

tensor(0.9913)
[MAP] Acc.: 99.1%; ECE: 0.3%; NLL: 0.0289


## With Laplace

In [18]:
la = Laplace(model, 'classification',
             subset_of_weights='last_layer',
             hessian_structure='kron')
la.fit(train_loader)
la.optimize_prior_precision(method='marglik')

In [21]:
accuracy,acc_map,ece_map,nll_map = predict(la, laplace=True)



Test set: Accuracy: 9915/10000 (99%)

Acc.: 100.0%; ECE: 0.3%; NLL: 0.00351


In [42]:
probs_map,targets = predict_example(test_loader, la, laplace=True)
#targets = torch.cat([y for x, y in test_loader], dim=0).cpu()

acc_map = (probs_map.argmax(-1) == targets).float().mean()
print(acc_map)
ece_map = ECE(bins=15).measure(probs_map.numpy(), targets.numpy())
nll_map = -dists.Categorical(probs_map).log_prob(targets).mean()

print(f"[Laplace] Acc.: {acc_map:.1%}; ECE: {ece_map:.1%}; NLL: {nll_map:.3}")

tensor(0.9915)
[Laplace] Acc.: 99.2%; ECE: 0.5%; NLL: 0.0287


### Observations
Not sure 100% that it works corect. Evey run gives different results for both with and without laplace.

# Subnetwork implementation

In [44]:
from laplace.baselaplace import FullLaplace
from laplace.curvature.backpack import BackPackGGN
from laplace.utils import ModuleNameSubnetMask

In [78]:
for name,m in model.named_modules():
    print(name)


conv1
pool1
conv2
conv2_drop
pool2
fc1
fc2


In [45]:
print('start_laplace')
subnetwork_mask = ModuleNameSubnetMask(model, module_names=['fc1'])
print('step 2')
subnetwork_mask.select()
print('step 3')
subnetwork_indices = subnetwork_mask.indices
print('step 4')
sub_laplace = Laplace(
    model,
    "classification",
    subset_of_weights="subnetwork",
    hessian_structure="full",
    subnetwork_indices = subnetwork_indices#.type(torch.LongTensor),
)
print('fit')
sub_laplace.fit(train_loader)
print('optimize')
sub_laplace.prior_precision=torch.tensor([0.00001])

#laplace.optimize_prior_precision(method="marglik",val_loader=test_loader)

start_laplace
step 2
step 3
step 4
fit




In [80]:
la_accuracy,acc_map,ece_map,nll_map = predict(sub_laplace,laplace=True)



Test set: Accuracy: 9903.0/10000 (99%)

Acc.: 100.0%; ECE: 4.0%; NLL: 0.0419


In [None]:
probs_map,targets = predict_example(test_loader, sub_laplace, laplace=True)
#targets = torch.cat([y for x, y in test_loader], dim=0).cpu()

acc_map = (probs_map.argmax(-1) == targets).float().mean()
print(acc_map)
ece_map = ECE(bins=15).measure(probs_map.numpy(), targets.numpy())
nll_map = -dists.Categorical(probs_map).log_prob(targets).mean()

print(f"[Laplace] Acc.: {acc_map:.1%}; ECE: {ece_map:.1%}; NLL: {nll_map:.3}")

In [1]:
sub_laplace.prior_precision=torch.tensor([0.00001])


NameError: name 'torch' is not defined

In [None]:
la_accuracy,acc_map,ece_map,nll_map = predict(sub_laplace,laplace=True)
