In [4]:
import os

In [6]:
# Show the kernel's current working directory (the cell's displayed value).
# NOTE(review): presumably a check that the project-local modules imported
# further down resolve from here; the output exposes an absolute local path.
os.getcwd()

'C:\\Users\\Damja\\OneDrive\\Damjan\\HS22\\Deep Learning\\project\\dev'

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.distributions import Normal
from torch.utils.data import Subset
from torch.distributions import Categorical, Normal, StudentT
from torch.optim import SGD
from torch.optim.lr_scheduler import PolynomialLR
import torchvision
from torchvision import datasets, transforms
import torchvision.transforms as tr
import torchmetrics
from torchmetrics.functional import calibration_error
import math
import matplotlib.pyplot as plt
import random
from collections import deque, OrderedDict
from tqdm import trange
import tqdm
import copy
import typing
from typing import Sequence, Optional, Callable, Tuple, Dict, Union
import pandas as pd

from data import Data
from priors import *
from Networks import *
from BayesianNN import BNN_MCMC
from SGLD import SGLD

In [2]:
# Load the MNIST train/test splits.
# Candidate transforms: RandomRotation, RandomCrop, GaussianBlur.
# Skip Normalize and ToTensor (already done) as well as RandomHorizontalFlip /
# RandomVerticalFlip (for MNIST).
# NOTE: the pipeline below is only defined here; the loader is called with
# augmentations=None, so no augmentation is applied in this cell.
augmentations = tr.Compose([tr.RandomRotation(15)])

mnist = Data("MNIST", augmentations=None)
train_data, test_data = mnist.get_data(num_train_samples=60000)

# Report the size of each split
for label, split in (("Train data size: ", train_data), ("Test data size: ", test_data)):
    print(label, len(split))


Train data size:  60000
Test data size:  10000


In [4]:
# Single trial run: Normal-Inverse-Gamma prior on the fully connected network.
prior = Normal_Inverse_Gamma(0, 1, 1, 1)
lol = BNN_MCMC(
    train_data,
    network=FullyConnectedNN(),
    prior=prior,
    num_epochs=30,
    max_size=15,
    burn_in=5,
    lr=1e-3,
    sample_interval=2,
)
lol.train()

Training Model


  0%|          | 0/30 [00:00<?, ?it/s]


TypeError: only integer tensors of a single element can be converted to an index

In [3]:
# Score the trained model `lol` on the test split: accuracy and calibration error.
acc, ece = lol.test_accuracy(test_data), lol.test_calibration(test_data)
for label, value in (("Test accuracy: ", acc), ("Test ECE: ", ece)):
    print(label, value)

NameError: name 'lol' is not defined

In [None]:
# Collect the flattened state_dict entries of every model stored in lol.model_sequence.
param_flat_all = []
for model in lol.model_sequence:
    state = model.state_dict()
    # one 1-D tensor per model: all state_dict entries flattened and joined
    param_flat_all.append(torch.cat([v.flatten() for v in state.values()]))

# Join everything on the torch side, then move to host memory before handing the
# data to NumPy/matplotlib. Calling np.concatenate directly on a list of tensors
# relies on implicit conversion and fails for tensors that live on a GPU.
params = torch.cat(param_flat_all).detach().cpu().numpy()

# Histogram of all collected weights, restricted to [-4, 4].
# NOTE(review): the title is hard-coded to a Gaussian-mixture prior; confirm it
# matches the prior that `lol` was actually trained with.
plt.hist(params, bins=1000, range=(-4, 4))
plt.title("Weights of all models, GaussianMixture(-2, 0.5, 2, 0.5) ")
plt.show()

In [None]:
# Draw 100000 samples from the Normal-Inverse-Gamma prior; sample() returns a
# pair that is unpacked into the sampled values `x` and a second quantity `var`.
# NOTE(review): `var` is presumably the sampled variance; confirm against priors.py.
x, var = Normal_Inverse_Gamma(0, 1, 1, 1).sample(100000)
x = x.flatten().numpy()

# plot prior samples
plt.hist(x, bins=1000, range=(-5, 5))
plt.show()

# show the second value returned by sample()
print(var)


# Pretrain Prior

In [6]:
# Pretraining run on FashionMNIST (num_train_samples=600, no augmentation).
fashion = Data("FashionMNIST", augmentations=None)
pretrain_data, pretest_data = fashion.get_data(num_train_samples=600)

prior = Isotropic_Gaussian()
pretrainer = BNN_MCMC(
    pretrain_data,
    network=FullyConnectedNN(),
    prior=prior,
    num_epochs=30,
    max_size=15,
    burn_in=5,
    lr=1e-3,
    sample_interval=2,
)
pretrainer.train()

# Posterior statistics of the pretraining run, used later to build a pretrained prior.
mu, var = pretrainer.get_posterior_stats()



Training Model


100%|██████████| 30/30 [00:01<00:00, 15.01it/s, acc=0.125, log_prior_normalized=8.24e+4, loss=8.25e+4, lr=0.000183, nll_loss=10.7] 


# Evaluation loop

In [10]:
# Isotropic Gaussian built from the statistics returned by the pretraining run.
pretrained_Isotropic_Gaussian = Isotropic_Gaussian(mu, var)

# Prior classes (not instances); each one is instantiated once per temperature below.
# The earlier list of default-constructed instances was removed: it was
# overwritten before ever being read.
prior_list_v2 = [Isotropic_Gaussian,
              StudentT_prior,
              Laplace_prior,
              Gaussian_Mixture,
              GaussianSpikeNSlab,
              MixedLaplaceUniform,
              Normal_Inverse_Gamma]

# Training-set sizes to evaluate.
sample_sizes = [3750, 7500, 15000, 30000, 60000, 120000]

# Temperature values passed to each prior's constructor.
Temperatures = [0.001, 0.01, 0.1, 1, 10]


# Build every (prior class, temperature) combination.
prior_list = []
for prior in prior_list_v2:
    for temp in Temperatures:
        prior_list.append(prior(Temperature=temp))

# prior_list holds every prior initialized with the corresponding temperature,
# grouped by prior (prior is the outer loop, temperature the inner one):
# [prior1_temp1, prior1_temp2, ..., prior1_temp5, prior2_temp1, prior2_temp2, ... ]

  self.loc = torch.tensor(loc, dtype=torch.float32)
  self.scale = torch.tensor(scale, dtype=torch.float32)


In [11]:
# Preallocate the results table: one empty row per (prior, sample size) run,
# filled in row by row by the experiment loop.
result_columns = ["Prior", "Sample Size", "Temperature", "Test Accuracy", "Test ECE"]
n_runs = len(prior_list) * len(sample_sizes)
results = pd.DataFrame(index=range(n_runs), columns=result_columns)

results

Unnamed: 0,Prior,Sample Size,Test Accuracy,Test ECE
0,,,,
1,,,,
2,,,,
3,,,,
4,,,,
5,,,,
6,,,,
7,,,,
8,,,,
9,,,,


In [12]:
# run the experiment for all priors

# Baseline schedule, defined for the largest sample size (the last entry of sample_sizes).
base_epoch, base_burn_in, base_sample_interval, base_samplesize = 50, 15, 2, sample_sizes[-1]

# Map each sample size to (num_epochs, burn_in, sample_interval), scaling the
# baseline by base_samplesize / sample_size.
# Integer division keeps every entry an int: true division produced floats
# (e.g. 1600.0), which are not valid as epoch counts or range()/trange() arguments.
# For the sizes listed in sample_sizes the division is exact; other sizes round down.
args_dict = {
    sample_size: (
        base_epoch * base_samplesize // sample_size,
        base_burn_in * base_samplesize // sample_size,
        base_sample_interval * base_samplesize // sample_size,
    )
    for sample_size in sample_sizes
}


# Train and evaluate one model per (sample size, prior) pair and record the scores.
for n, sample_size in enumerate(sample_sizes):
    # schedule for this sample size: (num_epochs, burn_in, sample_interval)
    num_epochs, burn_in, sample_interval = args_dict[sample_size]

    # get data
    if sample_size == 120000:
        # the 120000 setting uses the data set with rotation augmentation
        augmentations = tr.Compose([tr.RandomRotation(15)])
        train_data, test_data = Data("MNIST", augmentations=augmentations).get_data()
    else:
        # every smaller setting subsamples the original training data
        train_data, test_data = Data("MNIST", augmentations=None).get_data(num_train_samples=sample_size)

    # first row of `results` that belongs to this sample size
    row_offset = n * len(prior_list)

    for i, prior in enumerate(prior_list):
        print(50*"-")
        print(prior.name)
        print(prior.Temperature)

        # run BNN with a freshly constructed network
        lol = BNN_MCMC(
            train_data,
            network=FullyConnectedNN(),
            prior=prior,
            num_epochs=num_epochs,
            max_size=10,
            burn_in=burn_in,
            lr=1e-3,
            sample_interval=sample_interval,
        )
        lol.train()

        # get test accuracy and ECE
        acc = lol.test_accuracy(test_data)
        ece = lol.test_calibration(test_data)
        print("Test accuracy: ", acc)
        print("Test ECE: ", ece)

        # save results
        results.iloc[row_offset + i, :] = prior.name, sample_size, prior.Temperature, acc.numpy(), ece.numpy()
    

--------------------------------------------------
Isotropic Gaussian
Training Model


100%|██████████| 50/50 [00:12<00:00,  3.89it/s, acc=0.898, log_prior_normalized=8.27e+4, loss=8.27e+4, lr=0.000141, nll_loss=13.8]


Test accuracy:  tensor(0.8783)
Test ECE:  tensor(0.1307)
--------------------------------------------------
Student-T
Training Model


100%|██████████| 50/50 [00:15<00:00,  3.24it/s, acc=0.852, log_prior_normalized=8.49e+4, loss=8.49e+4, lr=0.000141, nll_loss=15.6]


Test accuracy:  tensor(0.8770)
Test ECE:  tensor(0.1516)
--------------------------------------------------
Laplace
Training Model


100%|██████████| 50/50 [00:15<00:00,  3.25it/s, acc=0.109, log_prior_normalized=6.28e+4, loss=6.29e+4, lr=0.000141, nll_loss=67.4] 


Test accuracy:  tensor(0.1135)
Test ECE:  tensor(0.0128)
--------------------------------------------------
Gaussian Mixture
Training Model


100%|██████████| 50/50 [00:20<00:00,  2.41it/s, acc=0.898, log_prior_normalized=1.51e+5, loss=1.51e+5, lr=0.000141, nll_loss=10.7]


Test accuracy:  tensor(0.8860)
Test ECE:  tensor(0.0764)
--------------------------------------------------
Gaussian Spike and Slab
Training Model


100%|██████████| 50/50 [00:34<00:00,  1.45it/s, acc=0.922, log_prior_normalized=1.03e+5, loss=1.03e+5, lr=0.000141, nll_loss=12.1]


Test accuracy:  tensor(0.8770)
Test ECE:  tensor(0.1293)
--------------------------------------------------
Mixed Laplace and Uniform
Training Model


100%|██████████| 50/50 [00:13<00:00,  3.59it/s, acc=0.922, log_prior_normalized=1.24e+5, loss=1.24e+5, lr=0.000141, nll_loss=6.51]


Test accuracy:  tensor(0.8859)
Test ECE:  tensor(0.0308)
--------------------------------------------------
Normal Inverse Gamma
Training Model


100%|██████████| 50/50 [00:16<00:00,  2.94it/s, acc=0.938, log_prior_normalized=70.4, loss=75.9, lr=0.000141, nll_loss=5.31]   


Test accuracy:  tensor(0.8824)
Test ECE:  tensor(0.0256)
--------------------------------------------------
Isotropic Gaussian
Training Model


100%|██████████| 50/50 [00:28<00:00,  1.78it/s, acc=0.891, log_prior_normalized=8.27e+4, loss=8.27e+4, lr=0.000141, nll_loss=19.2]


Test accuracy:  tensor(0.9059)
Test ECE:  tensor(0.0768)
--------------------------------------------------
Student-T
Training Model


100%|██████████| 50/50 [00:41<00:00,  1.21it/s, acc=0.922, log_prior_normalized=8.49e+4, loss=8.5e+4, lr=0.000141, nll_loss=17.9] 


Test accuracy:  tensor(0.9101)
Test ECE:  tensor(0.0784)
--------------------------------------------------
Laplace
Training Model


100%|██████████| 50/50 [00:31<00:00,  1.59it/s, acc=0.0469, log_prior_normalized=6.28e+4, loss=6.3e+4, lr=0.000141, nll_loss=135]


Test accuracy:  tensor(0.0992)
Test ECE:  tensor(0.0016)
--------------------------------------------------
Gaussian Mixture
Training Model


100%|██████████| 50/50 [00:38<00:00,  1.31it/s, acc=0.922, log_prior_normalized=1.52e+5, loss=1.52e+5, lr=0.000141, nll_loss=12.6]


Test accuracy:  tensor(0.9170)
Test ECE:  tensor(0.0259)
--------------------------------------------------
Gaussian Spike and Slab
Training Model


100%|██████████| 50/50 [01:13<00:00,  1.46s/it, acc=0.922, log_prior_normalized=1.03e+5, loss=1.03e+5, lr=0.000141, nll_loss=17.1]


Test accuracy:  tensor(0.9078)
Test ECE:  tensor(0.0759)
--------------------------------------------------
Mixed Laplace and Uniform
Training Model


100%|██████████| 50/50 [00:29<00:00,  1.68it/s, acc=0.977, log_prior_normalized=1.24e+5, loss=1.24e+5, lr=0.000141, nll_loss=5.62]


Test accuracy:  tensor(0.9181)
Test ECE:  tensor(0.0066)
--------------------------------------------------
Normal Inverse Gamma
Training Model


100%|██████████| 50/50 [00:33<00:00,  1.48it/s, acc=0.969, log_prior_normalized=41.7, loss=48.3, lr=0.000141, nll_loss=5.79]


Test accuracy:  tensor(0.9154)
Test ECE:  tensor(0.0258)
--------------------------------------------------
Isotropic Gaussian
Training Model


100%|██████████| 50/50 [01:09<00:00,  1.39s/it, acc=0.945, log_prior_normalized=8.27e+4, loss=8.28e+4, lr=0.000141, nll_loss=26.9]


Test accuracy:  tensor(0.9296)
Test ECE:  tensor(0.1705)
--------------------------------------------------
Student-T
Training Model


100%|██████████| 50/50 [01:16<00:00,  1.53s/it, acc=0.961, log_prior_normalized=8.5e+4, loss=8.5e+4, lr=0.000141, nll_loss=21.2]  


Test accuracy:  tensor(0.8979)
Test ECE:  tensor(0.1898)
--------------------------------------------------
Laplace
Training Model


100%|██████████| 50/50 [01:02<00:00,  1.25s/it, acc=0.117, log_prior_normalized=6.28e+4, loss=6.31e+4, lr=0.000141, nll_loss=270] 


Test accuracy:  tensor(0.1135)
Test ECE:  tensor(0.0104)
--------------------------------------------------
Gaussian Mixture
Training Model


100%|██████████| 50/50 [01:13<00:00,  1.46s/it, acc=0.984, log_prior_normalized=1.52e+5, loss=1.52e+5, lr=0.000141, nll_loss=11.5]


Test accuracy:  tensor(0.9444)
Test ECE:  tensor(0.0762)
--------------------------------------------------
Gaussian Spike and Slab
Training Model


100%|██████████| 50/50 [02:28<00:00,  2.98s/it, acc=0.938, log_prior_normalized=1.03e+5, loss=1.03e+5, lr=0.000141, nll_loss=27.6]


Test accuracy:  tensor(0.9334)
Test ECE:  tensor(0.1509)
--------------------------------------------------
Mixed Laplace and Uniform
Training Model


100%|██████████| 50/50 [00:54<00:00,  1.09s/it, acc=0.984, log_prior_normalized=1.24e+5, loss=1.24e+5, lr=0.000141, nll_loss=5.41]


Test accuracy:  tensor(0.9412)
Test ECE:  tensor(0.0153)
--------------------------------------------------
Normal Inverse Gamma
Training Model


100%|██████████| 50/50 [01:05<00:00,  1.31s/it, acc=0.984, log_prior_normalized=30.1, loss=41.1, lr=0.000141, nll_loss=9.37] 


Test accuracy:  tensor(0.9335)
Test ECE:  tensor(0.0203)
--------------------------------------------------
Isotropic Gaussian
Training Model


100%|██████████| 50/50 [01:47<00:00,  2.15s/it, acc=0.977, log_prior_normalized=8.27e+4, loss=8.28e+4, lr=0.000141, nll_loss=22.8]


Test accuracy:  tensor(0.9634)
Test ECE:  tensor(0.1056)
--------------------------------------------------
Student-T
Training Model


100%|██████████| 50/50 [02:10<00:00,  2.61s/it, acc=0.953, log_prior_normalized=8.5e+4, loss=8.5e+4, lr=0.000141, nll_loss=31.9] 


Test accuracy:  tensor(0.9630)
Test ECE:  tensor(0.0825)
--------------------------------------------------
Laplace
Training Model


100%|██████████| 50/50 [01:57<00:00,  2.36s/it, acc=0.883, log_prior_normalized=6.29e+4, loss=6.3e+4, lr=0.000141, nll_loss=95.3]   


Test accuracy:  tensor(0.8517)
Test ECE:  tensor(0.2947)
--------------------------------------------------
Gaussian Mixture
Training Model


100%|██████████| 50/50 [02:24<00:00,  2.88s/it, acc=0.992, log_prior_normalized=1.52e+5, loss=1.52e+5, lr=0.000141, nll_loss=11]  


Test accuracy:  tensor(0.9666)
Test ECE:  tensor(0.0353)
--------------------------------------------------
Gaussian Spike and Slab
Training Model


100%|██████████| 50/50 [04:41<00:00,  5.63s/it, acc=0.969, log_prior_normalized=1.03e+5, loss=1.03e+5, lr=0.000141, nll_loss=40.5]


Test accuracy:  tensor(0.9643)
Test ECE:  tensor(0.0857)
--------------------------------------------------
Mixed Laplace and Uniform
Training Model


100%|██████████| 50/50 [01:45<00:00,  2.12s/it, acc=1, log_prior_normalized=1.24e+5, loss=1.24e+5, lr=0.000141, nll_loss=1.81]    


Test accuracy:  tensor(0.9598)
Test ECE:  tensor(0.0117)
--------------------------------------------------
Normal Inverse Gamma
Training Model


100%|██████████| 50/50 [02:11<00:00,  2.63s/it, acc=0.992, log_prior_normalized=17.3, loss=34.6, lr=0.000141, nll_loss=12.3]


Test accuracy:  tensor(0.9579)
Test ECE:  tensor(0.0124)
--------------------------------------------------
Isotropic Gaussian
Training Model


100%|██████████| 50/50 [03:34<00:00,  4.30s/it, acc=0.992, log_prior_normalized=8.27e+4, loss=8.28e+4, lr=0.000141, nll_loss=21.8]


Test accuracy:  tensor(0.9761)
Test ECE:  tensor(0.0306)
--------------------------------------------------
Student-T
Training Model


100%|██████████| 50/50 [04:19<00:00,  5.20s/it, acc=0.992, log_prior_normalized=8.5e+4, loss=8.5e+4, lr=0.000141, nll_loss=16.9] 


Test accuracy:  tensor(0.9755)
Test ECE:  tensor(0.0344)
--------------------------------------------------
Laplace
Training Model


100%|██████████| 50/50 [03:59<00:00,  4.79s/it, acc=0.945, log_prior_normalized=6.3e+4, loss=6.31e+4, lr=0.000141, nll_loss=80.7]   


Test accuracy:  tensor(0.9233)
Test ECE:  tensor(0.1213)
--------------------------------------------------
Gaussian Mixture
Training Model


100%|██████████| 50/50 [04:47<00:00,  5.75s/it, acc=0.992, log_prior_normalized=1.52e+5, loss=1.52e+5, lr=0.000141, nll_loss=15.1]


Test accuracy:  tensor(0.9785)
Test ECE:  tensor(0.0200)
--------------------------------------------------
Gaussian Spike and Slab
Training Model


100%|██████████| 50/50 [09:24<00:00, 11.30s/it, acc=0.984, log_prior_normalized=1.03e+5, loss=1.03e+5, lr=0.000141, nll_loss=30.6]


Test accuracy:  tensor(0.9768)
Test ECE:  tensor(0.0364)
--------------------------------------------------
Mixed Laplace and Uniform
Training Model


100%|██████████| 50/50 [03:31<00:00,  4.23s/it, acc=1, log_prior_normalized=1.24e+5, loss=1.24e+5, lr=0.000141, nll_loss=6.83]    


Test accuracy:  tensor(0.9738)
Test ECE:  tensor(0.0092)
--------------------------------------------------
Normal Inverse Gamma
Training Model


100%|██████████| 50/50 [04:24<00:00,  5.29s/it, acc=0.992, log_prior_normalized=14.1, loss=48.8, lr=0.000141, nll_loss=28.6]


Test accuracy:  tensor(0.9684)
Test ECE:  tensor(0.0091)
--------------------------------------------------
Isotropic Gaussian
Training Model


  6%|▌         | 3/50 [00:33<08:38, 11.04s/it, acc=0.992, log_prior_normalized=82800.0, loss=8.29e+4, lr=0.00097, nll_loss=26.2]


KeyboardInterrupt: 

In [13]:
# Display the results table as this cell's output.
# Rows are preallocated, so runs that never finished show up as empty rows.
results

Unnamed: 0,Prior,Sample Size,Test Accuracy,Test ECE
0,Isotropic Gaussian,3750.0,0.8783,0.13070764
1,Student-T,3750.0,0.877,0.15161534
2,Laplace,3750.0,0.1135,0.012761287
3,Gaussian Mixture,3750.0,0.886,0.076357536
4,Gaussian Spike and Slab,3750.0,0.877,0.12927525
5,Mixed Laplace and Uniform,3750.0,0.8859,0.03075873
6,Normal Inverse Gamma,3750.0,0.8824,0.025573293
7,Isotropic Gaussian,7500.0,0.9059,0.07683899
8,Student-T,7500.0,0.9101,0.078380935
9,Laplace,7500.0,0.0992,0.0015765876
