# FashionMNIST Classification Experiments & Ablations

In [16]:
# Notebook setup: export environment variables into the kernel process, import the
# modules used by the experiment cells, and widen the notebook page.
#
# Keep comments on their own lines in this section: anything written after a value on
# a `%env` line becomes part of that value.
#
# NOTE(review): TF_CPP_MIN_LOG_LEVEL=3 presumably silences TensorFlow's C++ logging — confirm.
%env TF_CPP_MIN_LOG_LEVEL=3
# XLA_FLAGS and TF_CUDNN_DETERMINISTIC are the two settings run_base.py itself reports
# (in its start-up message, visible in the saved outputs) as making GPU operations
# deterministic.
%env XLA_FLAGS=--xla_gpu_deterministic_reductions
%env TF_CUDNN_DETERMINISTIC=1
%env TF_DETERMINISTIC_OPS=1
# NOTE(review): Python reads PYTHONHASHSEED when the interpreter starts, so exporting it
# here likely does not change hashing inside the already-running kernel — confirm
# whether it has to be set before Jupyter is launched.
%env PYTHONHASHSEED=0
# %env XLA_PYTHON_CLIENT_MEM_FRACTION=0.95  # can help avoid OOM errors

# Load modules
# `os` is used by every experiment cell to inspect/change the working directory;
# `sys` is not referenced in the cells visible in this notebook.
import sys
import os
import warnings
# Suppresses all Python warnings for the rest of the kernel session.
warnings.filterwarnings('ignore')

# Set Jupyter notebook width to display training progress correctly
# (injects a CSS rule that forces the `.container` element to 150% width).
from IPython.display import display, HTML
display(HTML("<style>.container { width:150% !important; }</style>"))

env: TF_CPP_MIN_LOG_LEVEL=3
env: XLA_FLAGS=--xla_gpu_deterministic_reductions
env: TF_CUDNN_DETERMINISTIC=1
env: TF_DETERMINISTIC_OPS=1
env: PYTHONHASHSEED=0


## Guide to reading the printouts below:

### OOD uncertainty estimation metrics:
- For all of the below: the first entry is for MNIST, the second entry is for NotMNIST, and the third entry is for KMNIST
- `auc ent`: OOD detection AUROC based on predictive entropy
- `auc alea`: OOD detection AUROC based on aleatoric uncertainty only
- `auc epi`: OOD detection AUROC based on epistemic uncertainty only

### Test evaluation metrics:
- `te_nll`: Test negative log-likelihood
- `te_acc`: Test accuracy
- `te ece`: Test expected calibration error (ECE)

## A. FSVI Best Overall: (1) Full covariance in KL + (2) dropout (otherwise same as baseline below):

In [24]:
path = os.path.abspath(os.path.dirname(os.getcwd()))

if os.getcwd()[-4:] == 'fsvi':
    pass
else:
    os.chdir(path)

%run run_base.py\
--data_training fashionmnist --data_ood mnist notmnist kmnist --model fsvi_cnn_dropout --architecture four_layers --activation relu --epochs 30 --learning_rate 5e-4 --optimizer adam --batch_size 128 --prior_mean 0 --prior_cov 0.006 --prior_type bnn_induced --inducing_points 4 --n_marginals 1 --kl_sup max --inducing_input_type ood_rand --inducing_input_ood_data kmnist --kl_scale none --n_samples 5 --logging_frequency 500 --seed 0 --debug --save --save_path tmp --feature_update 1 --full_cov --dropout_rate 0.1

Making GPU operations deterministic by setting os.environ["XLA_FLAGS"] = "--xla_gpu_deterministic_reductions""and os.environ["TF_CUDNN_DETERMINISTIC"] = "1"

Device: gpu

Input arguments:
 {
    "data_training":"fashionmnist",
    "data_ood":[
        "mnist",
        "notmnist",
        "kmnist"
    ],
    "optimizer":"adam",
    "optimizer_var":"not_specified",
    "momentum":0.0,
    "momentum_var":0.0,
    "schedule":"not_specified",
    "architecture":"four_layers",
    "activation":"relu",
    "prior_mean":"0",
    "prior_cov":"0.006",
    "prior_covs":[
        0.0
    ],
    "prior_type":"bnn_induced",
    "epochs":30,
    "start_var_opt":0,
    "batch_size":128,
    "learning_rate":0.0005,
    "learning_rate_var":0.001,
    "dropout_rate":0.1,
    "regularization":0,
    "n_marginals":1,
    "n_condition":128,
    "inducing_input_type":"ood_rand",
    "inducing_input_ood_data":[
        "kmnist"
    ],
    "inducing_input_ood_data_size":50000,
    "kl_scale":"none",
    "featu

  18     0.075    97.290     0.280    92.360     0.015  |   99.305   97.196   99.972  |  97.469   95.704   98.578  |  99.630   97.836   99.999  |      0.158  |    1.908    1.660    2.047  |     0.000     0.000     0.000     56.007        3.357
  19     0.077    96.910     0.301    92.330     0.021  |   98.959   97.161   99.977  |  97.564   95.849   99.034  |  99.346   97.637   99.998  |      0.146  |    1.869    1.670    2.041  |     0.000     0.000     0.000     55.971        3.393
  20     0.063    97.660     0.291    92.690     0.015  |   99.431   97.039   99.974  |  98.193   95.534   98.761  |  99.689   97.652   99.999  |      0.149  |    1.901    1.664    2.018  |     0.000     0.000     0.000     56.034        3.408
  21     0.068    97.290     0.316    92.270     0.021  |   99.623   97.365   99.930  |  98.619   96.324   99.023  |  99.840   97.839   99.999  |      0.146  |    1.933    1.708    2.015  |     0.000     0.000     0.000     56.012        3.381
  22     0.042    98.520

## B. FSVI Baseline:

In [3]:
path = os.path.abspath(os.path.dirname(os.getcwd()))

if os.getcwd()[-4:] == 'fsvi':
    pass
else:
    os.chdir(path)

%run run_base.py\
--data_training fashionmnist --data_ood mnist notmnist kmnist --model fsvi_cnn --architecture four_layers --activation relu --epochs 30 --learning_rate 5e-4 --optimizer adam --batch_size 128 --prior_mean 0 --prior_cov 10 --prior_type fixed --inducing_points 4 --n_marginals 1 --kl_sup max --inducing_input_type ood_rand --inducing_input_ood_data kmnist --kl_scale none --n_samples 5 --logging_frequency 500 --seed 0 --debug --save --save_path tmp --feature_update 1

Making GPU operations deterministic by setting os.environ["XLA_FLAGS"] = "--xla_gpu_deterministic_reductions""and os.environ["TF_CUDNN_DETERMINISTIC"] = "1"

Device: gpu

Input arguments:
 {
    "data_training":"fashionmnist",
    "data_ood":[
        "mnist",
        "notmnist",
        "kmnist"
    ],
    "optimizer":"adam",
    "optimizer_var":"not_specified",
    "momentum":0.0,
    "momentum_var":0.0,
    "schedule":"not_specified",
    "architecture":"four_layers",
    "activation":"relu",
    "prior_mean":"0",
    "prior_cov":"10",
    "prior_covs":[
        0.0
    ],
    "prior_type":"fixed",
    "epochs":30,
    "start_var_opt":0,
    "batch_size":128,
    "learning_rate":0.0005,
    "learning_rate_var":0.001,
    "dropout_rate":0.0,
    "regularization":0,
    "n_marginals":1,
    "n_condition":128,
    "inducing_input_type":"ood_rand",
    "inducing_input_ood_data":[
        "kmnist"
    ],
    "inducing_input_ood_data_size":50000,
    "kl_scale":"none",
    "feature_map_ja

  18     0.025    98.980     0.356    92.310     0.043  |   99.690   95.582   99.998  |  99.721   95.697   99.998  |  82.459   88.812   62.150  |      0.086  |    2.090    1.309    2.284  |     0.000     0.000     0.000     19.707        1.734
  19     0.042    98.340     0.389    91.850     0.045  |   99.577   95.193   99.999  |  99.626   95.361   99.999  |  82.962   87.782   64.854  |      0.090  |    2.062    1.289    2.278  |     0.000     0.000     0.000     19.709        1.722
  20     0.024    99.100     0.376    92.340     0.044  |   99.697   95.410   99.999  |  99.736   95.566   99.999  |  81.640   88.007   62.694  |      0.082  |    2.103    1.303    2.282  |     0.000     0.000     0.000     19.713        1.717
  21     0.037    98.530     0.420    92.110     0.046  |   99.794   95.757  100.000  |  99.826   95.939  100.000  |  81.071   87.620   59.260  |      0.081  |    2.139    1.338    2.288  |     0.000     0.000     0.000     19.671        1.737
  22     0.010    99.720

## 1. Full covariance in KL (otherwise same as baseline):

In [4]:
path = os.path.abspath(os.path.dirname(os.getcwd()))

if os.getcwd()[-4:] == 'fsvi':
    pass
else:
    os.chdir(path)

%run run_base.py\
--data_training fashionmnist --data_ood mnist notmnist kmnist --model fsvi_cnn --architecture four_layers --activation relu --epochs 30 --learning_rate 5e-4 --optimizer adam --batch_size 128 --prior_mean 0 --prior_cov 0.005 --prior_type bnn_induced --inducing_points 4 --n_marginals 1 --kl_sup max --inducing_input_type ood_rand --inducing_input_ood_data kmnist --kl_scale none --n_samples 5 --logging_frequency 500 --seed 0 --debug --save --save_path tmp --feature_update 1 --full_cov

Making GPU operations deterministic by setting os.environ["XLA_FLAGS"] = "--xla_gpu_deterministic_reductions""and os.environ["TF_CUDNN_DETERMINISTIC"] = "1"

Device: gpu

Input arguments:
 {
    "data_training":"fashionmnist",
    "data_ood":[
        "mnist",
        "notmnist",
        "kmnist"
    ],
    "optimizer":"adam",
    "optimizer_var":"not_specified",
    "momentum":0.0,
    "momentum_var":0.0,
    "schedule":"not_specified",
    "architecture":"four_layers",
    "activation":"relu",
    "prior_mean":"0",
    "prior_cov":"0.005",
    "prior_covs":[
        0.0
    ],
    "prior_type":"bnn_induced",
    "epochs":30,
    "start_var_opt":0,
    "batch_size":128,
    "learning_rate":0.0005,
    "learning_rate_var":0.001,
    "dropout_rate":0.0,
    "regularization":0,
    "n_marginals":1,
    "n_condition":128,
    "inducing_input_type":"ood_rand",
    "inducing_input_ood_data":[
        "kmnist"
    ],
    "inducing_input_ood_data_size":50000,
    "kl_scale":"none",
    "featu

  18     0.048    98.420     0.302    92.060     0.038  |   99.779   97.340   99.988  |  99.636   97.125   99.869  |  99.894   97.776   99.998  |      0.109  |    2.116    1.780    2.185  |     0.000     0.000     0.000     54.003        1.801
  19     0.044    98.450     0.317    92.250     0.038  |   99.848   97.484   99.988  |  99.749   97.274   99.907  |  99.933   97.920   99.999  |      0.100  |    2.115    1.726    2.159  |     0.000     0.000     0.000     54.019        1.789
  20     0.036    98.900     0.321    92.170     0.038  |   99.908   97.155   99.987  |  99.816   96.920   99.888  |  99.963   97.641   99.998  |      0.102  |    2.131    1.735    2.161  |     0.000     0.000     0.000     53.988        1.786
  21     0.080    96.920     0.378    90.920     0.049  |   99.780   97.164   99.985  |  99.601   96.884   99.816  |  99.890   97.628   99.999  |      0.107  |    2.112    1.729    2.144  |     0.000     0.000     0.000     53.814        1.750
  22     0.034    98.750

## 2. Stochastic linearization estimator

### 2.1. (1) Linearization about value sampled from variational distribution over parameters instead of linearization about variational mean (otherwise same as baseline):

In [7]:
path = os.path.abspath(os.path.dirname(os.getcwd()))

if os.getcwd()[-4:] == 'fsvi':
    pass
else:
    os.chdir(path)

%run run_base.py\
--data_training fashionmnist --data_ood mnist notmnist kmnist --model fsvi_cnn --architecture four_layers --activation relu --epochs 30 --learning_rate 5e-4 --optimizer adam --batch_size 128 --prior_mean 0 --prior_cov 10 --prior_type fixed --inducing_points 4 --n_marginals 1 --kl_sup max --inducing_input_type ood_rand --inducing_input_ood_data kmnist --kl_scale none --n_samples 5 --logging_frequency 500 --seed 0 --debug --save --save_path tmp --feature_update 1 --stochastic_linearization

Making GPU operations deterministic by setting os.environ["XLA_FLAGS"] = "--xla_gpu_deterministic_reductions""and os.environ["TF_CUDNN_DETERMINISTIC"] = "1"

Device: gpu

Input arguments:
 {
    "data_training":"fashionmnist",
    "data_ood":[
        "mnist",
        "notmnist",
        "kmnist"
    ],
    "optimizer":"adam",
    "optimizer_var":"not_specified",
    "momentum":0.0,
    "momentum_var":0.0,
    "schedule":"not_specified",
    "architecture":"four_layers",
    "activation":"relu",
    "prior_mean":"0",
    "prior_cov":"10",
    "prior_covs":[
        0.0
    ],
    "prior_type":"fixed",
    "epochs":30,
    "start_var_opt":0,
    "batch_size":128,
    "learning_rate":0.0005,
    "learning_rate_var":0.001,
    "dropout_rate":0.0,
    "regularization":0,
    "n_marginals":1,
    "n_condition":128,
    "inducing_input_type":"ood_rand",
    "inducing_input_ood_data":[
        "kmnist"
    ],
    "inducing_input_ood_data_size":50000,
    "kl_scale":"none",
    "feature_map_ja

  18     0.030    98.880     0.363    92.140     0.044  |   99.840   96.125  100.000  |  99.858   96.208  100.000  |  87.255   89.474   85.009  |      0.086  |    2.141    1.396    2.287  |     0.000     0.000     0.000     20.111        1.737
  19     0.046    98.220     0.383    92.070     0.046  |   99.665   95.455   99.999  |  99.705   95.584   99.999  |  87.942   89.412   85.942  |      0.082  |    2.085    1.301    2.281  |     0.000     0.000     0.000     20.098        1.747
  20     0.021    99.240     0.378    92.380     0.044  |   99.636   95.232   99.999  |  99.681   95.335   99.999  |  88.840   89.650   87.523  |      0.080  |    2.055    1.301    2.276  |     0.000     0.000     0.000     20.105        1.753
  21     0.047    98.130     0.445    92.020     0.049  |   99.783   96.027   99.999  |  99.818   96.128  100.000  |  88.966   89.672   87.728  |      0.076  |    2.121    1.412    2.288  |     0.000     0.000     0.000     20.085        1.752
  22     0.012    99.660

### 2.2. (1) Linearization about value sampled from variational distribution over parameters instead of linearization about variational mean + (2) gradient flow through linearization evaluation points (otherwise same as baseline):

In [13]:
path = os.path.abspath(os.path.dirname(os.getcwd()))

if os.getcwd()[-4:] == 'fsvi':
    pass
else:
    os.chdir(path)

%run run_base.py\
--data_training fashionmnist --data_ood mnist notmnist kmnist --model fsvi_cnn --architecture four_layers --activation relu --epochs 30 --learning_rate 5e-4 --optimizer adam --batch_size 128 --prior_mean 0 --prior_cov 10 --prior_type fixed --inducing_points 4 --n_marginals 1 --kl_sup max --inducing_input_type ood_rand --inducing_input_ood_data kmnist --kl_scale none --n_samples 5 --logging_frequency 500 --seed 0 --debug --save --save_path tmp --feature_update 1 --stochastic_linearization --grad_flow_jacobian

Making GPU operations deterministic by setting os.environ["XLA_FLAGS"] = "--xla_gpu_deterministic_reductions""and os.environ["TF_CUDNN_DETERMINISTIC"] = "1"

Device: gpu

Input arguments:
 {
    "data_training":"fashionmnist",
    "data_ood":[
        "mnist",
        "notmnist",
        "kmnist"
    ],
    "optimizer":"adam",
    "optimizer_var":"not_specified",
    "momentum":0.0,
    "momentum_var":0.0,
    "schedule":"not_specified",
    "architecture":"four_layers",
    "activation":"relu",
    "prior_mean":"0",
    "prior_cov":"10",
    "prior_covs":[
        0.0
    ],
    "prior_type":"fixed",
    "epochs":30,
    "start_var_opt":0,
    "batch_size":128,
    "learning_rate":0.0005,
    "learning_rate_var":0.001,
    "dropout_rate":0.0,
    "regularization":0,
    "n_marginals":1,
    "n_condition":128,
    "inducing_input_type":"ood_rand",
    "inducing_input_ood_data":[
        "kmnist"
    ],
    "inducing_input_ood_data_size":50000,
    "kl_scale":"none",
    "feature_map_ja

  18     0.040    98.870     0.279    92.220     0.034  |   99.840   96.956   99.995  |  98.005   95.867   98.080  |  99.933   97.645   99.998  |      0.112  |    1.946    1.538    1.985  |     0.000     0.000     0.000     50.628        1.737
  19     0.039    98.630     0.299    92.460     0.035  |   99.755   96.208   99.990  |  98.003   95.045   98.405  |  99.879   97.090   99.999  |      0.104  |    1.915    1.449    1.986  |     0.000     0.000     0.000     50.662        1.761
  20     0.037    98.760     0.293    92.310     0.035  |   99.747   96.636   99.986  |  98.559   95.765   98.731  |  99.869   97.361   99.998  |      0.108  |    1.966    1.526    2.011  |     0.000     0.000     0.000     50.608        1.758
  21     0.043    98.310     0.340    91.890     0.043  |   99.807   96.504   99.990  |  98.491   95.512   98.581  |  99.895   97.178   99.999  |      0.097  |    1.929    1.477    1.975  |     0.000     0.000     0.000     50.601        1.734
  22     0.023    99.270

## 3. Learn only final-layer variational distribution:

### 3.1. (1) Randomness in non-final layers generated by dropout (otherwise same as baseline)

In [8]:
path = os.path.abspath(os.path.dirname(os.getcwd()))

if os.getcwd()[-4:] == 'fsvi':
    pass
else:
    os.chdir(path)

%run run_base.py\
--data_training fashionmnist --data_ood mnist notmnist kmnist --model fsvi_cnn_dropout --architecture four_layers --activation relu --epochs 30 --learning_rate 5e-4 --optimizer adam --batch_size 128 --prior_mean 0 --prior_cov 10 --prior_type fixed --inducing_points 4 --n_marginals 1 --kl_sup max --inducing_input_type ood_rand --inducing_input_ood_data kmnist --kl_scale none --n_samples 5 --logging_frequency 500 --seed 0 --debug --save --save_path tmp --feature_update 1 --final_layer_variational --dropout_rate 0.1

Making GPU operations deterministic by setting os.environ["XLA_FLAGS"] = "--xla_gpu_deterministic_reductions""and os.environ["TF_CUDNN_DETERMINISTIC"] = "1"

Device: gpu

Input arguments:
 {
    "data_training":"fashionmnist",
    "data_ood":[
        "mnist",
        "notmnist",
        "kmnist"
    ],
    "optimizer":"adam",
    "optimizer_var":"not_specified",
    "momentum":0.0,
    "momentum_var":0.0,
    "schedule":"not_specified",
    "architecture":"four_layers",
    "activation":"relu",
    "prior_mean":"0",
    "prior_cov":"10",
    "prior_covs":[
        0.0
    ],
    "prior_type":"fixed",
    "epochs":30,
    "start_var_opt":0,
    "batch_size":128,
    "learning_rate":0.0005,
    "learning_rate_var":0.001,
    "dropout_rate":0.1,
    "regularization":0,
    "n_marginals":1,
    "n_condition":128,
    "inducing_input_type":"ood_rand",
    "inducing_input_ood_data":[
        "kmnist"
    ],
    "inducing_input_ood_data_size":50000,
    "kl_scale":"none",
    "feature_map_ja

  18     0.040    98.630     0.287    92.880     0.021  |   99.824   95.997  100.000  |  99.853   95.980  100.000  |  80.965   85.824   74.767  |      0.130  |    2.145    1.589    2.294  |     0.000     0.000     0.000     10.942        3.143
  19     0.036    98.740     0.311    92.460     0.028  |   99.781   96.260  100.000  |  99.824   96.296  100.000  |  83.279   86.812   77.118  |      0.120  |    2.101    1.544    2.291  |     0.000     0.000     0.000     11.051        3.144
  20     0.029    99.070     0.301    93.000     0.024  |   99.741   95.347  100.000  |  99.796   95.393  100.000  |  82.693   85.986   77.913  |      0.119  |    2.099    1.516    2.292  |     0.000     0.000     0.000     11.068        3.130
  21     0.038    98.620     0.354    92.330     0.032  |   99.770   96.439  100.000  |  99.831   96.517  100.000  |  82.407   85.959   78.724  |      0.113  |    2.137    1.595    2.294  |     0.000     0.000     0.000     10.835        3.109
  22     0.026    99.150

### 3.2. (1) Randomness in non-final layers generated by dropout + (2) linearization about value sampled from variational distribution over parameters instead of linearization about variational mean (otherwise same as baseline)

In [9]:
path = os.path.abspath(os.path.dirname(os.getcwd()))

if os.getcwd()[-4:] == 'fsvi':
    pass
else:
    os.chdir(path)

%run run_base.py\
--data_training fashionmnist --data_ood mnist notmnist kmnist --model fsvi_cnn_dropout --architecture four_layers --activation relu --epochs 30 --learning_rate 5e-4 --optimizer adam --batch_size 128 --prior_mean 0 --prior_cov 10 --prior_type fixed --inducing_points 4 --n_marginals 1 --kl_sup max --inducing_input_type ood_rand --inducing_input_ood_data kmnist --kl_scale none --n_samples 5 --logging_frequency 500 --seed 0 --debug --save --save_path tmp --feature_update 1 --final_layer_variational --dropout_rate 0.1 --stochastic_linearization

Making GPU operations deterministic by setting os.environ["XLA_FLAGS"] = "--xla_gpu_deterministic_reductions""and os.environ["TF_CUDNN_DETERMINISTIC"] = "1"

Device: gpu

Input arguments:
 {
    "data_training":"fashionmnist",
    "data_ood":[
        "mnist",
        "notmnist",
        "kmnist"
    ],
    "optimizer":"adam",
    "optimizer_var":"not_specified",
    "momentum":0.0,
    "momentum_var":0.0,
    "schedule":"not_specified",
    "architecture":"four_layers",
    "activation":"relu",
    "prior_mean":"0",
    "prior_cov":"10",
    "prior_covs":[
        0.0
    ],
    "prior_type":"fixed",
    "epochs":30,
    "start_var_opt":0,
    "batch_size":128,
    "learning_rate":0.0005,
    "learning_rate_var":0.001,
    "dropout_rate":0.1,
    "regularization":0,
    "n_marginals":1,
    "n_condition":128,
    "inducing_input_type":"ood_rand",
    "inducing_input_ood_data":[
        "kmnist"
    ],
    "inducing_input_ood_data_size":50000,
    "kl_scale":"none",
    "feature_map_ja

  18     0.040    98.630     0.287    92.880     0.021  |   99.824   95.997  100.000  |  99.853   95.980  100.000  |  80.965   85.824   74.767  |      0.130  |    2.145    1.589    2.294  |     0.000     0.000     0.000     10.858        3.135
  19     0.036    98.740     0.311    92.460     0.028  |   99.781   96.260  100.000  |  99.824   96.296  100.000  |  83.279   86.812   77.118  |      0.120  |    2.101    1.544    2.291  |     0.000     0.000     0.000     10.910        3.118
  20     0.029    99.070     0.301    93.000     0.024  |   99.741   95.347  100.000  |  99.796   95.393  100.000  |  82.693   85.986   77.913  |      0.119  |    2.099    1.516    2.292  |     0.000     0.000     0.000     10.993        3.127
  21     0.038    98.620     0.354    92.330     0.032  |   99.770   96.439  100.000  |  99.831   96.517  100.000  |  82.407   85.959   78.724  |      0.113  |    2.137    1.595    2.294  |     0.000     0.000     0.000     11.007        3.155
  22     0.026    99.150

### 3.3. (1) Randomness in non-final layers generated by dropout + (2) linearization about value sampled from variational distribution over parameters instead of linearization about variational mean + (3) gradient flow through linearization evaluation points (otherwise same as baseline)

In [11]:
path = os.path.abspath(os.path.dirname(os.getcwd()))

if os.getcwd()[-4:] == 'fsvi':
    pass
else:
    os.chdir(path)

%run run_base.py\
--data_training fashionmnist --data_ood mnist notmnist kmnist --model fsvi_cnn_dropout --architecture four_layers --activation relu --epochs 30 --learning_rate 5e-4 --optimizer adam --batch_size 128 --prior_mean 0 --prior_cov 10 --prior_type fixed --inducing_points 4 --n_marginals 1 --kl_sup max --inducing_input_type ood_rand --inducing_input_ood_data kmnist --kl_scale none --n_samples 5 --logging_frequency 500 --seed 0 --debug --save --save_path tmp --feature_update 1 --final_layer_variational --dropout_rate 0.1 --stochastic_linearization --grad_flow_jacobian

Making GPU operations deterministic by setting os.environ["XLA_FLAGS"] = "--xla_gpu_deterministic_reductions""and os.environ["TF_CUDNN_DETERMINISTIC"] = "1"

Device: gpu

Input arguments:
 {
    "data_training":"fashionmnist",
    "data_ood":[
        "mnist",
        "notmnist",
        "kmnist"
    ],
    "optimizer":"adam",
    "optimizer_var":"not_specified",
    "momentum":0.0,
    "momentum_var":0.0,
    "schedule":"not_specified",
    "architecture":"four_layers",
    "activation":"relu",
    "prior_mean":"0",
    "prior_cov":"10",
    "prior_covs":[
        0.0
    ],
    "prior_type":"fixed",
    "epochs":30,
    "start_var_opt":0,
    "batch_size":128,
    "learning_rate":0.0005,
    "learning_rate_var":0.001,
    "dropout_rate":0.1,
    "regularization":0,
    "n_marginals":1,
    "n_condition":128,
    "inducing_input_type":"ood_rand",
    "inducing_input_ood_data":[
        "kmnist"
    ],
    "inducing_input_ood_data_size":50000,
    "kl_scale":"none",
    "feature_map_ja

  18     0.051    98.470     0.281    92.710     0.020  |   99.953   96.978   99.987  |  98.334   95.739   98.277  |  99.980   97.534   99.997  |      0.140  |    2.064    1.642    2.085  |     0.000     0.000     0.000     11.009        3.128
  19     0.050    98.200     0.319    92.550     0.026  |   99.928   97.126   99.984  |  98.708   96.068   98.874  |  99.966   97.537   99.998  |      0.122  |    2.020    1.576    2.078  |     0.000     0.000     0.000     11.025        3.136
  20     0.039    98.770     0.287    92.990     0.019  |   99.893   97.387   99.979  |  98.400   96.247   98.422  |  99.951   97.805   99.996  |      0.133  |    2.030    1.666    2.069  |     0.000     0.000     0.000     11.051        3.141
  21     0.040    98.620     0.311    92.900     0.023  |   99.904   97.025   99.971  |  98.849   96.017   98.860  |  99.959   97.410   99.998  |      0.126  |    2.045    1.631    2.081  |     0.000     0.000     0.000     10.921        3.116
  22     0.029    99.040

### 3.4. (0) Context set constructed from training set + (1) randomness in non-final layers generated by dropout + (2) linearization about value sampled from variational distribution over parameters instead of linearization about variational mean + (3) gradient flow through linearization evaluation points (otherwise same as baseline):

In [12]:
path = os.path.abspath(os.path.dirname(os.getcwd()))

if os.getcwd()[-4:] == 'fsvi':
    pass
else:
    os.chdir(path)

%run run_base.py\
--data_training fashionmnist --data_ood mnist notmnist kmnist --model fsvi_cnn_dropout --architecture four_layers --activation relu --epochs 30 --learning_rate 5e-4 --optimizer adam --batch_size 128 --prior_mean 0 --prior_cov 10 --prior_type fixed --inducing_points 4 --n_marginals 1 --kl_sup max --inducing_input_type train_pixel_rand_0.5 --kl_scale none --n_samples 5 --logging_frequency 500 --seed 0 --debug --save --save_path tmp --feature_update 1 --final_layer_variational --dropout_rate 0.1 --stochastic_linearization --grad_flow_jacobian

Making GPU operations deterministic by setting os.environ["XLA_FLAGS"] = "--xla_gpu_deterministic_reductions""and os.environ["TF_CUDNN_DETERMINISTIC"] = "1"

Device: gpu

Input arguments:
 {
    "data_training":"fashionmnist",
    "data_ood":[
        "mnist",
        "notmnist",
        "kmnist"
    ],
    "optimizer":"adam",
    "optimizer_var":"not_specified",
    "momentum":0.0,
    "momentum_var":0.0,
    "schedule":"not_specified",
    "architecture":"four_layers",
    "activation":"relu",
    "prior_mean":"0",
    "prior_cov":"10",
    "prior_covs":[
        0.0
    ],
    "prior_type":"fixed",
    "epochs":30,
    "start_var_opt":0,
    "batch_size":128,
    "learning_rate":0.0005,
    "learning_rate_var":0.001,
    "dropout_rate":0.1,
    "regularization":0,
    "n_marginals":1,
    "n_condition":128,
    "inducing_input_type":"train_pixel_rand_0.5",
    "inducing_input_ood_data":[
        "not_specified"
    ],
    "inducing_input_ood_data_size":50000,
    "kl_scale":"none",


  18     0.115    96.380     0.272    93.190     0.012  |   96.051   96.352   95.789  |  94.258   90.044   93.582  |  97.225   98.022   97.157  |      0.256  |    1.344    1.440    1.330  |     0.000     0.000     0.000     11.192        3.184
  19     0.113    96.630     0.278    93.170     0.014  |   95.722   95.783   94.468  |  94.042   89.741   92.216  |  96.854   97.589   96.054  |      0.280  |    1.325    1.399    1.243  |     0.000     0.000     0.000     11.194        3.182
  20     0.151    94.970     0.287    93.060     0.022  |   95.896   96.720   96.320  |  94.299   90.203   94.094  |  96.814   98.312   97.517  |      0.307  |    1.431    1.496    1.415  |     0.000     0.000     0.000     11.266        3.184
  21     0.097    97.300     0.289    92.840     0.011  |   96.576   97.772   96.272  |  95.185   93.321   94.343  |  97.216   98.635   97.324  |      0.268  |    1.405    1.547    1.322  |     0.000     0.000     0.000     11.370        3.180
  22     0.105    96.810

## 4. Training with more MC samples for expected log-likelihood estimation (otherwise same as baseline):

In [6]:
path = os.path.abspath(os.path.dirname(os.getcwd()))

if os.getcwd()[-4:] == 'fsvi':
    pass
else:
    os.chdir(path)

%run run_base.py\
--data_training fashionmnist --data_ood mnist notmnist kmnist --model fsvi_cnn --architecture four_layers --activation relu --epochs 30 --learning_rate 5e-4 --optimizer adam --batch_size 128 --prior_mean 0 --prior_cov 10 --prior_type fixed --inducing_points 4 --n_marginals 1 --kl_sup max --inducing_input_type ood_rand --inducing_input_ood_data kmnist --kl_scale none --n_samples 10 --logging_frequency 500 --seed 0 --debug --save --save_path tmp --feature_update 1

Making GPU operations deterministic by setting os.environ["XLA_FLAGS"] = "--xla_gpu_deterministic_reductions""and os.environ["TF_CUDNN_DETERMINISTIC"] = "1"

Device: gpu

Input arguments:
 {
    "data_training":"fashionmnist",
    "data_ood":[
        "mnist",
        "notmnist",
        "kmnist"
    ],
    "optimizer":"adam",
    "optimizer_var":"not_specified",
    "momentum":0.0,
    "momentum_var":0.0,
    "schedule":"not_specified",
    "architecture":"four_layers",
    "activation":"relu",
    "prior_mean":"0",
    "prior_cov":"10",
    "prior_covs":[
        0.0
    ],
    "prior_type":"fixed",
    "epochs":30,
    "start_var_opt":0,
    "batch_size":128,
    "learning_rate":0.0005,
    "learning_rate_var":0.001,
    "dropout_rate":0.0,
    "regularization":0,
    "n_marginals":1,
    "n_condition":128,
    "inducing_input_type":"ood_rand",
    "inducing_input_ood_data":[
        "kmnist"
    ],
    "inducing_input_ood_data_size":50000,
    "kl_scale":"none",
    "feature_map_ja

  18     0.021    99.250     0.374    92.160     0.044  |   99.796   95.795  100.000  |  99.823   95.916  100.000  |  83.415   88.548   61.576  |      0.086  |    2.115    1.345    2.287  |     0.000     0.000     0.000     29.227        1.962
  19     0.031    98.830     0.381    91.990     0.045  |   99.739   94.693   99.998  |  99.772   94.826   99.999  |  84.177   87.820   64.174  |      0.087  |    2.100    1.263    2.280  |     0.000     0.000     0.000     29.048        1.984
  20     0.020    99.350     0.389    92.400     0.042  |   99.820   95.453   99.999  |  99.848   95.619  100.000  |  82.050   87.586   59.112  |      0.084  |    2.127    1.314    2.287  |     0.000     0.000     0.000     29.046        1.941
  21     0.027    99.000     0.431    91.740     0.050  |   99.829   96.686  100.000  |  99.858   96.774  100.000  |  80.567   87.653   50.664  |      0.081  |    2.140    1.478    2.294  |     0.000     0.000     0.000     28.995        1.956
  22     0.011    99.700

# Thank you for reviewing our results!