# Subnetwork Inference

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:

import os
from hydra import initialize, initialize_config_module, initialize_config_dir, compose
from omegaconf import OmegaConf

initialize(version_base=None, config_path="configuration")

hydra.initialize()

In [3]:
from main import set_seed
import hydra
import numpy as np
import logging
import os
import random

import torch
import copy
import pickle

from laplace import Laplace
from enum import Enum, auto
from pathlib import Path
from collections import defaultdict
from laplace.utils import LargestVarianceDiagLaplaceSubnetMask
from strategies.pruning import OBDSubnetMask

from hydra.core.config_store import ConfigStore
from omegaconf import OmegaConf

from configuration.config import ExperimentConfig
from models.nets import create_mlp
from data.uci_datasets import UCIData
from trainer import ModelTrainer
from metrics import nll_bayesian, nll_map

In [4]:
config = compose(config_name="uci.yaml")
set_seed(config.seed)
data = UCIData(config.data.path)
meta_data = data.get_metadata()

   
train_dataloader, val_dataloader, test_dataloader = data.get_dataloaders(
            dataset=config.data.name,
            batch_size=config.trainer.batch_size,
            seed=config.data.seed,
            val_size=config.data.val_size,
            split_index=0,
            gap=config.data.gap,
        )

    
model = create_mlp(
            input_size=config.model.input_size,
            hidden_sizes=config.model.hidden_sizes,
            output_size=config.model.output_size,
        )
model = model.double()

trainer = ModelTrainer(config.trainer)

map_model, sigma = trainer.train(
            model=model,
            train_dataloader=train_dataloader,
            val_dataloader=val_dataloader,
        )

nll, err, count = trainer.evaluate(
            model=map_model, sigma=sigma, dataloader=test_dataloader
        )
print(nll)

1.1531623411907508


In [11]:
model_copy = copy.deepcopy(model)
la, prior_precision = trainer.train_la_posthoc(
            model=model_copy,
            dataloader=train_dataloader,
            subset_of_weights="all",
            hessian_structure="diag",
            sigma_noise=sigma,
            prior_mean=config.trainer.la.prior_mean,
            val_dataloader=val_dataloader,
        )

In [18]:
la.posterior_variance

tensor([0.0176, 0.0145, 0.0156,  ..., 0.0089, 0.0041, 0.0006],
       dtype=torch.float64)

In [17]:
torch.linalg.inv(torch.eye(la.posterior_precision.shape[0]) * la.posterior_precision).diag()

tensor([0.0176, 0.0145, 0.0156,  ..., 0.0089, 0.0041, 0.0006],
       dtype=torch.float64)

In [10]:
la.posterior_precision

<laplace.utils.matrix.KronDecomposed at 0x2845b5220>

In [7]:
model_copy = copy.deepcopy(model)
la, prior_precision = trainer.train_la_posthoc(
            model=model_copy,
            dataloader=train_dataloader,
            subset_of_weights="all",
            hessian_structure="kron",
            sigma_noise=sigma,
            prior_mean=config.trainer.la.prior_mean,
            val_dataloader=val_dataloader,
        )

In [8]:
la.posterior_variance

AttributeError: 'KronLaplace' object has no attribute 'posterior_variance'

In [1]:
I = torch.linalg.inv(la.H.to_matrix())
torch.linalg.eigvalsh(I)

NameError: name 'torch' is not defined

In [41]:
def kron(t1, t2):
    """Computes the Kronecker product between two tensors.

    Parameters
    ----------
    t1 : torch.Tensor
    t2 : torch.Tensor

    Returns
    -------
    kron_product : torch.Tensor
    """
    t1_height, t1_width = t1.size()
    t2_height, t2_width = t2.size()
    out_height = t1_height * t2_height
    out_width = t1_width * t2_width

    tiled_t2 = t2.repeat(t1_height, t1_width)
    expanded_t1 = (
        t1.unsqueeze(2)
          .unsqueeze(3)
          .repeat(1, t2_height, t2_width, 1)
          .view(out_height, out_width)
    )

    return expanded_t1 * tiled_t2

def block_diag(blocks):
    """Compose block-diagonal matrix of individual blocks.

    Parameters
    ----------
    blocks : list[torch.Tensor]

    Returns
    -------
    M : torch.Tensor
    """
    P = sum([b.shape[0] for b in blocks])
    M = torch.zeros(P, P)
    p_cur = 0
    for block in blocks:
        p_block = block.shape[0]
        M[p_cur:p_cur+p_block, p_cur:p_cur+p_block] = block
        p_cur += p_block
    return M

In [50]:
blocks = list()
for Qs, ls, delta in zip(K.eigenvectors, K.eigenvalues, K.deltas):
    if len(ls) == 1:
        Q, l = Qs[0], ls[0]
        blocks.append(Q @ torch.diag(torch.pow(l + delta, 1)) @ Q.T)
    else:
        Q1, Q2 = Qs
        l1, l2 = ls
        Q = kron(Q1, Q2)
        l = torch.pow(torch.ger(l1, l2) + delta, 1)
        L = torch.diag(l.flatten())
        B =Q @ L @ Q.T
        B = torch.linalg.inv(B)
        blocks.append(B)

I = block_diag(blocks)


tensor([[ 1.0000e+00, -3.8579e-07,  1.0005e-06,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 2.1087e-07,  1.0000e+00,  2.5885e-07,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-9.5635e-07, -2.7336e-07,  1.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  1.0000e+00,
         -2.1851e-06,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ..., -2.8605e-06,
          1.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  8.2084e+05]])

torch.Size([1, 3201, 3201])

In [13]:
torch.linalg.inv(la.H)

_LinAlgError: linalg.inv: The diagonal element 773 is zero, the inversion could not be completed because the input matrix is singular.

## Experiments using UCI gap datasets

### Reproduce results using the sub-network selection strategy proposed in Daxberger et al., 2020

### Results of using pruning methods as a sub-network selection strategy


### Results of using KFAC approximated sub-network selection strategy

## Conclusion