In [1]:
%load_ext autoreload
%autoreload 2
#%matplotlib inline
import os
import sys
# Make the grandparent (2 levels up) and then the parent (1 level up) of the
# current working directory importable. The order matters only for the
# position in sys.path; after the loop N_up == 1 and nb_dir is the parent dir.
for N_up in (2, 1):
    nb_dir = '/'.join(os.getcwd().split('/')[:-N_up])
    if nb_dir not in sys.path:
        sys.path.append(nb_dir)

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm

In [3]:
# Put the project checkout on sys.path so the `src.*` imports below resolve.
# NOTE(review): the checkout location is hard-coded relative to the user's home
# directory; adjust it if the repository lives elsewhere.
repo_root = Path('~/Code/bayesian-lottery-tickets').expanduser()
nb_dir = str(repo_root)
if nb_dir not in sys.path:
    sys.path.append(nb_dir)
from src.scripts.train_regression import train_loop
from src.datasets.additional_gap_loader import load_matern_1d
from src.utils import Datafeed, homo_Gauss_mloglike
from src.models.MLPs import res_MLP
from pathlib import Path

In [4]:
# Options shared by the train and test loaders.
loader_opts = dict(pin_memory=True, num_workers=2)

# Training set: arrays returned by the project loader, wrapped in the project
# Datafeed container and served in shuffled mini-batches of 512.
X_train, y_train = load_matern_1d('../data')
trainset = Datafeed(torch.Tensor(X_train), torch.Tensor(y_train), transform=None)
train_loader = torch.utils.data.DataLoader(
    trainset, batch_size=512, shuffle=True, **loader_opts
)

# Test grid: 800 evenly spaced inputs over xlim, shaped (800, 1).
# (A narrower alternative range was [-1.35, 1.6].)
xlim = [-2.0, 1.9]
X_test = torch.Tensor(np.linspace(*xlim, 800)).unsqueeze(1)
# The grid is passed as both inputs and targets: no test labels are built here.
testset = Datafeed(torch.Tensor(X_test), torch.Tensor(X_test), transform=None)
test_loader = torch.utils.data.DataLoader(
    testset, batch_size=2048, shuffle=False, **loader_opts
)

# Quick look at the raw training data.
plt.figure(dpi=100)
plt.scatter(X_train, y_train)
plt.show()


In [5]:
# Build the regression network: 1 -> 50 -> 50 -> 1 MLP with tanh activations.
def get_model():
    """Return a freshly initialised tanh MLP with two hidden layers of 50 units.

    The global torch RNG is re-seeded with 711 on every call, so repeated calls
    yield identical initial weights (and reset the global RNG as a side effect).
    """
    torch.manual_seed(711)
    width = 50
    # Layers are instantiated in network order so the RNG is consumed in the
    # same sequence on every call.
    layers = [
        torch.nn.Linear(1, width),
        torch.nn.Tanh(),
        torch.nn.Linear(width, width),
        torch.nn.Tanh(),
        torch.nn.Linear(width, 1),
    ]
    return torch.nn.Sequential(*layers)
model = get_model()

# Checkpoint of the trained weights; training is skipped when it already exists.
model_path = Path('../data/model3.pth')

if not model_path.exists():
    # train MAP: 256 epochs of Adam (lr=1e-2) on the mean-squared error
    n_epochs = 256
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    for epoch in tqdm(range(n_epochs)):
        for X, y in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(X), y)
            loss.backward()
            optimizer.step()
    # Persist the weights so later runs take the load branch instead.
    torch.save(model.state_dict(), model_path)
else:
    model.load_state_dict(torch.load(model_path))

In [6]:
# Point prediction of the trained network on the dense test grid.
model.eval()
with torch.no_grad():
    # Inside no_grad the output carries no graph, so no .detach() is needed.
    y_test = model(X_test).cpu()

# Network fit (line) against the training data (red points).
plt.figure(dpi=100)
plt.plot(X_test, y_test)
plt.scatter(X_train, y_train, c='r')
# Finish with plt.show(), as the data-exploration cell does, so the cell does
# not echo the PathCollection repr as its output.
plt.show()


<matplotlib.collections.PathCollection at 0x7f5254706460>

In [7]:
def plot_1d_regression(X_train, y_train, y_test_mean, y_test_std, postfix, x_grid=None):
    """Plot training data, predictive mean and a +/- one-std band, then save it.

    Args:
        X_train: training inputs, drawn as red scatter points.
        y_train: training targets, drawn as red scatter points.
        y_test_mean: predictive mean on the grid; indexed as ``[:, 0]``.
        y_test_std: predictive standard deviation on the grid; indexed as
            ``[:, 0, 0]``. It is used as the half-width of the shaded band
            exactly as given, so it must be a standard deviation, not a
            variance.
        postfix: suffix of the output file ``../data/plot_{postfix}.png``.
        x_grid: inputs the predictions were evaluated on; indexed as
            ``[:, 0]``. Defaults to the notebook-level ``X_test`` so existing
            five-argument calls keep working.
    """
    if x_grid is None:
        # Backward-compatible fallback to the notebook global.
        x_grid = X_test
    plt.figure(dpi=100)
    plt.scatter(X_train, y_train, c='r')
    plt.plot(x_grid, y_test_mean, 'b')
    plt.fill_between(x_grid[:, 0],
                     y_test_mean[:, 0] + y_test_std[:, 0, 0],
                     y_test_mean[:, 0] - y_test_std[:, 0, 0],
                     color='b', alpha=0.3)
    plt.savefig(f'../data/plot_{postfix}.png')

In [8]:
from laplace import Laplace

In [9]:
# Laplace approximation over all weights with a full Hessian, fitted on the
# training loader around the already-trained network.
lap_model = Laplace(model, 'regression', subset_of_weights='all', hessian_structure='full')
lap_model.fit(train_loader)
model.eval()
with torch.no_grad():
    # For regression the Laplace object returns (mean, variance), not
    # (mean, std); the second output is indexed as [:, 0, 0] by the plotter.
    y_test_mean, y_test_var = lap_model(X_test)
# Convert variance to standard deviation before plotting the +/- band; clamp
# guards against tiny negative values from round-off turning into NaN.
y_test_std = y_test_var.clamp(min=0).sqrt()
plot_1d_regression(X_train, y_train, y_test_mean, y_test_std, 'all')

    Found GPU%d %s which is of cuda capability %d.%d.
    PyTorch no longer supports this GPU because it is too old.
    The minimum cuda capability supported by this library is %d.%d.
    


In [19]:
from laplace.utils import LargestMagnitudeSubnetMask, RandomSubnetMask, LastLayerSubnetMask, LargestVarianceDiagLaplaceSubnetMask, LargestVarianceSWAGSubnetMask
from laplace import DiagLaplace

# Number of weights kept in the subnetwork; the same for every strategy, so it
# is set once outside the loop.
n_params_subnet = 64

# Fit a subnetwork Laplace approximation (full Hessian over the selected
# weights) for each selection strategy and save one plot per strategy.
for name, mask in [
    ('mag', LargestMagnitudeSubnetMask),
    ('random', RandomSubnetMask),
    ('last', LastLayerSubnetMask),
    ('var-lap', LargestVarianceDiagLaplaceSubnetMask),
    ('var-swag', LargestVarianceSWAGSubnetMask),
]:
    # Assemble the constructor arguments each mask class is given.
    subnetmask_kwargs = dict(model=model)
    if mask is not LastLayerSubnetMask:
        # The last-layer mask is the only one not given a parameter budget.
        subnetmask_kwargs.update(n_params_subnet=n_params_subnet)
    if mask is LargestVarianceSWAGSubnetMask:
        subnetmask_kwargs.update(likelihood='regression')
    elif mask is LargestVarianceDiagLaplaceSubnetMask:
        # This mask is handed a diagonal Laplace model of the same network.
        diag_laplace_model = DiagLaplace(model, 'regression')
        subnetmask_kwargs.update(diag_laplace_model=diag_laplace_model)
    subnet_mask = mask(**subnetmask_kwargs)
    subnet_indices = subnet_mask.select(train_loader)

    lap_model = Laplace(model, 'regression', subset_of_weights='subnetwork', hessian_structure='full', subnetwork_indices=subnet_indices)
    lap_model.fit(train_loader)
    model.eval()
    with torch.no_grad():
        # For regression the Laplace object returns (mean, variance), not
        # (mean, std).
        y_test_mean, y_test_var = lap_model(X_test)
    # Convert variance to standard deviation before plotting the +/- band;
    # clamp guards against tiny negative values from round-off.
    y_test_std = y_test_var.clamp(min=0).sqrt()
    plot_1d_regression(X_train, y_train, y_test_mean, y_test_std, f'{name}_{n_params_subnet}')



In [73]:
from laplace.utils import GreedyMarginalLikelihoodSubnetMask
from laplace import FullLaplace

# Select a 256-weight subnetwork with GreedyMarginalLikelihoodSubnetMask, which
# is given the network, the parameter budget and a FullLaplace model.
# NOTE(review): the selection criterion is not visible in this notebook; the
# captured output shows one logged line per selected parameter with a "logdet"
# value -- confirm the exact objective against the mask's implementation.
n_params_subnet = 256
laplace_model = FullLaplace(model, 'regression')
subnet_mask = GreedyMarginalLikelihoodSubnetMask(model, n_params_subnet, laplace_model)
# subnet_indices is consumed by the subnetwork Laplace fit in the next cell.
subnet_indices = subnet_mask.select(train_loader)



tensor(78.8184)
n_params_subnet=  1: 2700 (logdet=72.824) [0.34s]
n_params_subnet=  2: 2692 (logdet=66.859) [0.46s]
n_params_subnet=  3: 2660 (logdet=61.785) [0.58s]
n_params_subnet=  4: 2666 (logdet=56.716) [0.71s]
n_params_subnet=  5: 2663 (logdet=52.708) [0.93s]
n_params_subnet=  6: 2677 (logdet=48.876) [1.07s]
n_params_subnet=  7: 2686 (logdet=45.082) [1.21s]
n_params_subnet=  8: 2699 (logdet=41.557) [1.34s]
n_params_subnet=  9: 2661 (logdet=38.891) [1.49s]
n_params_subnet= 10:   97 (logdet=37.070) [1.64s]
n_params_subnet= 11: 2650 (logdet=35.390) [1.79s]
n_params_subnet= 12:   21 (logdet=33.885) [1.95s]
n_params_subnet= 13: 2651 (logdet=32.387) [2.18s]
n_params_subnet= 14: 2656 (logdet=31.176) [2.34s]
n_params_subnet= 15: 2613 (logdet=30.393) [2.49s]
n_params_subnet= 16: 2611 (logdet=29.764) [2.64s]
n_params_subnet= 17: 2662 (logdet=29.149) [2.80s]
n_params_subnet= 18: 2657 (logdet=28.547) [2.97s]
n_params_subnet= 19:   71 (logdet=27.961) [3.14s]
n_params_subnet= 20: 2694 (logdet=

In [74]:
# Subnetwork Laplace approximation (full Hessian) restricted to the weights
# chosen by the greedy selection (subnet_indices / n_params_subnet come from
# the preceding cell).
lap_model = Laplace(model, 'regression', subset_of_weights='subnetwork', hessian_structure='full', subnetwork_indices=subnet_indices)
lap_model.fit(train_loader)
# Disabled experiment: scale the prior precision by the selected fraction of
# the network's 2701 weights.
#lap_model.prior_precision = n_params_subnet / 2701
model.eval()
with torch.no_grad():
    # For regression the Laplace object returns (mean, variance), not
    # (mean, std).
    y_test_mean, y_test_var = lap_model(X_test)
# Convert variance to standard deviation before plotting the +/- band; clamp
# guards against tiny negative values from round-off.
y_test_std = y_test_var.clamp(min=0).sqrt()
plot_1d_regression(X_train, y_train, y_test_mean, y_test_std, f'greedy_min_diff_{n_params_subnet}')

