In [1]:
import logging
import os
from os import path
from sklearn.model_selection import KFold
import pandas as pd
import zipfile
import urllib.request

from torch.utils.data import DataLoader
import torchvision.datasets as datasets
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
from torchvision import transforms
from PIL import Image

class UCIDatasets():
    def __init__(self,  name,  data_path="", n_splits = 10):
        self.datasets = {
            "housing": "https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data",
            "concrete": "https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/compressive/Concrete_Data.xls",
            "energy": "http://archive.ics.uci.edu/ml/machine-learning-databases/00242/ENB2012_data.xlsx",
            "power": "https://archive.ics.uci.edu/ml/machine-learning-databases/00294/CCPP.zip",
            "wine": "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv",
            "yacht": "http://archive.ics.uci.edu/ml/machine-learning-databases/00243/yacht_hydrodynamics.data"}
        self.data_path = data_path
        self.name = name
        self.n_splits = n_splits
        self._load_dataset()


    def _load_dataset(self):
        if self.name not in self.datasets:
            raise Exception("Not known dataset!")
        if not path.exists(self.data_path+"UCI"):
            os.mkdir(self.data_path+"UCI")

        url = self.datasets[self.name]
        file_name = url.split('/')[-1]
        if not path.exists(self.data_path+"UCI/" + file_name):
            urllib.request.urlretrieve(
                self.datasets[self.name], self.data_path+"UCI/" + file_name)
        data = None


        if self.name == "housing":
            data = pd.read_csv(self.data_path+'UCI/housing.data',
                        header=0, delimiter="\s+").values
            self.data = data[np.random.permutation(np.arange(len(data)))]

        elif self.name == "concrete":
            data = pd.read_excel(self.data_path+'UCI/Concrete_Data.xls',
                               header=0).values
            self.data = data[np.random.permutation(np.arange(len(data)))]
        elif self.name == "energy":
            data = pd.read_excel(self.data_path+'UCI/ENB2012_data.xlsx',
                                 header=0).values
            self.data = data[np.random.permutation(np.arange(len(data)))]
        elif self.name == "power":
            zipfile.ZipFile(self.data_path +"UCI/CCPP.zip").extractall("output_dir/")
            data = pd.read_excel('output_dir/CCPP/Folds5x2_pp.xlsx', header=0).values
            np.random.shuffle(data)
            self.data = data
        elif self.name == "wine":
            data = pd.read_csv(self.data_path + 'UCI/winequality-red.csv',
                               header=1, delimiter=';').values
            self.data = data[np.random.permutation(np.arange(len(data)))]

        elif self.name == "yacht":
            data = pd.read_csv(self.data_path + 'UCI/yacht_hydrodynamics.data',
                               header=1, delimiter='\s+').values
            self.data = data[np.random.permutation(np.arange(len(data)))]

        kf = KFold(n_splits=self.n_splits)
        self.in_dim = data.shape[1] - 1
        self.out_dim = 1
        self.data_splits = kf.split(data)
        self.data_splits = [(idx[0], idx[1]) for idx in self.data_splits]

    def get_split(self, split=-1, train='train'):
        if split == -1:
            split = 0
        if 0<=split and split<self.n_splits:
            trainval_index, test_index = self.data_splits[split]
            tv_len = len(trainval_index)
            train_index = trainval_index[:int(tv_len*.8)]
            val_index = trainval_index[int(tv_len*.8):]
            # print('trainval idx', trainval_index)
            # print('train idx', train_index)
            # print('val idx', val_index)
            # print('test idx', test_index)

            x_train, y_train = self.data[train_index,
                                    :self.in_dim], self.data[train_index, self.in_dim:]
            x_val, y_val = self.data[val_index,
                                    :self.in_dim], self.data[val_index, self.in_dim:]
            x_trainval, y_trainval = self.data[trainval_index,
                                    :self.in_dim], self.data[trainval_index, self.in_dim:]
            x_test, y_test = self.data[test_index, :self.in_dim], self.data[test_index, self.in_dim:]

            y_train_means, y_train_stds = y_train.mean(axis=0), y_train.var(axis=0)**0.5
            y_trainval_means, y_trainval_stds = y_trainval.mean(axis=0), y_trainval.var(axis=0)**0.5
            # y_train = (y_train - y_means)/y_stds
            # y_test = (y_test - y_means)/y_stds
            x_train_means, x_train_stds = x_train.mean(axis=0), x_train.var(axis=0)**0.5
            x_trainval_means, x_trainval_stds = x_trainval.mean(axis=0), x_trainval.var(axis=0)**0.5
            if train == 'train':
                x_train = (x_train - x_train_means)/x_train_stds
                y_train = (y_train - y_train_means)
                inps = torch.from_numpy(x_train).float()
                tgts = torch.from_numpy(y_train).float()
                train_data = torch.utils.data.TensorDataset(inps, tgts)
                return train_data
            elif train == 'val':
                # normalized val with train stats
                x_val = (x_val - x_train_means)/x_train_stds
                y_val = (y_val - y_train_means)
                inps = torch.from_numpy(x_val).float()
                tgts = torch.from_numpy(y_val).float()
                val_data = torch.utils.data.TensorDataset(inps, tgts)
                return val_data

            elif train == 'trainval':
                # normalized trainval with trainval stats
                x_trainval = (x_trainval - x_trainval_means)/x_trainval_stds
                y_trainval = (y_trainval - y_trainval_means)
                inps = torch.from_numpy(x_trainval).float()
                tgts = torch.from_numpy(y_trainval).float()
                trainval_data = torch.utils.data.TensorDataset(inps, tgts)
                return trainval_data


            elif train == 'test':
                # normalized test with trainval stats
                x_test = (x_test - x_trainval_means)/x_trainval_stds
                y_test = (y_test - y_trainval_means)
                inps = torch.from_numpy(x_test).float()
                tgts = torch.from_numpy(y_test).float()
                test_data = torch.utils.data.TensorDataset(inps, tgts)
                return test_data
            else:
                raise ValueError()

mnist_train_dataset = datasets.FashionMNIST(root='data',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)

mnist_test_dataset = datasets.FashionMNIST(root='data',
                              train=False,
                              transform=transforms.ToTensor())

mnist_ood_dataset = datasets.MNIST(root='data',
                                    train=False,
                                    transform=transforms.ToTensor(),
                                    download=True)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
# from sklearn import datasets
from dataclasses import dataclass
from torch.optim.lr_scheduler import StepLR
import copy
from tqdm import tqdm

from torchvision import transforms
from PIL import Image

if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True
    
    
from vbll.layers.classification import VBLLClassificationD


class MLP(nn.Module):
    def __init__(self, cfg):
        super(MLP, self).__init__()
        self.cfg = cfg
        self.fc1 = nn.Linear(cfg.IN_FEATURES, cfg.HIDDEN_FEATURES)
        self.fc2 = nn.Linear(cfg.HIDDEN_FEATURES, cfg.HIDDEN_FEATURES)
        self.relu = nn.ReLU()
        self.vbll = VBLLClassificationD(cfg.HIDDEN_FEATURES,
                                        cfg.OUT_FEATURES,
                                        cfg.REG_WEIGHT,
                                        parameterization = cfg.PARAM,
                                        return_ood=cfg.RETURN_OOD)

    def forward(self, x):
        out = x.view(x.shape[0], -1)
        out = self.relu(self.fc1(out))
        out = self.relu(self.fc2(out))
        return self.vbll(out)

In [5]:
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"

class train_cfg:
  num_epochs = 1
  batch_size = 256
  lr = 1e-3
  lr_decay_every = 10000
  lr_decay_gamma = 0.5
  wd = 1e-2
  opt = torch.optim.AdamW
  clip_value = 1
  validation_freq = 1

def eval_acc(preds, y):
    map_preds = torch.argmax(preds, dim=1)
    return (map_preds == y).float().mean()

def train(train_dataset, test_dataset, model, train_cfg):
  optimizer = train_cfg.opt(model.parameters(),
                            lr=train_cfg.lr,
                            weight_decay=train_cfg.wd)

  # param_list = [
  #     {'params': model.encoder.parameters()},
  #     {'params': model._W, 'weight_decay': llwd},
  #     {'params': model.S.parameters(), 'weight_decay': llwd},
  #     {'params': model.Sig_inv.parameters(), 'weight_decay': llwd}
  # ]

  # optimizer = train_cfg.opt(param_list,
  #                           lr=train_cfg.lr,
  #                           weight_decay=train_cfg.wd)


  scheduler = StepLR(optimizer, step_size=train_cfg.lr_decay_every, gamma=train_cfg.lr_decay_gamma)

  dataloader = DataLoader(train_dataset, batch_size = train_cfg.batch_size, shuffle=True)
  test_dataloader = DataLoader(test_dataset, batch_size = train_cfg.batch_size, shuffle=True)

  output_metrics = {
      'train_loss': [],
      'test_loss': [],
      'train_acc': [],
      'test_acc': []
      # 'train_mse': [],
      # 'test_mse': []
  }

  for epoch in range(train_cfg.num_epochs + 1):
    running_loss = []
    running_acc = []
    # running_mse = []

    model.train()
    for train_step, data in enumerate(dataloader):
      optimizer.zero_grad()
      x = data[0].to(device)
      y_label = data[1].to(device)
      out = model(x)

      loss = out.train_loss_fn(y_label)
      running_loss.append(loss.item())
      running_acc.append(eval_acc(out.predictive.probs, y_label).item())
      # print(out.ood_scores)
      # print(out.predictive.probs)
      # mse = ((out.predictive.mean - y_label).mean(0) ** 2).sum(-1)
      # running_mse.append(mse.item())


      loss.backward()
      torch.nn.utils.clip_grad_norm_(model.parameters(), train_cfg.clip_value)
      optimizer.step()


    output_metrics['train_loss'].append(np.mean(running_loss))
    output_metrics['train_acc'].append(np.mean(running_acc))
    # output_metrics['train_mse'].append(np.sqrt(np.mean(running_mse)))

    scheduler.step()
    if epoch % train_cfg.validation_freq == 0:
      # print(f'[{epoch + 1}] loss: {np.mean(running_loss):.3f}')
      # running_loss = []
      running_test_loss = []
      running_test_acc = []
      # running_test_mse = []

      with torch.no_grad():
        model.eval()
        for test_step, data in enumerate(test_dataloader):
          x = data[0].to(device)
          y_label = data[1].to(device)

          out = model(x)
          loss = out.val_loss_fn(y_label)
          running_test_loss.append(loss.item())
          running_test_acc.append(eval_acc(out.predictive.probs, y_label).item())

          # mse = ((out.predictive.mean - y_label) ** 2).sum(-1).mean()
          # running_test_mse.append(mse.item())

        output_metrics['test_loss'].append(np.mean(running_test_loss))
        output_metrics['test_acc'].append(np.mean(running_test_acc))
        # output_metrics['test_mse'].append(np.sqrt(np.mean(running_test_mse)))

  return output_metrics

In [6]:
class cfg:
    IN_FEATURES = 784
    HIDDEN_FEATURES = 256
    OUT_FEATURES = 10
    REG_WEIGHT = 1./mnist_train_dataset.__len__()
    PARAM = 'dense'
    RETURN_OOD = True

model = MLP(cfg()).to(device)

output = train(mnist_train_dataset, mnist_test_dataset, model, train_cfg())

In [7]:
output

{'train_loss': [0.8517337664644768, 0.6433156530907813],
 'test_loss': [0.4461179818958044, 0.4345053147524595],
 'train_acc': [0.8112422430768926, 0.8640569593044037],
 'test_acc': [0.839453125, 0.84267578125]}