In [2]:
import numpy as np
import torch
import os
from matplotlib import pyplot as plt
from sklearn.metrics import average_precision_score, roc_auc_score
from torch import nn
from torch.nn import functional as F
from tqdm import trange, tqdm



In [3]:
def ece(probs, labels, n_bins=30):
    '''
    Expected Calibration Error (ECE) of a classifier.

    probs has shape [n_examples, n_classes], labels has shape [n_examples]
    (integer class indices) -> float

    Only the probability of the *predicted* class (the arg-max of each row)
    is looked at. These confidences are sorted into n_bins uniform bins on
    [0, 1]. For each bin the mean confidence p_i and the accuracy a_i are
    computed, and the returned value is the sum of |p_i - a_i| weighted by
    the fraction of examples that fell into the bin.
    '''
    num_examples, num_classes = probs.shape
    example_idx = range(num_examples)

    # The prediction is the class with the highest probability.
    predicted = np.argmax(probs, axis=1)
    confidences = probs[example_idx, predicted]

    # Uniform bins on [0., 1.]; drop the leading 0. so only upper edges remain.
    upper_edges = np.histogram_bin_edges([], bins=n_bins, range=(0., 1.))[1:]
    bin_ids = np.digitize(confidences, upper_edges)

    confidence_sums = np.bincount(bin_ids, minlength=n_bins, weights=confidences)
    confidence_sums = confidence_sums.astype(np.float32)

    # A tiny epsilon keeps empty bins from dividing by zero.
    counts = np.bincount(bin_ids, minlength=n_bins) + np.finfo(confidence_sums.dtype).eps
    mean_confidence = confidence_sums / counts

    # is_correct[i] is 1. when example i was classified correctly, else 0.
    is_correct = np.eye(num_classes)[labels][example_idx, predicted]
    bin_accuracy = np.bincount(bin_ids, weights=is_correct, minlength=n_bins) / counts

    bin_weight = counts / float(num_examples)

    return np.sum(np.abs(bin_accuracy - mean_confidence) * bin_weight)

def load_rotated_mnist(mnist_path="data/rotated_mnist.npz"):
    '''
    Loads the training split stored in a Rotated MNIST ``.npz`` archive.

    The difference between MNIST and Rotated MNIST is that Rotated MNIST has
    rotated *test* images; this function only reads the ``x_train`` and
    ``y_train`` arrays of the archive.

    Args:
        mnist_path: path of the ``.npz`` archive. A relative path is resolved
            against the current working directory (in a notebook there is no
            ``__file__`` to fall back on).

    Returns:
        A ``torch.utils.data.TensorDataset`` of ``(image, label)`` pairs in
        which every image has been flattened to 784 values.

    Raises:
        FileNotFoundError: if ``mnist_path`` does not point to a file.
    '''
    if not os.path.isfile(mnist_path):
        raise FileNotFoundError(
            f"Rotated MNIST archive not found at '{mnist_path}' "
            f"(resolved to '{os.path.abspath(mnist_path)}'). "
            "Pass the correct location via `mnist_path`."
        )

    # np.load keeps an .npz archive open until it is closed; the context
    # manager releases the file handle once both arrays have been read.
    with np.load(mnist_path) as data:
        x_train = torch.from_numpy(data["x_train"]).reshape([-1, 784])
        y_train = torch.from_numpy(data["y_train"])

    return torch.utils.data.TensorDataset(x_train, y_train)


In [4]:
class Densenet(torch.nn.Module):
    '''
    Plain (deterministic) feedforward classifier.

    The network is an input block (Linear + ReLU) mapping input_size to
    width, followed by num_layers hidden blocks (Linear + ReLU) of size
    width, followed by a final Linear layer producing 10 class logits.
    '''
    def __init__(self, input_size, num_layers, width):
        super().__init__()
        # Blocks are created in input -> hidden -> output order.
        blocks = [nn.Sequential(nn.Linear(input_size, width), nn.ReLU())]
        for _ in range(num_layers):
            blocks.append(nn.Sequential(nn.Linear(width, width), nn.ReLU()))
        blocks.append(nn.Linear(width, 10))
        self.net = nn.Sequential(*blocks)

    def forward(self, x):
        '''Returns the unnormalised class scores (logits) for x.'''
        return self.net(x)

    def predict_class_probs(self, x):
        '''Returns the softmax over the class dimension of the logits.'''
        logits = self.forward(x)
        return F.softmax(logits, dim=1)



In [5]:
class BayesianLayer(torch.nn.Module):
    '''
    Module implementing a single Bayesian feedforward layer.
    The module performs Bayes-by-backprop, that is, mean-field
    variational inference. It keeps a fixed Gaussian prior and a learnable
    Gaussian posterior over the weights (and biases) and uses the
    reparameterization trick for sampling.

    Args:
        input_dim: number of input features.
        output_dim: number of output features.
        bias: whether the layer has a (Bayesian) bias term.
    '''
    def __init__(self, input_dim, output_dim, bias=True):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.use_bias = bias

        # The prior N(0, 1) is fixed. It is stored in buffers rather than
        # nn.Parameters so that the optimizer never updates it, while it
        # still follows the module across devices and stays in state_dict.
        self.register_buffer('prior_mu', torch.zeros(output_dim, input_dim))
        self.register_buffer('prior_sigma', torch.ones(output_dim, input_dim))

        # Variational posterior N(weight_mu, exp(weight_logsigma)^2).
        self.weight_mu = nn.Parameter(torch.zeros(output_dim, input_dim))
        self.weight_logsigma = nn.Parameter(torch.zeros(output_dim, input_dim))

        if self.use_bias:
            self.bias_mu = nn.Parameter(torch.zeros(output_dim))
            self.bias_logsigma = nn.Parameter(torch.zeros(output_dim))
            # Bias prior with the bias' own shape. Non-persistent so the set
            # of state_dict keys is the same as before.
            self.register_buffer('bias_prior_mu', torch.zeros(output_dim),
                                 persistent=False)
            self.register_buffer('bias_prior_sigma', torch.ones(output_dim),
                                 persistent=False)
        else:
            self.register_parameter('bias_mu', None)
            self.register_parameter('bias_logsigma', None)


    @staticmethod
    def _sample(mu, logsigma):
        '''
        Draws one sample from N(mu, exp(logsigma)^2) with the
        reparameterization trick, so gradients reach mu and logsigma.
        '''
        # randn_like matches the device and dtype of the parameters.
        return mu + torch.exp(logsigma) * torch.randn_like(mu)


    def forward(self, inputs):
        '''
        Applies the layer with freshly sampled weights (and bias).
        Every call draws a new sample, so repeated calls differ.
        '''
        weight = self._sample(self.weight_mu, self.weight_logsigma)
        if self.use_bias:
            bias = self._sample(self.bias_mu, self.bias_logsigma)
        else:
            bias = None
        return F.linear(inputs, weight, bias)


    def kl_divergence(self):
        '''
        Computes the KL divergence between the posteriors and the priors
        for this layer (weights plus, if present, biases).
        '''
        kl_loss = self._kl_divergence(self.weight_mu, self.weight_logsigma)
        if self.use_bias:
            kl_loss = kl_loss + self._kl_divergence(
                self.bias_mu, self.bias_logsigma,
                self.bias_prior_mu, self.bias_prior_sigma)
        return kl_loss


    def _kl_divergence(self, mu, logsigma, prior_mu=None, prior_sigma=None):
        '''
        Closed-form KL( N(mu, exp(logsigma)^2) || N(prior_mu, prior_sigma^2) ),
        summed over all elements.

        Args:
            mu, logsigma: posterior mean and log standard deviation.
            prior_mu, prior_sigma: prior mean and standard deviation; they
                default to the weight prior of this layer.

        Returns:
            A scalar tensor that is differentiable w.r.t. mu and logsigma.
        '''
        if prior_mu is None:
            prior_mu = self.prior_mu
        if prior_sigma is None:
            prior_sigma = self.prior_sigma

        # Per element:
        # log(s_p / s_q) + (s_q^2 + (mu_q - mu_p)^2) / (2 s_p^2) - 1/2
        posterior_var = torch.exp(2.0 * logsigma)
        kl = (torch.log(prior_sigma) - logsigma
              + (posterior_var + (mu - prior_mu) ** 2) / (2.0 * prior_sigma ** 2)
              - 0.5)
        return kl.sum()

    
class BayesNet(torch.nn.Module):
    '''
    Module implementing a Bayesian feedforward neural network using
    BayesianLayer objects.

    The network is an input block (BayesianLayer + ReLU), num_layers hidden
    blocks (BayesianLayer + ReLU) of size width, and a final BayesianLayer
    producing 10 class logits.
    '''
    def __init__(self, input_size, num_layers, width):
        super().__init__()
        self.input_size = input_size
        self.num_layers = num_layers
        input_layer = torch.nn.Sequential(BayesianLayer(input_size, width),
                                           nn.ReLU())
        hidden_layers = [nn.Sequential(BayesianLayer(width, width),
                                    nn.ReLU()) for _ in range(num_layers)]
        output_layer = BayesianLayer(width, 10)
        layers = [input_layer, *hidden_layers, output_layer]
        self.net = torch.nn.Sequential(*layers)


    def forward(self, x):
        '''Returns the class logits of one forward pass through the layers.'''
        return self.net(x)


    def predict_class_probs(self, x, num_forward_passes=10):
        '''
        Averages the softmax class probabilities over several forward passes.

        Args:
            x: batch of inputs of shape [batch_size, input_size].
            num_forward_passes: number of forward passes to average (>= 1).

        Returns:
            Tensor of shape [batch_size, 10] with the averaged probabilities.

        Raises:
            ValueError: if num_forward_passes is smaller than 1.
        '''
        if num_forward_passes < 1:
            raise ValueError("num_forward_passes must be at least 1")
        assert x.shape[1] == self.input_size
        batch_size = x.shape[0]

        # One softmax per forward pass, then marginalise over the passes.
        per_pass_probs = [F.softmax(self.forward(x), dim=1)
                          for _ in range(num_forward_passes)]
        probs = torch.stack(per_pass_probs).mean(dim=0)

        assert probs.shape == (batch_size, 10)
        return probs


    def kl_loss(self):
        '''
        Computes the KL divergence loss for all layers, i.e. the sum of
        kl_divergence() over every BayesianLayer contained in this network.
        '''
        # Walk this network's own sub-modules instead of indexing a global
        # `model`, so the result always belongs to the instance it is called on.
        return sum(layer.kl_divergence()
                   for layer in self.modules()
                   if isinstance(layer, BayesianLayer))
        

    

In [6]:
def train_network(model, optimizer, train_loader, num_epochs=100, pbar_update_interval=100):
    '''
    Updates the model parameters (in place) using the given optimizer object.
    Returns `None`.

    The loss is the batch-mean cross entropy. For a BayesNet the KL term of
    the ELBO is added, divided by the number of training examples seen per
    epoch so that it is on the same per-example scale as the cross entropy.

    Args:
        model: the network to train; BayesNet instances get the KL term.
        optimizer: optimizer holding the parameters of `model`.
        train_loader: iterable of (batch_x, batch_y); it must support
            `len()` when `model` is a BayesNet.
        num_epochs: number of passes over `train_loader`.
        pbar_update_interval: the progress bar shows loss and accuracy of
            the current batch every `pbar_update_interval` iterations.
    '''
    criterion = torch.nn.CrossEntropyLoss() # always used in this assignment

    is_bayesian = isinstance(model, BayesNet)
    num_batches = len(train_loader) if is_bayesian else None

    pbar = trange(num_epochs)
    for _ in pbar:
        for k, (batch_x, batch_y) in enumerate(train_loader):
            model.zero_grad()
            y_pred = model(batch_x)
            loss = criterion(y_pred, batch_y)
            if is_bayesian:
                # BayesNet implies additional KL-loss. The criterion averages
                # over the batch, so the KL (a whole-dataset quantity) is
                # divided by the number of examples in one epoch.
                loss = loss + model.kl_loss() / (num_batches * len(batch_y))
            loss.backward()
            optimizer.step()

            if k % pbar_update_interval == 0:
                # Reuse the logits of this step: no second forward pass, and
                # the shown accuracy belongs to the same pass as the loss.
                with torch.no_grad():
                    acc = (y_pred.argmax(dim=1) == batch_y).float().mean()
                pbar.set_postfix(loss=loss.item(), acc=acc.item())

In [7]:
def evaluate_model(model, model_type, test_loader, batch_size, extended_eval, private_test):
    '''
    Evaluates the trained model based on accuracy and ECE.

    Args:
        model: object providing `predict_class_probs(batch_x)`.
        model_type: name of the model, only used in the printed report.
        test_loader: iterable of (batch_x, batch_y).
        batch_size: accepted for interface compatibility; not used here.
        extended_eval: accepted for interface compatibility; the extended
            (out-of-distribution) evaluation is not implemented here.
        private_test: if True the labels are not used and no accuracy/ECE
            is reported.

    Returns:
        numpy array with the predicted class probabilities of all examples,
        in the order in which `test_loader` yielded them.

    Raises:
        ValueError: if `test_loader` yields no batches.
    '''
    prob_batches = []
    label_batches = []
    # Pure inference: without no_grad every batch would keep its autograd
    # graph alive until the end of the evaluation.
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            prob_batches.append(model.predict_class_probs(batch_x))
            label_batches.append(batch_y)

    if not prob_batches:
        raise ValueError("test_loader yielded no batches; nothing to evaluate")

    # Concatenate once instead of growing a tensor inside the loop.
    probs = torch.cat(prob_batches)
    labels = torch.cat(label_batches).long()
    final_probs = probs.detach().cpu().numpy()

    if not private_test:
        # Accuracy over all examples, which stays correct even when the
        # last batch is smaller than the others.
        acc_mean = (probs.argmax(dim=1) == labels).float().mean().item()
        ece_mean = ece(final_probs, labels.cpu().numpy())
        print(f"Model type: {model_type}\nAccuracy = {acc_mean:.3f}\nECE = {ece_mean:.3f}")
    else:
        print("Using private test set.")

    return final_probs

In [8]:
# Driver cell: configure the run, train the chosen model and evaluate it.
test_loader=None
private_test=False

seed = 0  # fixed seed so that sampling, shuffling and initialisation are repeatable
num_epochs = 1
batch_size = 128  # Try playing around with this
print_interval = 100
learning_rate = 5e-4  # Try playing around with this
model_type = "bayesnet"  # Try changing this to "densenet" as a comparison
extended_evaluation = False  # Set this to True for additional model evaluation

torch.manual_seed(seed)

dataset_train = load_rotated_mnist()
train_loader = torch.utils.data.DataLoader(dataset_train, batch_size=batch_size,
                                           shuffle=True, drop_last=True)

if model_type == "bayesnet":
    model = BayesNet(input_size=784, num_layers=2, width=100)
elif model_type == "densenet":
    model = Densenet(input_size=784, num_layers=2, width=100)
else:
    # Without this branch a typo would either raise a NameError further down
    # or silently reuse a `model` left over from an earlier run of the cell.
    raise ValueError(f"Unknown model_type: {model_type!r} (expected 'bayesnet' or 'densenet')")

print(model)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
train_network(model, optimizer, train_loader,
             num_epochs=num_epochs, pbar_update_interval=print_interval)

if test_loader is None:
    print("evaluating on train data")
    test_loader = train_loader
else:
    print("evaluating on test data")

# Do not change this! The main() method should return the predictions for the test loader
predictions = evaluate_model(model, model_type, test_loader, batch_size, extended_evaluation, private_test)

FileNotFoundError: [Errno 2] No such file or directory: 'data/rotated_mnist.npz'

In [9]:
# Display the first layer of the network: `model.net[0]` is the input block
# (a Sequential of layer + ReLU) and `[0]` selects the layer inside it.
# Needs the `model` created by the training cell above.
model.net[0][0]

NameError: name 'model' is not defined

In [15]:


# Debug cell: runs the prediction loop of evaluate_model on the first batch
# only (note the `break`) and prints the shape of the predicted probabilities.
# Needs `model` and `test_loader` from the training cell above.
accs_test = []
probs = torch.tensor([])
labels = torch.tensor([]).long()
for batch_x, batch_y in test_loader:
    pred = model.predict_class_probs(batch_x)
    #print(np.shape(pred))
    probs = torch.cat((probs, pred))
    #print(np.shape(probs))
    labels = torch.cat((labels, batch_y))
    # fraction of correctly classified examples in this batch
    acc = (pred.argmax(axis=1) == batch_y).sum().float().item()/(len(batch_y))
    accs_test.append(acc)
    break  # stop after the first batch
print(np.shape(pred))

torch.Size([128, 10])
