In [1]:
# dev code to compute likelihood of the data over the gaussians of a layer

In [2]:
from __future__ import print_function
import numpy as np

In [3]:
import matplotlib as mpl
# set this 'backend' when using jupyter; do this before importing pyplot
mpl.use('nbagg')
import matplotlib.pyplot as plt
from matplotlib import cm

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import numpy as np
from scipy import stats

# sys.path.append('/home/felix/Research/Adversarial Research/FGN---Research')
import torch_helper_lib as th

In [5]:
# random seeds
torch.manual_seed(665)
np.random.seed(3326)

torch.backends.cudnn.deterministic = True
torch.cuda.manual_seed_all(999)

# Define what device we are using
print("CUDA Available: ",torch.cuda.is_available())
use_cuda = True
device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")

CUDA Available:  True


In [6]:
## Define nd Toy Data

# number of dimensions of the data
num_dim = 2

# gaussian target parameters
data_centers = 1*np.ones(num_dim)
sigma = 1

# hyper-plane separating the classes (this will become the target for the weights)
sep_plane = np.random.uniform(low=-1.0, high=1.0, size=num_dim)
# sep_plane = np.concatenate(([1],np.zeros(num_dim-1)))

num_samples = 500

samples_xs = np.array([np.random.normal(loc=0, scale=sigma, size=num_dim)+data_centers for _ in range(num_samples)] )

# apply labels based on side of sep hyper plane
samples_labels = np.array([ [1] if x>np.matmul(data_centers, sep_plane) else [-1] for x in np.matmul(samples_xs, sep_plane)])

In [7]:
# convert data to pytorch format 
tensor_x = torch.Tensor(samples_xs)
tensor_y = torch.Tensor(samples_labels)

my_dataset = torch.utils.data.TensorDataset(tensor_x[:num_samples*4/5],tensor_y[:num_samples*4/5]) # create your dataset
my_test_data = torch.utils.data.TensorDataset(tensor_x[num_samples*4/5:],tensor_y[num_samples*4/5:]) # create your dataset

my_dataloader = torch.utils.data.DataLoader(my_dataset, batch_size=5) # create your dataloader
my_test_dataloader = torch.utils.data.DataLoader(my_test_data) # create your dataloader

In [8]:
print(tensor_x.shape)

torch.Size([500, 2])


In [9]:
# define the FGN layer class to dev

class FGN_layer(nn.Module):
    r""" Applies a Finite Gaussian Neuron layer to the incoming data
    
    Args:
    
    Shape:
    
    Attributes:
    
    Examples:
        
        >>> l=FGN_layer(20,30)
    
    """
    def __init__(self, in_features, out_features):
        super(FGN_layer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        
        # learnable parameters
        # regular NN weights (transposed at the start, see order of Tensor(dims))
        self.weights = nn.Parameter(torch.Tensor(out_features, in_features), requires_grad = True)
        # centers of FGNs
        self.centers = nn.Parameter(torch.Tensor(out_features, in_features), requires_grad = True)
        # size of FGNs
        self.sigs = nn.Parameter(torch.Tensor(out_features,), requires_grad = True)
        # importance of each gaussian for likelihoods
        self.pis = nn.Parameter(torch.Tensor(out_features,), requires_grad = True)
        # epsilon
        self.eps = 1e-7        
        
        # parameter init call
        self.reset_parameters()
    
    # parameter init definition
    def reset_parameters(self):
        s = np.sqrt(self.in_features)
        # regular NN init
        self.weights.data.uniform_(-s, s)
        # centers init, assuming data normalized to mean 0 var 1
        self.centers.data.uniform_(-0.01, 0.01)
        # size init, to be researched further
        self.sigs.data.uniform_(0.99*self.in_features, 1.01*self.in_features)
        # PIs init, start at 1/n each
        self.pis.data.fill_(1.0/self.out_features)
        
    def forward(self, input):
        
        # linear part is the same as normal NNs
        biases = -torch.sum(torch.mul(self.weights, self.centers), dim=-1)
        l = F.linear(input, self.weights, bias=biases)
        # optional, apply tanh here
        # l = torch.tanh(l)

        # gaussian component
        # unsqueeze the inputs to allow broadcasting
        # compute distance to centers
        g = (input.unsqueeze(1)-self.centers)**2
        g = g.sum(dim=2)

        # for future, use any norm?
#         g = torch.norm(input.unsqueeze(1)-self.centers, p=1, dim=2)

        # apply sigma
        g = -g/(self.sigs**2)
        # apply exponential
        g = torch.exp(g)

        # combine gaussian with linear
        res = l*g
        # optional, flatten res
        # res = F.tanh(res)

        # likelihoods computation for each data point
        if self.training:
            likelihoods = input.unsqueeze(1)
            likelihoods = likelihoods - self.centers
            likelihoods = likelihoods**2
            likelihoods = torch.sum(likelihoods, dim=-1)
            likelihoods = likelihoods/(self.sigs**2)
            # add ln(det(SIG)) = 2k*log(sig)
            likelihoods = likelihoods + 2*self.in_features*torch.log(self.sigs)
            # at this stage, all are ~ -ln N(sample|gaussian) for each gaussian in layer
            #multiply by the PIs, constrained to sum to 1
            pis_normalized = F.softmax(self.pis, dim=-1)
            likelihoods = likelihoods*pis_normalized
            # sum them up
            likelihoods = torch.sum(likelihoods, dim=-1)

        else:
            likelihoods = torch.tensor([-1.0])
        
        return res, likelihoods
    

In [10]:
# Define a Finite Gaussian Neural Network
class FGNet(nn.Module):
    def __init__(self):
        super(FGNet, self).__init__()
        self.l1 = FGN_layer(num_dim,3)

    def forward(self, x):
        x, l = self.l1(x)
        # clip res to +1
        x  = torch.clamp(x, min=-1.0, max=1.0)

        return x,l

In [11]:
# Initialize the network
fgn_model = FGNet().to(device)

In [12]:
# likelihood only loss function
def loss_func(model, output, target):
    # get output and likelihoods
    pred, likelihoods = output
    return torch.sum(likelihoods)

In [13]:
# optimizer
optimizer = optim.SGD(fgn_model.parameters(), lr=0.01, momentum=0.5)

In [14]:
# epoch number
epochs = 5

In [15]:
# model state dict before training
print(fgn_model.state_dict())

OrderedDict([('l1.weights', tensor([[-1.0900,  0.6888],
        [-0.7106,  0.4763],
        [-0.4106,  1.0383]], device='cuda:0')), ('l1.centers', tensor([[ 0.0052,  0.0076],
        [-0.0089,  0.0033],
        [-0.0095, -0.0062]], device='cuda:0')), ('l1.sigs', tensor([1.9921, 1.9843, 2.0009], device='cuda:0')), ('l1.pis', tensor([0.3333, 0.3333, 0.3333], device='cuda:0'))])


In [16]:
### train:
res1 = th.train(fgn_model, device, my_dataloader, loss_func, optimizer, epochs, save_hist=2, verbose=True, pred_func=None, test_loader=None)

Epoch 0 Train set - Average loss: 12.5958
Epoch 1 Train set - Average loss: 10.1052
Epoch 2 Train set - Average loss: 10.1053
Epoch 3 Train set - Average loss: 10.1053
Epoch 4 Train set - Average loss: 10.1053


In [17]:
# model state dict after training
print(fgn_model.state_dict())

OrderedDict([('l1.weights', tensor([[-1.0900,  0.6888],
        [-0.7106,  0.4763],
        [-0.4106,  1.0383]], device='cuda:0')), ('l1.centers', tensor([[1.0217, 1.1669],
        [1.0220, 1.1661],
        [1.0227, 1.1639]], device='cuda:0')), ('l1.sigs', tensor([0.9092, 0.9096, 0.9109], device='cuda:0')), ('l1.pis', tensor([0.3458, 0.3383, 0.3159], device='cuda:0'))])


In [18]:
### test 
res2 = th.test(fgn_model, device, my_test_dataloader, loss_func, verbose=True, pred_func=None)

Test set - Average loss: -1.0000


In [19]:
### BELOW == DEV work

In [20]:
# compute likelihood for a batch
batch_x, batch_y = next(iter(my_dataloader))

In [21]:
print(batch_x.shape)

torch.Size([5, 2])


In [22]:
# for one sample
# for one gaussian
sample = batch_x[0].to(device)
gaussian_centers = fgn_model.l1.centers[0]
sig = fgn_model.l1.sigs[0]
print(sample)
print(gaussian_centers)
print(sig)

tensor([ 1.0389, -1.9596], device='cuda:0')
tensor([1.0217, 1.1669], device='cuda:0', grad_fn=<SelectBackward>)
tensor(0.9092, device='cuda:0', grad_fn=<SelectBackward>)


In [23]:
# log likelihood based on https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Likelihood_function

ite =  iter(xrange(99))

# sample minus centers
x = sample-gaussian_centers
print(next(ite), x)
# X.T * SIG-1 * X
x = x**2
print(next(ite), x)
x = torch.sum(x, dim=-1)
print(next(ite), x)
x = x/(sig**2)
print(next(ite), x)
# add ln(det(SIG)) = 2k*log(sig)
x = x + 2*batch_x.shape[-1]*torch.log(sig)
print(next(ite), x)
# x is now the negative log likelihood of this sample for this gaussian

# without the constants, which we ignore (correct?)

0 tensor([ 0.0172, -3.1264], device='cuda:0', grad_fn=<SubBackward0>)
1 tensor([2.9491e-04, 9.7747e+00], device='cuda:0', grad_fn=<PowBackward0>)
2 tensor(9.7750, device='cuda:0', grad_fn=<SumBackward2>)
3 tensor(11.8247, device='cuda:0', grad_fn=<DivBackward0>)
4 tensor(11.4439, device='cuda:0', grad_fn=<AddBackward0>)


In [24]:
ite =  iter(xrange(99))

# now more general case, mix of gaussian, still one sample
x = sample - fgn_model.l1.centers
print(next(ite), x)
x = x**2
print(next(ite), x)
x = torch.sum(x, dim=-1)
print(next(ite), x)
x = x/(fgn_model.l1.sigs**2)
print(next(ite), x)
x = x + 2*batch_x.shape[-1]*torch.log(fgn_model.l1.sigs)
print(next(ite), x)
# at this stage, all are ~ -ln N(sample|gaussian) for each gaussian in layer

#multiply by the PIs
x = x*fgn_model.l1.pis
print(next(ite), x)
# sum them up
x = torch.sum(x)
print(next(ite), x)


0 tensor([[ 0.0172, -3.1264],
        [ 0.0169, -3.1257],
        [ 0.0162, -3.1235]], device='cuda:0', grad_fn=<SubBackward0>)
1 tensor([[2.9491e-04, 9.7747e+00],
        [2.8670e-04, 9.7700e+00],
        [2.6182e-04, 9.7561e+00]], device='cuda:0', grad_fn=<PowBackward0>)
2 tensor([9.7750, 9.7703, 9.7564], device='cuda:0', grad_fn=<SumBackward2>)
3 tensor([11.8247, 11.8082, 11.7590], device='cuda:0', grad_fn=<DivBackward0>)
4 tensor([11.4439, 11.4293, 11.3856], device='cuda:0', grad_fn=<AddBackward0>)
5 tensor([3.9571, 3.8667, 3.5968], device='cuda:0', grad_fn=<MulBackward0>)
6 tensor(11.4206, device='cuda:0', grad_fn=<SumBackward0>)


In [25]:
ite =  iter(xrange(99))

# now for a whole batch, for all gaussians in a layer
x = batch_x.unsqueeze(1).to(device)
print(next(ite), x)
x = x - fgn_model.l1.centers
print(next(ite), x)
x = x**2
print(next(ite), x)
x = torch.sum(x, dim=-1)
print(next(ite), x)
x = x/(fgn_model.l1.sigs**2)
print(next(ite), x)
# add ln(det(SIG)) = 2k*log(sig)
x = x + 2*batch_x.shape[-1]*torch.log(fgn_model.l1.sigs)
print(next(ite), x)
# at this stage, all are ~ -ln N(sample|gaussian) for each gaussian in layer
#multiply by the PIs
x = x*fgn_model.l1.pis
print(next(ite), x)
# sum them up
x = torch.sum(x, dim=-1)
print(next(ite), x)


0 tensor([[[ 1.0389, -1.9596]],

        [[ 2.6153,  0.7124]],

        [[ 0.9541, -0.2875]],

        [[ 1.2648,  1.1148]],

        [[-0.4840,  2.7405]]], device='cuda:0')
1 tensor([[[ 0.0172, -3.1264],
         [ 0.0169, -3.1257],
         [ 0.0162, -3.1235]],

        [[ 1.5936, -0.4545],
         [ 1.5933, -0.4537],
         [ 1.5926, -0.4515]],

        [[-0.0677, -1.4544],
         [-0.0679, -1.4536],
         [-0.0686, -1.4514]],

        [[ 0.2431, -0.0521],
         [ 0.2429, -0.0514],
         [ 0.2421, -0.0491]],

        [[-1.5057,  1.5736],
         [-1.5060,  1.5744],
         [-1.5067,  1.5766]]], device='cuda:0', grad_fn=<SubBackward0>)
2 tensor([[[2.9491e-04, 9.7747e+00],
         [2.8670e-04, 9.7700e+00],
         [2.6182e-04, 9.7561e+00]],

        [[2.5394e+00, 2.0656e-01],
         [2.5386e+00, 2.0589e-01],
         [2.5362e+00, 2.0388e-01]],

        [[4.5768e-03, 2.1151e+00],
         [4.6094e-03, 2.1130e+00],
         [4.7120e-03, 2.1065e+00]],

        [[5.910

In [26]:
# constrain PIs
print(fgn_model.l1.pis / fgn_model.l1.pis.sum(1, keepdim=True).clamp(min=1e-7))

RuntimeError: Dimension out of range (expected to be in range of [-1, 0], but got 1)