In [1]:
# N-Dimensional Toy training example
# Goal: expand 2.1.3a to multiple dimensions
# Conclusion: centers properly, but sigma dynamics still not clear
# sigma init: probably close to sqrt(in_feats)

In [2]:
from __future__ import print_function

In [3]:
import matplotlib as mpl
# set this 'backend' when using jupyter; do this before importing pyplot
mpl.use('nbagg')
import matplotlib.pyplot as plt

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import numpy as np
from scipy import stats

# graph viz tool
# import sys
# sys.path.append('/home/felix/Research/Adversarial Research/FGN---Research')
import torch_helper_lib as th



In [5]:
# Define what device we are using and make the run reproducible
print("CUDA Available: ",torch.cuda.is_available())

# Seed BOTH random number generators used by this notebook: torch draws the
# initial network weights, while the toy data set and the separating plane are
# sampled with np.random. Seeding only torch leaves the data different on
# every kernel restart.
SEED = 666
torch.manual_seed(SEED)
np.random.seed(SEED)

# use_cuda is hard-coded to False, so the CPU is selected even when CUDA is
# available; set it to True to opt in to the GPU.
use_cuda = False
device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")

CUDA Available:  True


In [6]:
# Build the N-dimensional toy data set

# dimensionality of every sample
num_dim = 25

# parameters of the isotropic gaussian the samples are drawn from
# (the leading factor makes it easy to move the cloud away from the origin)
data_centers = 0*np.ones(num_dim)
sigma = 1

# normal vector of the hyper-plane separating the two classes
# (this will become the target for the weights)
sep_plane = np.random.uniform(low=-1.0, high=1.0, size=num_dim)
# sep_plane = np.concatenate(([1],np.zeros(num_dim-1)))

num_samples = 50000

# draw the whole (num_samples, num_dim) cloud in one call, then shift it onto the centers
samples_xs = np.random.normal(loc=0, scale=sigma, size=(num_samples, num_dim)) + data_centers

# label by side of the separating hyper-plane: +1 when a sample projects
# strictly beyond the projection of the data center, -1 otherwise.
# Labels are kept as a (num_samples, 1) column.
plane_offset = np.matmul(data_centers, sep_plane)
projections = np.matmul(samples_xs, sep_plane)
samples_labels = np.where(projections > plane_offset, 1, -1).reshape(-1, 1)

In [7]:
# 2D Check: scatter the first two coordinates, one scatter call per class
flat_labels = samples_labels[:, 0]
is_pos = flat_labels == 1
is_neg = flat_labels == -1

# class +1
samples_x_1 = list(samples_xs[is_pos, 0])
samples_y_1 = list(samples_xs[is_pos, 1])

# class -1
samples_x_2 = list(samples_xs[is_neg, 0])
samples_y_2 = list(samples_xs[is_neg, 1])

plt.scatter(samples_x_1, samples_y_1)
plt.scatter(samples_x_2, samples_y_2)
plt.grid(True)
plt.show()

<IPython.core.display.Javascript object>

In [8]:
# 3D Check: same idea as the 2D check, using the first three coordinates
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# class 1 is drawn first, then class -1 (one scatter call each)
for class_label in (1, -1):
    in_class = samples_labels[:, 0] == class_label
    xs = list(samples_xs[in_class, 0])
    ys = list(samples_xs[in_class, 1])
    zs = list(samples_xs[in_class, 2])
    ax.scatter(xs, ys, zs)

plt.show()

<IPython.core.display.Javascript object>

In [9]:
# convert data to pytorch format (torch.Tensor yields float tensors, labels included)
tensor_x = torch.Tensor(samples_xs)
tensor_y = torch.Tensor(samples_labels)

# 80/20 train/test split.
# Integer (floor) division is required here: under Python 3, `num_samples*4/5`
# is a float and cannot be used as a slice index.
num_train = num_samples * 4 // 5

# first num_train samples -> training set, remainder -> held-out test set
my_dataset = torch.utils.data.TensorDataset(tensor_x[:num_train], tensor_y[:num_train])
my_test_data = torch.utils.data.TensorDataset(tensor_x[num_train:], tensor_y[num_train:])

# mini-batches of 256 for training; the test loader keeps the DataLoader
# default batch size
my_dataloader = torch.utils.data.DataLoader(my_dataset, batch_size=256)
my_test_dataloader = torch.utils.data.DataLoader(my_test_data)

In [10]:
# define the FGN layer class

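###
# NOTE: the initial centers (-1 in every dimension) and the initial sigma
# (= in_features) are HARDCODED in reset_parameters, so the neuron starts
# off-center relative to the data, but not too far away.
###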

import math 

class FGN_layer(nn.Module):
    r"""Finite Gaussian Neuron layer: a bias-free linear map gated by a gaussian.

    For every output unit the layer computes ``linear(x) * exp(-||x - c||^2 / sigma^2)``
    and clamps the product to ``[-1, 1]``.

    Args:
        in_features: size of each input sample
        out_features: number of neurons (size of each output sample)

    Shape:
        For a ``(batch, in_features)`` input the result is ``(batch, out_features)``.

    Attributes:
        weights: learnable linear weights, shape ``(out_features, in_features)``
        centers: learnable gaussian centers, shape ``(out_features, in_features)``
        sigs: learnable gaussian widths, one per neuron, shape ``(out_features,)``

    Examples:

        >>> l=FGN_layer(20,30)

    """

    def __init__(self, in_features, out_features):
        super(FGN_layer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features

        # Learnable parameters, registered in the order weights -> centers -> sigs.
        # The tensors are allocated uninitialised and filled by reset_parameters().
        per_neuron_shape = (out_features, in_features)
        self.weights = nn.Parameter(torch.Tensor(*per_neuron_shape), requires_grad=True)
        self.centers = nn.Parameter(torch.Tensor(*per_neuron_shape), requires_grad=True)
        self.sigs = nn.Parameter(torch.Tensor(out_features), requires_grad=True)

        self.reset_parameters()

    def reset_parameters(self):
        """(Re-)initialise weights, centers and sigmas in place."""
        bound = np.sqrt(self.in_features)
        # linear weights: uniform in [-sqrt(in_features), sqrt(in_features)]
        self.weights.data.uniform_(-bound, bound)
        # centers: degenerate uniform range, i.e. every coordinate is exactly -1
        self.centers.data.uniform_(-1, -1)
        # sigmas: degenerate uniform range, i.e. every sigma is exactly in_features
        self.sigs.data.uniform_(self.in_features, self.in_features)

    def forward(self, input):
        """Return the clamped product of the linear response and the gaussian gate."""
        # linear component, identical to an ordinary layer without bias
        linear_out = nn.functional.linear(input, self.weights, bias=None)

        # gaussian component: squared euclidean distance of each sample to each
        # neuron center; unsqueeze(1) lets the input broadcast against centers
        offsets = input.unsqueeze(1) - self.centers
        sq_dist = (offsets**2).sum(dim=2)
        gate = torch.exp(-sq_dist/(self.sigs**2))

        # combine both components and clip the activity to [-1, 1]
        return torch.clamp(linear_out*gate, min=-1.0, max=1.0)
    

In [11]:
# Define a (very specific) Finite Gaussian Neural Network

class FGNet(nn.Module):
    """Minimal network for the toy problem: one FGN layer with a single output neuron.

    The input width is taken from the notebook-level ``num_dim``.
    """

    def __init__(self):
        super(FGNet, self).__init__()
        # swap in ``nn.Linear(num_dim, 1)`` here to test with only a linear classifier
        self.l1 = FGN_layer(num_dim, 1)

    def forward(self, x):
        """Apply the single layer to a batch of ``num_dim``-wide samples and return its output."""
        return self.l1(x)

In [12]:
# Build the network, then place its parameters on the selected device
model = FGNet()
model = model.to(device)

In [13]:
# model stats: structure, parameter shapes and train/eval mode
print(model)
print([param.size() for param in model.parameters()])
print("Training?", model.training)

# the three learnable tensors of the single FGN layer, in print order
l1_params = (model.l1.weights, model.l1.centers, model.l1.sigs)

# check sizes
print("l1")
for param in l1_params:
    print(param.size())

# check weights before training (first four entries of each flattened tensor)
print("l1")
for param in l1_params:
    print(param.detach().cpu().view(-1).numpy()[:4])

FGNet(
  (l1): FGN_layer()
)
[torch.Size([1, 25]), torch.Size([1, 25]), torch.Size([1])]
Training? True
l1
torch.Size([1, 25])
torch.Size([1, 25])
torch.Size([1])
l1
[-1.8809628 -2.298659  -3.8821907 -3.9880955]
[-1. -1. -1. -1.]
[25.]


In [14]:
def train(model, device, train_loader, optimizer, epoch):
    """Run one training epoch and record the layer parameters after every update.

    Args:
        model: network exposing an ``l1`` layer with ``weights``, ``centers`` and ``sigs``
        device: device the batches are moved to
        train_loader: iterable of ``(data, target)`` batches; must expose ``.dataset``
        optimizer: optimizer stepping the model parameters
        epoch: current epoch number (not used inside the function)

    Returns:
        Tuple ``(weights, centers, sigmas)``. Each is a list of flattened
        parameter snapshots: one taken before the first batch, then one after
        every optimizer step.
    """
    def _snapshot(param):
        # flattened copy of the current parameter values as a plain list
        return list(param.view(-1).cpu().detach().numpy())

    layer = model.l1

    # histories start with the state before any update of this epoch
    weights = [_snapshot(layer.weights)]
    centers = [_snapshot(layer.centers)]
    sigmas = [_snapshot(layer.sigs)]

    # loss meter from torch_helper_lib; updated with (batch loss, batch size)
    losses = th.AverageMeter()
    correct = 0

    # set model to trainable mode
    model.train()
    for data, target in train_loader:
        # move the batch to the device
        data = data.to(device)
        target = target.to(device)

        # forward pass and MSE loss against the +/-1 labels
        optimizer.zero_grad()
        output = model(data)
        loss = F.mse_loss(output, target.float())

        # book-keeping: loss meter and number of sign-correct predictions
        losses.update(loss.item(), data.size(0))
        pred = torch.Tensor.sign(output)
        correct += pred.eq(target.view_as(pred)).sum().item()

        # backward pass and parameter update
        loss.backward()
        optimizer.step()

        # record the parameters right after this update
        weights.append(_snapshot(layer.weights))
        centers.append(_snapshot(layer.centers))
        sigmas.append(_snapshot(layer.sigs))

    num_train = len(train_loader.dataset)
    print('Train set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        losses.avg, correct, num_train,
        100. * correct / num_train))

    return (weights, centers, sigmas)

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.mse_loss(output, target.float())
            pred = torch.Tensor.sign(output)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [15]:
# training params: plain SGD with momentum and weight decay over the trainable parameters
trainable_params = (p for p in model.parameters() if p.requires_grad)
optimizer = optim.SGD(trainable_params, lr=0.01, momentum=0.5, weight_decay=0.01)

# values to observe: per-update parameter histories, filled in by the training loop
weights_per_update, centers_per_update, sigmas_per_update = [], [], []

In [16]:
# train the model: epochs 1..499, evaluating on the held-out set after each one
for epoch in range(1, 500):
    print("Epoch:", epoch)

    # one pass over the training data; returns the parameter snapshots of this epoch
    weights, centers, sigmas = train(model, device, my_dataloader, optimizer, epoch)

    # append this epoch's snapshots to the global histories
    weights_per_update.extend(weights)
    centers_per_update.extend(centers)
    sigmas_per_update.extend(sigmas)

    test(model, device, my_test_dataloader)

Epoch: 1
Train set: Average loss: 1.6328, Accuracy: 23326/40000 (58%)
Test set: Average loss: 1.6412, Accuracy: 5775/10000 (58%)
Epoch: 2
Train set: Average loss: 1.6071, Accuracy: 23530/40000 (59%)
Test set: Average loss: 1.6132, Accuracy: 5854/10000 (59%)
Epoch: 3
Train set: Average loss: 1.5794, Accuracy: 23781/40000 (59%)
Test set: Average loss: 1.5824, Accuracy: 5930/10000 (59%)
Epoch: 4
Train set: Average loss: 1.5490, Accuracy: 24043/40000 (60%)
Test set: Average loss: 1.5470, Accuracy: 6016/10000 (60%)
Epoch: 5
Train set: Average loss: 1.5161, Accuracy: 24344/40000 (61%)
Test set: Average loss: 1.5113, Accuracy: 6106/10000 (61%)
Epoch: 6
Train set: Average loss: 1.4827, Accuracy: 24695/40000 (62%)
Test set: Average loss: 1.4742, Accuracy: 6182/10000 (62%)
Epoch: 7
Train set: Average loss: 1.4482, Accuracy: 25035/40000 (63%)
Test set: Average loss: 1.4366, Accuracy: 6278/10000 (63%)
Epoch: 8
Train set: Average loss: 1.4122, Accuracy: 25373/40000 (63%)
Test set: Average loss: 1.3

Train set: Average loss: 0.1563, Accuracy: 39850/40000 (100%)
Test set: Average loss: 0.1566, Accuracy: 9963/10000 (100%)
Epoch: 65
Train set: Average loss: 0.1568, Accuracy: 39850/40000 (100%)
Test set: Average loss: 0.1571, Accuracy: 9962/10000 (100%)
Epoch: 66
Train set: Average loss: 0.1572, Accuracy: 39849/40000 (100%)
Test set: Average loss: 0.1574, Accuracy: 9962/10000 (100%)
Epoch: 67
Train set: Average loss: 0.1576, Accuracy: 39849/40000 (100%)
Test set: Average loss: 0.1578, Accuracy: 9962/10000 (100%)
Epoch: 68
Train set: Average loss: 0.1579, Accuracy: 39849/40000 (100%)
Test set: Average loss: 0.1580, Accuracy: 9962/10000 (100%)
Epoch: 69
Train set: Average loss: 0.1581, Accuracy: 39849/40000 (100%)
Test set: Average loss: 0.1583, Accuracy: 9962/10000 (100%)
Epoch: 70
Train set: Average loss: 0.1583, Accuracy: 39849/40000 (100%)
Test set: Average loss: 0.1584, Accuracy: 9962/10000 (100%)
Epoch: 71
Train set: Average loss: 0.1585, Accuracy: 39849/40000 (100%)
Test set: Aver

Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 127
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 128
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 129
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 130
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 131
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 132
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 133
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test se

Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 189
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 190
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 191
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 192
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 193
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 194
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 195
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test se

Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 251
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 252
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 253
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 254
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 255
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 256
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 257
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test se

Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 313
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 314
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 315
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 316
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 317
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 318
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 319
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test se

Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 375
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 376
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 377
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 378
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 379
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 380
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 381
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test se

Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 437
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 438
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 439
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 440
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 441
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 442
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 443
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test se

Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)
Epoch: 499
Train set: Average loss: 0.1590, Accuracy: 39847/40000 (100%)
Test set: Average loss: 0.1590, Accuracy: 9961/10000 (100%)


In [17]:
# check weights after training (first four entries of each flattened tensor)
print("l1")
for trained_param in (model.l1.weights, model.l1.centers, model.l1.sigs):
    print(trained_param.detach().cpu().view(-1).numpy()[:4])

l1
[ 0.64928657 -0.6214736  -0.14267817 -0.60771143]
[ 0.00258725 -0.00167843  0.00031047 -0.00245829]
[5.3168154]


In [18]:
# l1
# [-3.7619257 -4.597318  -7.7643814 -7.976191 ]
# [-1. -1. -1. -1.]
# [100.]

In [19]:
# x, y = zip(*centers_per_update)
# plt.plot(x,y, marker='.', linestyle=' ')
# # plt.axis([-3,3, -3, 3])
# plt.grid()
# plt.show()

# plot dist to center: euclidean distance between the data center and the
# recorded neuron center, one point per recorded update
def _dist_to_data_center(center):
    return np.linalg.norm(data_centers-center)

dists_per_update = list(map(_dist_to_data_center, centers_per_update))
plt.plot(dists_per_update, linestyle=' ', marker='.')
plt.grid()
plt.show()

<IPython.core.display.Javascript object>

In [20]:
# sigma history: one point per recorded update, markers only (no connecting line)
sigma_plot_style = dict(marker='.', linestyle=' ')
plt.plot(sigmas_per_update, **sigma_plot_style)
plt.grid()
plt.show()

<IPython.core.display.Javascript object>