In [1]:
# 2D Toy training example
# Goal: examine how the gradients change the center and sigma of a single neuron
# Conclusions: Center Gradients are correct (moves to the right center)

In [2]:
from __future__ import print_function

In [3]:
import matplotlib as mpl
# set this 'backend' when using jupyter; do this before importing pyplot
mpl.use('nbagg')
import matplotlib.pyplot as plt

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import numpy as np
from scipy import stats

import sys
sys.path.append('/home/felix/Research/Adversarial Research/FGN---Research/')
import Finite_Gaussian_Network_lib as fgnl
import Finite_Gaussian_Network_lib.fgn_helper_lib as fgnh

In [5]:
# random seeds
# torch.manual_seed(1665)
# np.random.seed(3266)

# torch.backends.cudnn.deterministic = True
# torch.cuda.manual_seed_all(999)

In [6]:
!gpustat

[1mcrescent[0m  Wed Dec 18 13:26:04 2019
[0;36m[0][0m [0;34mGeForce GTX 1080[0m |[1;31m 80'C[0m, [1;32m 99 %[0m | [0;36m[1;33m 7448[0m / [0;33m 8119[0m MB | [1;30msalami[0m([0;33m2917M[0m) [1;30msoumi[0m([0;33m4521M[0m)
[0;36m[1][0m [0;34mGeForce GTX 1080[0m |[0;31m 46'C[0m, [0;32m  0 %[0m | [0;36m[1;33m  581[0m / [0;33m 8119[0m MB | [1;30mfelix[0m([0;33m571M[0m)


In [7]:
# Define what device we are using
print("CUDA Available: ",torch.cuda.is_available())
use_cuda = False
device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")

# manualy set cuda device
torch.cuda.set_device(1)

CUDA Available:  True


In [8]:
# Define 2d Toy Data

# number of dimensions of the data
num_dim = 2

# gaussian target parameters
data_centers = 1*np.ones(num_dim)
sigma = 1

# hyper-plane separating the classes (this will become the target for the weights)
sep_plane = np.random.uniform(low=-1.0, high=1.0, size=num_dim)
# sep_plane = np.concatenate(([1],np.zeros(num_dim-1)))

num_samples = 2048
stretch = [1.0, 4.0]

samples_xs = np.array([stretch*np.random.normal(loc=0, scale=sigma, size=num_dim)+data_centers for _ in range(num_samples)] )

# apply labels based on side of sep hyper plane
samples_labels = np.array([ [1] if x>np.matmul(data_centers, sep_plane) else [-1] for x in np.matmul(samples_xs, sep_plane)])

# shuffle in unison
permutation = np.random.permutation(len(samples_xs))
for old_index, new_index in enumerate(permutation):
    samples_xs[new_index] = samples_xs[old_index]
    samples_labels[new_index] = samples_labels[old_index]

In [9]:
# 2D Check 
samples_x_1 = [x for x,l in zip(samples_xs[:,0], samples_labels) if l==1]
samples_y_1 = [y for y,l in zip(samples_xs[:,1], samples_labels) if l==1]

samples_x_2 = [x for x,l in zip(samples_xs[:,0], samples_labels) if l==-1]
samples_y_2 = [y for y,l in zip(samples_xs[:,1], samples_labels) if l==-1]

plt.scatter(samples_x_1, samples_y_1)
plt.scatter(samples_x_2, samples_y_2)
plt.axis([-8,10, -8,10])
plt.grid(True)
plt.show()

<IPython.core.display.Javascript object>

In [10]:
# convert data to pytorch format 
tensor_x = torch.Tensor(samples_xs)
tensor_y = torch.Tensor(samples_labels)

my_dataset = torch.utils.data.TensorDataset(tensor_x[:num_samples*4/5],tensor_y[:num_samples*4/5]) # create your dataset
my_test_data = torch.utils.data.TensorDataset(tensor_x[num_samples*4/5:],tensor_y[num_samples*4/5:]) # create your dataset

my_dataloader = torch.utils.data.DataLoader(my_dataset, batch_size=192, shuffle=True) # create your dataloader
my_test_dataloader = torch.utils.data.DataLoader(my_test_data) # create your dataloader

In [11]:
# Define a Finite Gaussian Neural Network

covar_type = 'sphere'
ordinal= float(5)
free_biases = True

class FGNet(nn.Module):
    def __init__(self):
        super(FGNet, self).__init__()
        self.fl = fgnl.FGN_layer(2,1, covar_type=covar_type, ordinal=ordinal, free_biases=free_biases)

    def forward(self, x):
        x = self.fl(x)
        # clip res to +-1 for MSE loss
        x  = torch.clamp(x, min=-1.0, max=1.0)

        return x

In [12]:
# Initialize the network
model = FGNet().to(device)

In [13]:
model.state_dict()

OrderedDict([('fl.weights', tensor([[ 0.1119, -0.6150]])),
             ('fl.biases', tensor([-0.0173])),
             ('fl.centers', tensor([[-0.0663, -0.0407]])),
             ('fl.inv_covars', tensor([0.4618]))])

In [14]:
# increase range of neuron
if covar_type in ['sphere', 'diag']:
    model.fl.inv_covars = torch.nn.Parameter(model.fl.inv_covars/10.0)
elif covar_type == 'full':
    # this should be fixed, but im not 100% on how the full matrix affects range
    model.fl.inv_covars = torch.nn.Parameter(model.fl.inv_covars/10.0)


In [15]:
# model stats
print(model)
print([p.size() for p in list(model.parameters())])
# print([p for p in list(model.parameters())])
print("Training?", model.training) 
model.state_dict()

FGNet(
  (fl): FGN_layer()
)
[torch.Size([1, 2]), torch.Size([1]), torch.Size([1, 2]), torch.Size([1])]
Training? True


OrderedDict([('fl.weights', tensor([[ 0.1119, -0.6150]])),
             ('fl.biases', tensor([-0.0173])),
             ('fl.centers', tensor([[-0.0663, -0.0407]])),
             ('fl.inv_covars', tensor([0.0462]))])

In [16]:
# importances of the constraints
lmbda_l2 = (4.0*0.1/len(my_dataloader.dataset))
if covar_type == 'sphere':
    lmbda_sigs = 1e2*lmbda_l2
elif covar_type == 'diag':
    lmbda_sigs = 1e5*lmbda_l2
elif covar_type == 'full':
    lmbda_sigs = 80.0*lmbda_l2
    
print(lmbda_l2)
print(lmbda_sigs)

# loss function
def loss_func(model, output, target):
    
    # normal MSE loss
    mse_loss = F.mse_loss(output, target)
    
    # sum of sigma squares loss
    sig_loss = fgnl.sigmas_loss(model)

    # l2 loss
    l2_loss = fgnh.l2_loss(model)
    
    # (old) negative log likelihood over gaussian loss
#     nllg_loss = 100.0*lmbda*torch.sum(likelihood)
#     nllg_loss = 0.0*torch.mean(likelihood)
    
    return mse_loss + lmbda_l2*l2_loss + lmbda_sigs*sig_loss 

0.0002442002442
0.02442002442


In [17]:
# number of correct pred function
def pred_func(output, target):
    # (old)split output into pred and likelihoods
#     output,_ = output
    pred = torch.Tensor.sign(output)
    correct = pred.eq(target.view_as(pred)).sum().item()   
    return correct

In [18]:
# model_optimizer
# optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.1, momentum=0.5, nesterov=True)
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.05,)
# optimizer = optim.Adagrad(filter(lambda p: p.requires_grad, model.parameters()), lr=0.1)
# optimizer = optim.Adadelta(filter(lambda p: p.requires_grad, model.parameters()), lr=0.1, )
# optimizer = optim.RMSprop(filter(lambda p: p.requires_grad, model.parameters()), lr=0.01, momentum=0.1)


In [19]:
epochs = 10
train_res = fgnh.train(model, my_dataloader, loss_func, optimizer, epochs, save_hist=2, verbose=True, pred_func=pred_func, test_loader=my_test_dataloader)

Epoch 0 Train set - Average loss: 3.3590, Accuracy: 293/1638 (18%)
Test set - Average loss: 1.2291, Accuracy: 115/410 (28%)
Epoch 1 Train set - Average loss: 1.1401, Accuracy: 582/1638 (36%)
Test set - Average loss: 1.0547, Accuracy: 160/410 (39%)
Epoch 2 Train set - Average loss: 1.0294, Accuracy: 636/1638 (39%)
Test set - Average loss: 0.9919, Accuracy: 160/410 (39%)
Epoch 3 Train set - Average loss: 0.9802, Accuracy: 646/1638 (39%)
Test set - Average loss: 0.9660, Accuracy: 164/410 (40%)
Epoch 4 Train set - Average loss: 0.9552, Accuracy: 665/1638 (41%)
Test set - Average loss: 0.9411, Accuracy: 172/410 (42%)
Epoch 5 Train set - Average loss: 0.9383, Accuracy: 687/1638 (42%)
Test set - Average loss: 0.9232, Accuracy: 175/410 (43%)
Epoch 6 Train set - Average loss: 0.9310, Accuracy: 694/1638 (42%)
Test set - Average loss: 0.9120, Accuracy: 175/410 (43%)
Epoch 7 Train set - Average loss: 0.9295, Accuracy: 693/1638 (42%)
Test set - Average loss: 0.9103, Accuracy: 175/410 (43%)
Epoch 8 

In [20]:
# print model state_dict post training
model.state_dict()

OrderedDict([('fl.weights', tensor([[-0.6627,  0.2876]])),
             ('fl.biases', tensor([-1.6662])),
             ('fl.centers', tensor([[ 0.4986, -1.2752]])),
             ('fl.inv_covars', tensor([0.4365]))])

In [21]:
# compare center with target (1,1)

# plot the zero line
b = -np.matmul(sep_plane,data_centers)
X = np.arange(-0.5,1.6, 0.1)
zero_line = -(sep_plane[0]*X+b)/sep_plane[1]
plt.plot(X,zero_line, color='black', label='theoretical separator')

# final zero line
W = model.fl.weights.detach().numpy()[0]
center = model.fl.centers.detach().numpy()[0]
b = -np.matmul(W,center)
final_zero_line = -(W[0]*X+b)/W[1]
plt.plot(X,final_zero_line, color='yellow', label='final separator')


x, y = zip(*train_res['histories']['fl.centers'][:,0,:])
plt.plot(x,y, marker='.', linestyle='-', label='path of neuron center')

# plot start
x,y = train_res['histories']['fl.centers'][0,0,:]
plt.plot(x,y, marker='o', linestyle=' ', markersize=4, color="red", label='start of neuron center')
# plot target
x, y = 1,1
plt.plot(x,y, marker='o', linestyle=' ', markersize=4, color="green", label='theoretical center')

plt.legend()
plt.axis([-0.5,1.5, -0.5,1.5])
plt.grid()
plt.show()

<IPython.core.display.Javascript object>

In [22]:
# check that inv covar has gone up, sigmas down, and trace down if covar_type=='full'
fgnl.plot_sigmas_histories(train_res['histories'])

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [23]:
# acc and loss hist
plt.plot(train_res['train_loss_hist'], marker='.', linestyle=' ', label='train')
plt.plot(train_res['test_loss_hist'], marker='.', linestyle=' ', label='test')
plt.grid()
plt.legend()
plt.title('Loss')
plt.show()

plt.plot(train_res['train_acc_hist'], marker='.', linestyle=' ', label='train')
plt.plot(train_res['test_acc_hist'], marker='.', linestyle=' ', label='test')
plt.grid()
plt.legend()
plt.title('Accuracy')
plt.show()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [24]:
# visualize neuron activity over data space
# extracts the parameters and shows theoritial neuron activity

from matplotlib import cm
# scale of the heat maps
scale = 8
X1 = np.arange(-scale,scale+0.1, 0.1)
X2 = np.arange(-scale,scale+0.1, 0.1)
X1s, X2s = np.meshgrid(X1,X2)
heatmap_inputs = np.reshape(zip(X1s.flatten(),X2s.flatten()),(-1,2))

# combining circular gaussian with normal neuron
# neuron parameters (weights, bias)
W = model.fl.weights.detach().numpy()[0]
print("weights:",W)
# b = 0 # bias defined by by the center of radial function

# radial parameters
center = model.fl.centers.detach().numpy()[0] # controls the center of gaussian (<=> bias of neuron)
# plot center as red dot
plt.plot(center[0], center[1], marker='o', markersize=4, color="red")

print("center:",center)

# if bias is defined by center
b1 = -np.matmul(W,center)
# if bias is a free param
b2 = model.fl.biases.detach().numpy()[0]
print("bias from centers:",b1)
print("bias from params (same if free_biases==False):",b2)
# new zero line
zero_line = -(W[0]*X1+b2)/W[1]

# heatmap neuronal activity
n_activity = np.sum(W*heatmap_inputs, axis=1)+b
# print(n_activity.shape)

distances = heatmap_inputs-center

# heatmap radial activity 
if covar_type == 'sphere':
    inv_covar = model.fl.inv_covars.detach().numpy()[0]
    sig = 1.0/inv_covar
    r_activity = np.exp((-1.0/abs(sig)**2) *  np.sum(np.square(heatmap_inputs-center), axis=1))
    
elif covar_type == 'diag':
    inv_covar = model.fl.inv_covars.detach().numpy()[0]
    sig = 1.0/inv_covar
    ded = np.einsum('ij,ij->i', distances*abs(inv_covar)**2, distances)
    r_activity = np.exp(-ded)

else:
    inv_covar = model.fl.inv_covars.detach().numpy()[0]
    sig = np.linalg.inv(np.matmul(inv_covar,inv_covar))
    ded = np.einsum('zi,ij,kj,zk->z', distances, inv_covar, inv_covar, distances)
    r_activity = np.exp(-ded)

print("inv covar:", inv_covar)
print("sig:", sig)

    
# overall heatmap activity
activity = n_activity*r_activity

# plot the zero line
plt.plot(X1,zero_line, color='black')
# plot the heatmap 
maxi = np.max(abs(activity))
print(maxi)
levels = np.arange(-maxi,maxi+0.1,maxi/10.0)
ticks = np.arange(-maxi,maxi+0.1, maxi/5.0)


plt.contourf(X1s, X2s, np.reshape(activity, np.shape(X1s) ), levels=levels, cmap=cm.RdYlBu_r)

plt.colorbar(ticks=ticks)
#reset axes
plt.axis([-scale,scale, -scale, scale])
plt.grid(True)
plt.show()

weights: [-0.6627345   0.28761005]
center: [ 0.49858898 -1.2751617 ]
bias from centers: 0.69718146
bias from params (same if free_biases==False): -1.6661538
inv covar: 0.4365278
sig: 2.2908049064130105
0.7097061502260248


<IPython.core.display.Javascript object>

In [25]:
# applies the model to get the actual heatmap
# results might be slightly different based on adding a tanh() or not, clipping of output, ordinal!=2 etc...
# should be close to above
model.eval()
heatmap_inputs = torch.Tensor(heatmap_inputs)
heatmap_preds = model(heatmap_inputs.to(device))
heatmap_preds = heatmap_preds.cpu().detach().numpy()

In [26]:
levels = np.arange(-1.0, 1.0+0.001, 5**(-2))
ticks = levels[::5]

plt.contourf(X1s, X2s, np.reshape(heatmap_preds[:,0], np.shape(X1s) ),levels=levels, cmap= mpl.cm.RdYlBu_r)
plt.colorbar(ticks=ticks)

# # uncomment if you want to see the data and tighten the window
# plt.scatter(samples_x_1, samples_y_1, alpha=0.1, c='gray')
# plt.scatter(samples_x_2, samples_y_2, alpha=0.1, c='gray')
# plt.axis([-1.5,2.5, -1, 1.5])

plt.grid(True)
plt.show()

<IPython.core.display.Javascript object>