In [1]:
# 2D Toy training example
# Goal: examine how the gradients change the center and sigma of a single neuron
# Conclusions: Center Gradients are correct (moves to the right center)

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import matplotlib as mpl
# set this 'backend' when using jupyter; do this before importing pyplot
mpl.use('nbagg')
import matplotlib.pyplot as plt

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import numpy as np
from scipy import stats

import sys
sys.path.append('/home/felix/Research/Adversarial Research/FGN---Research/')
import Finite_Gaussian_Network_lib as fgnl
import Finite_Gaussian_Network_lib.fgn_helper_lib as fgnh

In [5]:
# random seeds
# torch.manual_seed(1665)
# np.random.seed(3266)

# torch.backends.cudnn.deterministic = True
# torch.cuda.manual_seed_all(999)

In [6]:
!gpustat

[1m[37mcrescent              [m  Wed Aug  5 13:06:07 2020  [1m[30m418.87.01[m
[36m[0][m [34mGeForce RTX 2080 Ti[m |[1m[31m 53'C[m, [32m  0 %[m | [36m[1m[33m  851[m / [33m10989[m MB | [1m[30mfelix[m([33m841M[m)
[36m[1][m [34mGeForce RTX 2080 Ti[m |[31m 48'C[m, [32m  0 %[m | [36m[1m[33m   10[m / [33m10989[m MB |


In [7]:
# Define what device we are using

# manualy set cuda device
torch.cuda.set_device(1)

print("CUDA Available: ",torch.cuda.is_available())
use_cuda = False
device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")

CUDA Available:  True


In [8]:
# Define 2d Toy Data

# number of dimensions of the data
num_dim = 2

# gaussian target parameters
data_centers = 1*np.ones(num_dim)
sigma = 1

# hyper-plane separating the classes (this will become the target for the weights)
sep_plane = np.random.uniform(low=-1.0, high=1.0, size=num_dim)
# sep_plane = np.concatenate(([1],np.zeros(num_dim-1)))

num_samples = 4000
stretch = [3, 1]
rotate = -np.pi/4

samples_xs = np.array([stretch*np.random.normal(loc=0, scale=sigma, size=num_dim)+data_centers for _ in range(num_samples)] )

# rotate
samples_xs = np.array([(np.cos(rotate)*x1-np.sin(rotate)*x2,
                        np.sin(rotate)*x1+np.cos(rotate)*x2)  for (x1,x2) in samples_xs])

data_centers = [np.cos(rotate)*data_centers[0]-np.sin(rotate)*data_centers[1],
                        np.sin(rotate)*data_centers[0]+np.cos(rotate)*data_centers[1]]

sep_plane = [np.cos(rotate)*sep_plane[0]-np.sin(rotate)*sep_plane[1],
                        np.sin(rotate)*sep_plane[0]+np.cos(rotate)*sep_plane[1]]

# apply labels based on side of sep hyper plane
samples_labels = np.array([ [1] if x>np.matmul(data_centers, sep_plane) else [-1] for x in np.matmul(samples_xs, sep_plane)])


# shuffle in unison
permutation = np.random.permutation(len(samples_xs))
for old_index, new_index in enumerate(permutation):
    samples_xs[new_index] = samples_xs[old_index]
    samples_labels[new_index] = samples_labels[old_index]

In [9]:
# 2D Check 
samples_x_1 = [x for x,l in zip(samples_xs[:,0], samples_labels) if l==1]
samples_y_1 = [y for y,l in zip(samples_xs[:,1], samples_labels) if l==1]

samples_x_2 = [x for x,l in zip(samples_xs[:,0], samples_labels) if l==-1]
samples_y_2 = [y for y,l in zip(samples_xs[:,1], samples_labels) if l==-1]

plt.scatter(samples_x_1, samples_y_1)
plt.scatter(samples_x_2, samples_y_2)
plt.axis([-8,8, -8,8])
plt.grid(True)
plt.show()

<IPython.core.display.Javascript object>

In [10]:
# convert data to pytorch format 
tensor_x = torch.Tensor(samples_xs)
tensor_y = torch.Tensor(samples_labels)

slice_bound = int(num_samples*4/5)
my_dataset = torch.utils.data.TensorDataset(tensor_x[:slice_bound],tensor_y[:slice_bound]) # create your dataset
my_test_data = torch.utils.data.TensorDataset(tensor_x[slice_bound:],tensor_y[slice_bound:]) # create your dataset

my_dataloader = torch.utils.data.DataLoader(my_dataset, batch_size=4000, shuffle=True) # create your dataloader
my_test_dataloader = torch.utils.data.DataLoader(my_test_data) # create your dataloader

In [11]:
# Define a Finite Gaussian Neural Network

covar_type = 'chol'
ordinal= float(2)
free_biases = True

class FGNet(nn.Module):
    def __init__(self):
        super(FGNet, self).__init__()
        self.fl = fgnl.FGN_layer(2,1, covar_type=covar_type, ordinal=ordinal, free_biases=free_biases)
        
        self.covar_type = covar_type

    def forward(self, x):
        x,_ = self.fl(x)
        # clip res to +-1 for MSE loss
        x  = torch.clamp(x, min=-1.0, max=1.0)

        return x

In [12]:
# Initialize the network
model = FGNet().to(device)

In [13]:
model.state_dict()

OrderedDict([('fl.weights', tensor([[0.5010, 0.4483]])),
             ('fl.biases', tensor([-0.4946])),
             ('fl.centers', tensor([[-0.0891, -0.1197]])),
             ('fl.inv_covars',
              tensor([[[7.0711e-01, 0.0000e+00],
                       [4.9488e-06, 7.0711e-01]]]))])

In [14]:
# increase range of neuron
if covar_type in ['sphere', 'diag']:
    model.fl.inv_covars = torch.nn.Parameter(model.fl.inv_covars/50.0)
elif covar_type in ['full', 'half_full']:
    # this should be fixed, but im not 100% on how the full matrix affects range
    model.fl.inv_covars = torch.nn.Parameter(model.fl.inv_covars/100.0)


In [15]:
# model stats
print(model)
print([p.size() for p in list(model.parameters())])
# print([p for p in list(model.parameters())])
print("Training?", model.training) 
print(model.state_dict())

print("losses")
print(fgnh.l2_loss(model))
print(fgnl.sigmas_loss(model))

FGNet(
  (fl): FGN_layer()
)
[torch.Size([1, 2]), torch.Size([1]), torch.Size([1, 2]), torch.Size([1, 2, 2])]
Training? True
OrderedDict([('fl.weights', tensor([[0.5010, 0.4483]])), ('fl.biases', tensor([-0.4946])), ('fl.centers', tensor([[-0.0891, -0.1197]])), ('fl.inv_covars', tensor([[[7.0711e-01, 0.0000e+00],
         [4.9488e-06, 7.0711e-01]]]))])
losses
tensor([0.3483], grad_fn=<DivBackward0>)
tensor([-0.3536], grad_fn=<DivBackward0>)


In [16]:
# importances of the constraints
lmbda_l2 = (4.0*0.1/len(my_dataloader.dataset))
lmbda_sigs = 0.05
    
print(lmbda_l2)
print(lmbda_sigs)

# loss function
def loss_func(model, output, target):
    
    # normal MSE loss
    mse_loss = F.mse_loss(output, target)
    
    # sum of sigma squares loss
    sig_loss = fgnl.sigmas_loss(model)

    # l2 loss
    l2_loss = fgnh.l2_loss(model)
    
    # (old) negative log likelihood over gaussian loss
#     nllg_loss = 100.0*lmbda*torch.sum(likelihood)
#     nllg_loss = 0.0*torch.mean(likelihood)
    
    return mse_loss + lmbda_l2*l2_loss + lmbda_sigs*sig_loss 

0.000125
0.05


In [17]:
# number of correct pred function
def pred_func(output, target):
    # (old)split output into pred and likelihoods
#     output,_ = output
    pred = torch.Tensor.sign(output)
    correct = pred.eq(target.view_as(pred)).sum().item()   
    return correct

In [18]:
# model_optimizer
# optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.5, momentum=0.9, nesterov=True)
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.06,)
# optimizer = optim.Adagrad(filter(lambda p: p.requires_grad, model.parameters()), lr=0.1)
# optimizer = optim.Adadelta(filter(lambda p: p.requires_grad, model.parameters()), lr=0.1, )
# optimizer = optim.RMSprop(filter(lambda p: p.requires_grad, model.parameters()), lr=0.01, momentum=0.1)


# # optimize all but sigma
# optimizer = optim.Adam(filter(lambda p: p.requires_grad, [p for n,p in model.named_parameters()
#                                                           if 'inv_covar' not in n]), lr=0.02,)


In [19]:
model.state_dict()

OrderedDict([('fl.weights', tensor([[0.5010, 0.4483]])),
             ('fl.biases', tensor([-0.4946])),
             ('fl.centers', tensor([[-0.0891, -0.1197]])),
             ('fl.inv_covars',
              tensor([[[7.0711e-01, 0.0000e+00],
                       [4.9488e-06, 7.0711e-01]]]))])

In [20]:
epochs = 100
train_res = fgnh.train(model, my_dataloader, loss_func, optimizer, epochs, save_hist=2, verbose=True, pred_func=pred_func, test_loader=my_test_dataloader)

Epoch 0 Train set - Average loss: 0.9965, Accuracy: 1608/3200 (50%)
Test set - Average loss: 0.9502, Accuracy: 494/800 (62%)
Epoch 1 Train set - Average loss: 0.9645, Accuracy: 1911/3200 (60%)
Test set - Average loss: 0.9287, Accuracy: 555/800 (69%)
Epoch 2 Train set - Average loss: 0.9415, Accuracy: 2144/3200 (67%)
Test set - Average loss: 0.9130, Accuracy: 584/800 (73%)
Epoch 3 Train set - Average loss: 0.9245, Accuracy: 2292/3200 (72%)
Test set - Average loss: 0.9004, Accuracy: 612/800 (76%)
Epoch 4 Train set - Average loss: 0.9110, Accuracy: 2405/3200 (75%)
Test set - Average loss: 0.8893, Accuracy: 624/800 (78%)
Epoch 5 Train set - Average loss: 0.8992, Accuracy: 2481/3200 (78%)
Test set - Average loss: 0.8782, Accuracy: 638/800 (80%)
Epoch 6 Train set - Average loss: 0.8877, Accuracy: 2557/3200 (80%)
Test set - Average loss: 0.8661, Accuracy: 654/800 (82%)
Epoch 7 Train set - Average loss: 0.8754, Accuracy: 2616/3200 (82%)
Test set - Average loss: 0.8521, Accuracy: 665/800 (83%)


Test set - Average loss: 0.0455, Accuracy: 800/800 (100%)
Epoch 66 Train set - Average loss: 0.0383, Accuracy: 3200/3200 (100%)
Test set - Average loss: 0.0456, Accuracy: 800/800 (100%)
Epoch 67 Train set - Average loss: 0.0381, Accuracy: 3200/3200 (100%)
Test set - Average loss: 0.0456, Accuracy: 800/800 (100%)
Epoch 68 Train set - Average loss: 0.0380, Accuracy: 3200/3200 (100%)
Test set - Average loss: 0.0455, Accuracy: 800/800 (100%)
Epoch 69 Train set - Average loss: 0.0378, Accuracy: 3200/3200 (100%)
Test set - Average loss: 0.0453, Accuracy: 800/800 (100%)
Epoch 70 Train set - Average loss: 0.0377, Accuracy: 3200/3200 (100%)
Test set - Average loss: 0.0450, Accuracy: 800/800 (100%)
Epoch 71 Train set - Average loss: 0.0375, Accuracy: 3200/3200 (100%)
Test set - Average loss: 0.0446, Accuracy: 800/800 (100%)
Epoch 72 Train set - Average loss: 0.0373, Accuracy: 3200/3200 (100%)
Test set - Average loss: 0.0442, Accuracy: 800/800 (100%)
Epoch 73 Train set - Average loss: 0.0370, Acc

In [21]:
# model stats post training
print(model)
print([p.size() for p in list(model.parameters())])
# print([p for p in list(model.parameters())])
print("Training?", model.training) 
print(model.state_dict())

print("losses")
print(fgnh.l2_loss(model))
print(fgnl.sigmas_loss(model))

FGNet(
  (fl): FGN_layer()
)
[torch.Size([1, 2]), torch.Size([1]), torch.Size([1, 2]), torch.Size([1, 2, 2])]
Training? False
OrderedDict([('fl.weights', tensor([[ 0.9670, -2.7715]])), ('fl.biases', tensor([-1.3940])), ('fl.centers', tensor([[0.7073, 1.0614]])), ('fl.inv_covars', tensor([[[ 0.1046,  0.0000],
         [ 0.0247, -0.1123]]]))])
losses
tensor([5.2798], grad_fn=<DivBackward0>)
tensor([-0.0542], grad_fn=<DivBackward0>)


In [22]:
# compare center with target (1,1)

# plot the theoritical zero line (the separator )
b = -np.matmul(sep_plane,data_centers)
X = np.arange(-0.5,1.6, 0.1)
zero_line = -(sep_plane[0]*X+b)/sep_plane[1]
plt.plot(X,zero_line, color='black', label='theoretical separator')

# final zero line
W = model.fl.weights.detach().numpy()[0]
center = model.fl.centers.detach().numpy()[0]
b = -np.matmul(W,center)
final_zero_line = -(W[0]*X+b)/W[1]
plt.plot(X,final_zero_line, color='yellow', label='final separator')


x, y = zip(*train_res['histories']['fl.centers'][:,0,:])
plt.plot(x,y, marker='.', linestyle='-', label='path of neuron center')

# plot start
x,y = train_res['histories']['fl.centers'][0,0,:]
plt.plot(x,y, marker='o', linestyle=' ', markersize=6, color="red", label='start of neuron center')
# plot target
x, y = data_centers
plt.plot(x,y, marker='o', linestyle=' ', markersize=6, color="black", label='theoretical center')


# uncomment if you want to see the data and tighten the window
plt.scatter(samples_x_1, samples_y_1, alpha=0.1, c='red')
plt.scatter(samples_x_2, samples_y_2, alpha=0.1, c='blue')
# plt.axis([-1.5,2.5, -1, 1.5])

plt.legend()
plt.axis([-0.5,1.5, -0.5,1.5])
plt.grid()
plt.show()

<IPython.core.display.Javascript object>

In [23]:
# check that inv covar has gone up, sigmas down, and trace down if covar_type=='full'
fgnl.plot_sigmas_histories(train_res['histories'], covar_type=covar_type)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [24]:
# acc and loss hist
plt.plot(train_res['train_loss_hist'], marker='.', linestyle=' ', label='train')
plt.plot(train_res['test_loss_hist'], marker='.', linestyle=' ', label='test')
plt.grid()
plt.legend()
plt.title('Loss')
plt.show()

plt.plot(train_res['train_acc_hist'], marker='.', linestyle=' ', label='train')
plt.plot(train_res['test_acc_hist'], marker='.', linestyle=' ', label='test')
plt.grid()
plt.legend()
plt.title('Accuracy')
plt.show()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [25]:
# visualize neuron activity over data space
# extracts the parameters and shows theoritial neuron activity

from matplotlib import cm
# scale of the heat maps
scale = np.ceil(np.max(samples_xs))
X1 = np.arange(-scale,scale+0.1, 0.1)
X2 = np.arange(-scale,scale+0.1, 0.1)
X1s, X2s = np.meshgrid(X1,X2)
heatmap_inputs = np.reshape(list(zip(X1s.flatten(),X2s.flatten())),(-1,2))

# combining circular gaussian with normal neuron
# neuron parameters (weights, bias)
W = model.fl.weights.detach().numpy()[0]
print("weights:",W)
# b = 0 # bias defined by by the center of radial function

# radial parameters
center = model.fl.centers.detach().numpy()[0] # controls the center of gaussian (<=> bias of neuron)
# plot center as red dot
plt.plot(center[0], center[1], marker='o', markersize=4, color="red")

print("center:",center)

# if bias is defined by center
b1 = -np.matmul(W,center)
# if bias is a free param
b2 = model.fl.biases.detach().numpy()[0]
print("bias from centers:",b1)
print("bias from params (same as above if free_biases==False):",b2)
# new zero line
zero_line = -(W[0]*X1+b2)/W[1]

# heatmap neuronal activity
n_activity = np.sum(W*heatmap_inputs, axis=1)+b2
# print(n_activity.shape)

distances = heatmap_inputs-center

# heatmap radial activity 
if covar_type == 'sphere':
    inv_covar = model.fl.inv_covars.detach().numpy()[0]
    sig = 1.0/inv_covar
    r_activity = np.exp((-1.0/abs(sig)**2) *  np.sum(np.square(heatmap_inputs-center), axis=1))
    
elif covar_type == 'diag':
    inv_covar = model.fl.inv_covars.detach().numpy()[0]
    sig = 1.0/inv_covar
    ded = np.einsum('ij,ij->i', distances*abs(inv_covar)**2, distances)
    r_activity = np.exp(-ded)

else:
    inv_covar = model.fl.inv_covars.detach().numpy()[0]
    sig = np.linalg.inv(np.matmul(inv_covar,inv_covar))
    ded = np.einsum('zi,ij,kj,zk->z', distances, inv_covar, inv_covar, distances)
    r_activity = np.exp(-ded)

print("inv covar:", inv_covar)
print("sig:", sig)

    
# overall heatmap activity
activity = n_activity*r_activity
# clip to +-1
activity = np.clip(activity, a_max=0.999, a_min=-1.0)

# plot the zero line
plt.plot(X1,zero_line, color='black')
# plot the heatmap 
maxi = 1.0
levels = np.arange(-maxi, maxi+0.1,maxi/10.0)
ticks = np.arange(-maxi, maxi+0.1, maxi/5.0)


plt.contourf(X1s, X2s, np.reshape(activity, np.shape(X1s) ), levels=levels, cmap=cm.RdYlBu_r)

plt.colorbar(ticks=ticks)
#reset axes
plt.axis([-scale,scale, -scale, scale])
plt.grid(True)
plt.show()

weights: [ 0.96702504 -2.7714746 ]
center: [0.7073024 1.061397 ]
bias from centers: 2.2576556
bias from params (same as above if free_biases==False): -1.39403
inv covar: [[ 0.10457687  0.        ]
 [ 0.02465255 -0.1123344 ]]
sig: [[91.43842    0.       ]
 [ 1.3857615 79.24547  ]]


<IPython.core.display.Javascript object>

In [26]:
# applies the model to get the actual heatmap
# results might be slightly different based on adding a tanh() or not, clipping of output, ordinal!=2 etc...
# should be close to above
model.eval()
heatmap_inputs_torch = torch.Tensor(heatmap_inputs)
heatmap_preds = model(heatmap_inputs_torch.to(device))
heatmap_preds = heatmap_preds.cpu().detach().numpy()

In [27]:
levels = np.arange(-1.0, 1.0+0.001, 5**(-2))
ticks = levels[::5]

plt.contourf(X1s, X2s, np.reshape(heatmap_preds[:,0], np.shape(X1s) ),levels=levels, cmap= mpl.cm.RdYlBu_r)
plt.colorbar(ticks=ticks)

# uncomment if you want to see the data and tighten the window
plt.scatter(samples_x_1, samples_y_1, alpha=0.05, c='red')
plt.scatter(samples_x_2, samples_y_2, alpha=0.05, c='blue')
# plt.axis([-1.5,2.5, -1, 1.5])

plt.grid(True)
plt.show()

<IPython.core.display.Javascript object>

In [28]:
# now train with much larger lmbda_sigs

In [29]:
# importances of the constraints
lmbda_l2 = (4.0*0.1/len(my_dataloader.dataset))
lmbda_sigs *= 5

print(lmbda_l2)
print(lmbda_sigs)

# loss function
def loss_func(model, output, target):
    
    # normal MSE loss
    mse_loss = F.mse_loss(output, target)
    
    # sum of sigma squares loss
    sig_loss = fgnl.sigmas_loss(model)

    # l2 loss
    l2_loss = fgnh.l2_loss(model)
    
    # (old) negative log likelihood over gaussian loss
#     nllg_loss = 100.0*lmbda*torch.sum(likelihood)
#     nllg_loss = 0.0*torch.mean(likelihood)
    
    return mse_loss + lmbda_l2*l2_loss + lmbda_sigs*sig_loss 

0.000125
0.25


In [30]:
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.03,)

In [31]:
epochs = 100
train_res = fgnh.train(model, my_dataloader, loss_func, optimizer, epochs, save_hist=2, verbose=True, pred_func=pred_func, test_loader=my_test_dataloader)

Epoch 0 Train set - Average loss: 0.0223, Accuracy: 3186/3200 (100%)
Test set - Average loss: 0.0246, Accuracy: 797/800 (100%)
Epoch 1 Train set - Average loss: 0.0188, Accuracy: 3185/3200 (100%)
Test set - Average loss: 0.0199, Accuracy: 797/800 (100%)
Epoch 2 Train set - Average loss: 0.0155, Accuracy: 3182/3200 (99%)
Test set - Average loss: 0.0172, Accuracy: 798/800 (100%)
Epoch 3 Train set - Average loss: 0.0123, Accuracy: 3186/3200 (100%)
Test set - Average loss: 0.0158, Accuracy: 798/800 (100%)
Epoch 4 Train set - Average loss: 0.0108, Accuracy: 3187/3200 (100%)
Test set - Average loss: 0.0154, Accuracy: 798/800 (100%)
Epoch 5 Train set - Average loss: 0.0104, Accuracy: 3187/3200 (100%)
Test set - Average loss: 0.0137, Accuracy: 798/800 (100%)
Epoch 6 Train set - Average loss: 0.0092, Accuracy: 3187/3200 (100%)
Test set - Average loss: 0.0115, Accuracy: 798/800 (100%)
Epoch 7 Train set - Average loss: 0.0074, Accuracy: 3187/3200 (100%)
Test set - Average loss: 0.0098, Accuracy: 

Test set - Average loss: -0.0073, Accuracy: 798/800 (100%)
Epoch 65 Train set - Average loss: -0.0095, Accuracy: 3189/3200 (100%)
Test set - Average loss: -0.0074, Accuracy: 798/800 (100%)
Epoch 66 Train set - Average loss: -0.0096, Accuracy: 3192/3200 (100%)
Test set - Average loss: -0.0075, Accuracy: 798/800 (100%)
Epoch 67 Train set - Average loss: -0.0097, Accuracy: 3192/3200 (100%)
Test set - Average loss: -0.0076, Accuracy: 798/800 (100%)
Epoch 68 Train set - Average loss: -0.0098, Accuracy: 3192/3200 (100%)
Test set - Average loss: -0.0077, Accuracy: 798/800 (100%)
Epoch 69 Train set - Average loss: -0.0100, Accuracy: 3192/3200 (100%)
Test set - Average loss: -0.0078, Accuracy: 798/800 (100%)
Epoch 70 Train set - Average loss: -0.0101, Accuracy: 3192/3200 (100%)
Test set - Average loss: -0.0080, Accuracy: 798/800 (100%)
Epoch 71 Train set - Average loss: -0.0102, Accuracy: 3192/3200 (100%)
Test set - Average loss: -0.0082, Accuracy: 798/800 (100%)
Epoch 72 Train set - Average lo

In [32]:
# model stats post tunning
print(model)
print([p.size() for p in list(model.parameters())])
# print([p for p in list(model.parameters())])
print("Training?", model.training) 
print(model.state_dict())

print("losses")
print(fgnh.l2_loss(model))
print(fgnl.sigmas_loss(model))

FGNet(
  (fl): FGN_layer()
)
[torch.Size([1, 2]), torch.Size([1]), torch.Size([1, 2]), torch.Size([1, 2, 2])]
Training? False
OrderedDict([('fl.weights', tensor([[ 1.7980, -5.1789]])), ('fl.biases', tensor([-2.5375])), ('fl.centers', tensor([[1.3934, 0.0747]])), ('fl.inv_covars', tensor([[[ 0.3909,  0.0000],
         [ 0.2550, -0.2754]]]))])
losses
tensor([18.2463], grad_fn=<DivBackward0>)
tensor([-0.1666], grad_fn=<DivBackward0>)


In [33]:
# compare center with target (1,1)

# plot the zero line
b = -np.matmul(sep_plane,data_centers)
X = np.arange(-0.5,1.6, 0.1)
zero_line = -(sep_plane[0]*X+b)/sep_plane[1]
plt.plot(X,zero_line, color='black', label='theoretical separator')

# final zero line
W = model.fl.weights.detach().numpy()[0]
center = model.fl.centers.detach().numpy()[0]
b = -np.matmul(W,center)
final_zero_line = -(W[0]*X+b)/W[1]
plt.plot(X,final_zero_line, color='yellow', label='final separator')


x, y = zip(*train_res['histories']['fl.centers'][:,0,:])
plt.plot(x,y, marker='.', linestyle='-', label='path of neuron center')

# plot start
x,y = train_res['histories']['fl.centers'][0,0,:]
plt.plot(x,y, marker='o', linestyle=' ', markersize=6, color="red", label='start of neuron center')
# plot target
x, y = data_centers
plt.plot(x,y, marker='o', linestyle=' ', markersize=6, color="black", label='theoretical center')

# uncomment if you want to see the data and tighten the window
plt.scatter(samples_x_1, samples_y_1, alpha=0.1, c='red')
plt.scatter(samples_x_2, samples_y_2, alpha=0.1, c='blue')
# plt.axis([-1.5,2.5, -1, 1.5])

plt.legend()
plt.axis([-0.5,1.5, -0.5,1.5])
plt.grid()
plt.show()

<IPython.core.display.Javascript object>

In [34]:
# acc and loss hist
plt.plot(train_res['train_loss_hist'], marker='.', linestyle=' ', label='train')
plt.plot(train_res['test_loss_hist'], marker='.', linestyle=' ', label='test')
plt.grid()
plt.legend()
plt.title('Loss')
plt.show()

plt.plot(train_res['train_acc_hist'], marker='.', linestyle=' ', label='train')
plt.plot(train_res['test_acc_hist'], marker='.', linestyle=' ', label='test')
plt.grid()
plt.legend()
plt.title('Accuracy')
plt.show()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [35]:
# check that inv covar has gone up, sigmas down, and trace down if covar_type=='full'
fgnl.plot_sigmas_histories(train_res['histories'], covar_type=model.covar_type)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [36]:
# visualize neuron activity over data space
# extracts the parameters and shows theoritial neuron activity

# combining circular gaussian with normal neuron
# neuron parameters (weights, bias)
W = model.fl.weights.detach().numpy()[0]
print("weights:",W)
# b = 0 # bias defined by by the center of radial function

# radial parameters
center = model.fl.centers.detach().numpy()[0] # controls the center of gaussian (<=> bias of neuron)
# plot center as red dot
plt.plot(center[0], center[1], marker='o', markersize=4, color="red")

print("center:",center)

# if bias is defined by center
b1 = -np.matmul(W,center)
# if bias is a free param
b2 = model.fl.biases.detach().numpy()[0]
print("bias from centers:",b1)
print("bias from params (same if free_biases==False):",b2)
# new zero line
zero_line = -(W[0]*X1+b2)/W[1]

# heatmap neuronal activity
n_activity = np.sum(W*heatmap_inputs, axis=1)+b
# print(n_activity.shape)

distances = heatmap_inputs-center

# heatmap radial activity 
if covar_type == 'sphere':
    inv_covar = model.fl.inv_covars.detach().numpy()[0]
    sig = 1.0/inv_covar
    r_activity = np.exp((-1.0/abs(sig)**2) *  np.sum(np.square(heatmap_inputs-center), axis=1))
    
elif covar_type == 'diag':
    inv_covar = model.fl.inv_covars.detach().numpy()[0]
    sig = 1.0/inv_covar
    ded = np.einsum('ij,ij->i', distances*abs(inv_covar)**2, distances)
    r_activity = np.exp(-ded)

elif covar_type == 'full':
    inv_covar = model.fl.inv_covars.detach().numpy()[0]
    sig = np.linalg.inv(np.matmul(inv_covar,inv_covar))
    ded = np.einsum('lzi,zik,lzk->lz', distances, inv_covars, distances)
    r_activity = np.exp(-ded)
    
elif covar_type == 'chol':
    inv_covar = model.fl.inv_covars.detach().numpy()[0]
    sig = np.linalg.inv(np.matmul(inv_covar,inv_covar))
    ded = np.einsum('zi,ij,kj,zk->z', distances, inv_covar, inv_covar, distances)
    r_activity = np.exp(-ded)

print("inv covar:", inv_covar)
print("sig:", sig)

    
# overall heatmap activity
activity = n_activity*r_activity
# clip to +-1
activity = np.clip(activity, a_max=0.999, a_min=-1.0)

# plot the zero line
plt.plot(X1,zero_line, color='black')
# plot the heatmap 
# maxi = 1.0
# levels = np.arange(-maxi, maxi+0.1,maxi/10.0)
# ticks = np.arange(-maxi, maxi+0.1, maxi/5.0)

levels = np.arange(-1.0, 1.0+0.001, 1/10.)
ticks = levels[::5]


plt.contourf(X1s, X2s, np.reshape(activity, np.shape(X1s) ), levels=levels, cmap=cm.RdYlBu_r)

plt.colorbar(ticks=ticks)
#reset axes
plt.axis([-scale,scale, -scale, scale])
plt.grid(True)
plt.show()

weights: [ 1.7979598 -5.178891 ]
center: [1.393365  0.0747212]
bias from centers: -2.1182413
bias from params (same if free_biases==False): -2.5375392
inv covar: [[ 0.39090475  0.        ]
 [ 0.25502667 -0.27538553]]
sig: [[ 6.5442233  0.       ]
 [-2.5422354 13.186143 ]]


<IPython.core.display.Javascript object>

In [37]:
# applies the model to get the actual heatmap
# results might be slightly different based on adding a tanh() or not, clipping of output, ordinal!=2 etc...
# should be close to above
model.eval()
heatmap_inputs = torch.Tensor(heatmap_inputs)
heatmap_preds = model(heatmap_inputs.to(device))
heatmap_preds = heatmap_preds.cpu().detach().numpy()

In [38]:
levels = np.arange(-1.0, 1.0+0.001, 5**(-2))
ticks = levels[::5]

plt.contourf(X1s, X2s, np.reshape(heatmap_preds[:,0], np.shape(X1s) ),levels=levels, cmap= cm.RdYlBu_r)
plt.colorbar(ticks=ticks)

# uncomment if you want to see the data and tighten the window
plt.scatter(samples_x_1, samples_y_1, alpha=0.1, c='red')
plt.scatter(samples_x_2, samples_y_2, alpha=0.1, c='blue')
# plt.axis([-1.5,2.5, -1, 1.5])

plt.grid(True)
plt.show()

<IPython.core.display.Javascript object>

In [39]:
# draw just the gaussian activity and the data
# plot the heatmap 
levels = np.arange(-1.0, 1.0+0.001, 5**(-2))
ticks = levels[::5]

plt.contourf(X1s, X2s, np.reshape(r_activity, np.shape(X1s) ), levels=levels, cmap= mpl.cm.RdYlBu_r)

plt.colorbar(ticks=ticks)

# uncomment if you want to see the data and tighten the window
plt.scatter(samples_x_1, samples_y_1, alpha=0.03, c='gray')
plt.scatter(samples_x_2, samples_y_2, alpha=0.03, c='gray')
# plt.axis([-1.5,2.5, -1, 1.5])

#reset axes
plt.axis([-scale,scale, -scale, scale])
plt.grid(True)
plt.show()

<IPython.core.display.Javascript object>