In [1]:
# In this notebook I show that converting a normal feedforward net to an FGN is easy
# (demonstrated first on a toy 2D dataset)

In [2]:
from __future__ import print_function

In [3]:
import matplotlib as mpl
# set this 'backend' when using jupyter; do this before importing pyplot
mpl.use('nbagg')
import matplotlib.pyplot as plt

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# NOTE(review): the name `datasets` bound by this import is rebound by the sklearn import
# further down, so after this cell `datasets` refers to sklearn.datasets.
from torchvision import datasets, transforms

import numpy as np
import scipy as sp
from scipy import stats
# Rebinds `datasets` (see note above); the sklearn module is the one the data cell relies on.
from sklearn import datasets
from sklearn.utils import shuffle as shuffle_in_unison


import sys
# NOTE(review): machine-specific absolute path; the two FGN library imports below
# presumably resolve through this entry — adjust when running elsewhere.
sys.path.append('/home/felix/Research/Adversarial Research/FGN---Research/')
import Finite_Gaussian_Network_lib as fgnl
import Finite_Gaussian_Network_lib.fgn_helper_lib as fgnh

In [5]:
!gpustat

[1mcrescent[0m  Thu Sep 19 10:23:10 2019
[0;36m[0][0m [0;34mGeForce GTX 1080[0m |[1;31m 82'C[0m, [1;32m 98 %[0m | [0;36m[1;33m 7901[0m / [0;33m 8119[0m MB | [1;30msalami[0m([0;33m2935M[0m) [1;30mfelix[0m([0;33m625M[0m) [1;30mdenys[0m([0;33m4329M[0m)
[0;36m[1][0m [0;34mGeForce GTX 1080[0m |[1;31m 82'C[0m, [1;32m 72 %[0m | [0;36m[1;33m 4546[0m / [0;33m 8119[0m MB | [1;30mfelix[0m([0;33m625M[0m) [1;30msoumi[0m([0;33m3909M[0m)


In [6]:
# Define what device we are using
print("CUDA Available: ",torch.cuda.is_available())
use_cuda = True
# Index of the GPU to make current when running on CUDA.
cuda_device_index = 1
device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")
print("Using device:", device)

# Manually set the current CUDA device, but only when we really run on CUDA and that
# GPU exists: the unguarded call raises on CPU-only or single-GPU machines.
if device.type == "cuda" and cuda_device_index < torch.cuda.device_count():
    torch.cuda.set_device(cuda_device_index)

CUDA Available:  True
Using device: cuda


In [7]:
# # random seeds
# torch.manual_seed(999)
# np.random.seed(999)

# torch.backends.cudnn.deterministic = True
# torch.cuda.manual_seed_all(999)

In [8]:
# Toy 2D dataset: noisy two-moons samples with binary labels.
total_num_samples = 5000
samples, labels = datasets.make_moons(
    n_samples=total_num_samples,
    noise=0.2,
    shuffle=True,
)

# Alternative dataset (concentric circles); swap in for the call above if wanted:
# samples, labels = datasets.make_circles(n_samples=total_num_samples, noise=0.08, factor=0.4, shuffle=True)

# Train/test split: the first `ratio` fraction is used for training, the rest for testing.
ratio = 4.0 / 5.0
split_index = int(ratio * len(samples))

train_samples, test_samples = samples[:split_index], samples[split_index:]
train_labels, test_labels = labels[:split_index], labels[split_index:]

num_samples = train_samples.shape[0]

In [9]:
# 2D sanity check: scatter the training points, one colour per class.
class0_mask = train_labels == 0
class1_mask = train_labels == 1

# Per-class coordinate lists (kept as lists for the optional overlays in later plotting cells).
samples_x_1 = list(train_samples[class0_mask, 0])
samples_y_1 = list(train_samples[class0_mask, 1])

samples_x_2 = list(train_samples[class1_mask, 0])
samples_y_2 = list(train_samples[class1_mask, 1])

# Class 1 is drawn first, then class 0.
for class_xs, class_ys in ((samples_x_2, samples_y_2), (samples_x_1, samples_y_1)):
    plt.scatter(class_xs, class_ys, alpha=0.3)
plt.grid(True)
plt.show()

<IPython.core.display.Javascript object>

In [10]:
# convert data to pytorch format 
# Both samples and labels go through torch.Tensor(...), i.e. the labels are NOT built as an
# integer tensor here; presumably the loss helper casts them — TODO confirm in fgn_helper_lib.
tensor_train_x = torch.Tensor(train_samples)
tensor_train_y = torch.Tensor(train_labels)

tensor_test_x = torch.Tensor(test_samples)
tensor_test_y = torch.Tensor(test_labels)

# Pair each sample tensor with its label tensor.
my_dataset = torch.utils.data.TensorDataset(tensor_train_x, tensor_train_y, ) # create your dataset
my_test_data = torch.utils.data.TensorDataset(tensor_test_x,tensor_test_y) # create your dataset

# Training loader: shuffled mini-batches of 32.
my_dataloader = torch.utils.data.DataLoader(my_dataset, batch_size=32, shuffle=True) # create your dataloader
# NOTE(review): no batch_size is passed for the test loader, so the DataLoader default applies
# (1 according to the PyTorch docs), and the test set is shuffled as well — confirm this is intended.
my_test_dataloader = torch.utils.data.DataLoader(my_test_data, shuffle=True) # create your dataloader

In [11]:
# loss functions for the classic net
# Regularisation weight, scaled by the number of training samples (0.4 / dataset size).
lmbda_l2 = (4.0*0.1/len(my_dataloader.dataset))
print(lmbda_l2)

# Loss callable built by the helper library from lmbda_l2; presumably cross-entropy plus an
# L2 penalty, judging by the names — TODO confirm against fgn_helper_lib.
classical_cross_ent_loss = fgnh.def_classical_cross_ent_loss(lmbda_l2)

0.0001


In [12]:
# Initialize the classic network
# One hidden layer of 32 units; these two settings are reused when the FGN is built later.
hidden_layer_sizes = [32]
drop_p = 0.2
# 2 input features (the 2D points) and 2 outputs (one per class).
classic_model = fgnl.Feedforward_Classic_net(in_feats=2, out_feats=2, hidden_layer_sizes=hidden_layer_sizes, drop_p=drop_p)
# Move the model to the selected device; as the last expression this also displays the module.
classic_model.to(device)

Feedforward_Classic_net(
  (hidden_layers): ModuleList(
    (0): Dropout(p=0.2)
    (1): Linear(in_features=2, out_features=32, bias=True)
    (2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.2)
  )
  (ib): BatchNorm1d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fl): Linear(in_features=32, out_features=2, bias=True)
)

In [13]:
# Adam over only those parameters of the classic model that require gradients.
classic_optimizer = optim.Adam(
    [param for param in classic_model.parameters() if param.requires_grad],
    lr=0.001,
)

In [14]:
# Number of training epochs passed to fgnh.train for the classic network.
epochs = 15

In [15]:
# Train the classic network through the helper library, evaluating on the test loader as well.
# NOTE(review): save_hist=2 presumably controls how often parameter histories are recorded —
# confirm in fgn_helper_lib.
classic_train_res = fgnh.train(classic_model, my_dataloader, 
                             classical_cross_ent_loss, classic_optimizer, epochs, save_hist=2, verbose=True, 
                             pred_func=fgnh.cross_ent_pred_accuracy, test_loader=my_test_dataloader)

Epoch 0 Train set - Average loss: 0.5428, Accuracy: 2802/4000 (70%)
Test set - Average loss: 0.3451, Accuracy: 869/1000 (87%)
Epoch 1 Train set - Average loss: 0.3775, Accuracy: 3276/4000 (82%)
Test set - Average loss: 0.3035, Accuracy: 871/1000 (87%)
Epoch 2 Train set - Average loss: 0.3758, Accuracy: 3246/4000 (81%)
Test set - Average loss: 0.2971, Accuracy: 876/1000 (88%)
Epoch 3 Train set - Average loss: 0.3835, Accuracy: 3279/4000 (82%)
Test set - Average loss: 0.3013, Accuracy: 871/1000 (87%)
Epoch 4 Train set - Average loss: 0.3715, Accuracy: 3271/4000 (82%)
Test set - Average loss: 0.2977, Accuracy: 869/1000 (87%)
Epoch 5 Train set - Average loss: 0.3840, Accuracy: 3222/4000 (81%)
Test set - Average loss: 0.3027, Accuracy: 875/1000 (88%)
Epoch 6 Train set - Average loss: 0.3705, Accuracy: 3292/4000 (82%)
Test set - Average loss: 0.2948, Accuracy: 875/1000 (88%)
Epoch 7 Train set - Average loss: 0.3607, Accuracy: 3318/4000 (83%)
Test set - Average loss: 0.2854, Accuracy: 875/100

In [16]:
# scale of the heat maps: a regular grid over [-10, 10] x [-10, 10] with step 0.1
X1 = np.arange(-10.0, 10.01, 0.1)
X1s, X2s = np.meshgrid(X1,X1)
# One (x1, x2) row per grid point, in row-major grid order. column_stack is used instead of
# reshaping a zip(): zip() is only a list on Python 2, so the reshape fails on Python 3.
heatmap_grid = np.column_stack((X1s.ravel(), X2s.ravel()))
print("shape of heatmap", np.shape(heatmap_grid))
# Tensor version fed to the networks; the numpy grid keeps its own name.
heatmap_inputs = torch.Tensor(heatmap_grid)
print(type(heatmap_inputs))
# print(heatmap_inputs)

shape of heatmap (40401, 2)
<class 'torch.Tensor'>


In [17]:
# compute predictions of the trained classic network on the heat-map grid
classic_model.eval()
# Inference only, so no autograd graph is needed.
with torch.no_grad():
    classic_heatmap_preds = classic_model(heatmap_inputs.to(device))
classic_heatmap_preds = classic_heatmap_preds.cpu().detach().numpy()
# Row-wise softmax for class probabilities. The row maximum is subtracted before exponentiating
# so np.exp cannot overflow on large logits; the result is mathematically unchanged.
classic_heatmap_shifted = classic_heatmap_preds - classic_heatmap_preds.max(axis=1, keepdims=True)
classic_heatmap_exp = np.exp(classic_heatmap_shifted)
classic_heatmap_preds_softmax = classic_heatmap_exp / classic_heatmap_exp.sum(axis=1, keepdims=True)

In [18]:
# Filled contour map of the class-0 probability over the input grid.
levels = np.arange(-0, 1.0 + 0.001, 10 ** (-2))
ticks = levels[::5]

class0_prob_grid = classic_heatmap_preds_softmax[:, 0].reshape(X1s.shape)
filled_contours = plt.contourf(X1s, X2s, class0_prob_grid, levels=levels, cmap=mpl.cm.RdYlBu_r)
plt.colorbar(filled_contours, ticks=ticks)

# # uncomment if you want to see the data and tighten the window
# plt.scatter(samples_x_1, samples_y_1, alpha=0.1, c='gray')
# plt.scatter(samples_x_2, samples_y_2, alpha=0.1, c='gray')
# plt.axis([-1.5,2.5, -1, 1.5])

plt.grid(True)
plt.title("Trained classic network")
plt.show()

<IPython.core.display.Javascript object>

In [19]:
### FGN NETWORK

In [20]:
# Initialize the fgn network
# This cell only sets and echoes the configuration; the model itself is built in the next cell.
print("hidden layers:", hidden_layer_sizes)
print("drop prob:", drop_p)
# fgn specific params
covar_type = 'diag'
print("covariance:", covar_type)
ordinal = float(2)
print("ordinal: {0:.10f}".format(ordinal))
non_lin = torch.tanh
# non_lin = False
print("non linearity", non_lin)
noisy_centers = False
print("noisy centers:", noisy_centers)
# NOTE(review): random_eval is only printed here; it is not passed to the Feedforward_FGN_net
# constructor call, and later cells toggle the mode via fgn_model.set_random_eval(...) instead.
random_eval = False
print("Random Eval", random_eval)

hidden layers: [32]
drop prob: 0.2
covariance: diag
ordinal: 2.0000000000
non linearity <built-in function tanh>
noisy centers: False
Random Eval False


In [21]:
# Build the FGN with the same layer sizes and dropout as the classic network, plus the
# FGN-specific settings chosen above.
fgn_model = fgnl.Feedforward_FGN_net(in_feats=2, out_feats=2, hidden_layer_sizes=hidden_layer_sizes, drop_p=drop_p,
                                     covar_type=covar_type, ordinal=ordinal, non_lin=non_lin, 
                                     noisy_centers=noisy_centers)

# Move the model to the selected device; as the last expression this also displays the module.
fgn_model.to(device)

Feedforward_FGN_net(
  (hidden_layers): ModuleList(
    (0): Dropout(p=0.2)
    (1): FGN_layer()
    (2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Dropout(p=0.2)
  )
  (ib): BatchNorm1d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fl): FGN_layer()
)

In [22]:
# What is the activity of the FGN before conversion?
# Baseline on the training data ...
fgn_test_res_traindata_pre = fgnh.test(fgn_model, my_dataloader,
                        classical_cross_ent_loss, pred_func=fgnh.cross_ent_pred_accuracy, verbose=True)

# ... and on the held-out test data (the helper prints "Test set" for both calls).
fgn_test_res_testdata_pre = fgnh.test(fgn_model, my_test_dataloader,
                        classical_cross_ent_loss, pred_func=fgnh.cross_ent_pred_accuracy, verbose=True)

Test set - Average loss: 1.1505, Accuracy: 1323/4000 (33%)
Test set - Average loss: 1.1396, Accuracy: 337/1000 (34%)


In [23]:
# compute predictions of the not-yet-converted FGN on the heat-map grid (random eval on)
fgn_model.eval()
fgn_model.set_random_eval(True)
# Inference only, so no autograd graph is needed.
with torch.no_grad():
    converted_fgn_heatmap_preds = fgn_model(heatmap_inputs.to(device))
# Switch random eval back off: the following cells evaluate the converted network and only
# later "switch to random eval", so the flag must not stay on after this cell.
fgn_model.set_random_eval(False)
converted_fgn_heatmap_preds = converted_fgn_heatmap_preds.cpu().detach().numpy()
# Row-wise softmax for class probabilities. The row maximum is subtracted before exponentiating,
# so np.exp cannot overflow and no platform-dependent float128 cast is needed.
fgn_pre_logits = converted_fgn_heatmap_preds.astype(np.float64)
fgn_pre_exp = np.exp(fgn_pre_logits - fgn_pre_logits.max(axis=1, keepdims=True))
converted_fgn_heatmap_preds_softmax = fgn_pre_exp / fgn_pre_exp.sum(axis=1, keepdims=True)

In [24]:
# Filled contour map of the class-0 probability of the FGN before conversion.
levels = np.arange(-0.0, 1.0 + 0.001, 10 ** (-2))
ticks = levels[::5]

class0_prob_grid = converted_fgn_heatmap_preds_softmax[:, 0].reshape(X1s.shape)
filled_contours = plt.contourf(X1s, X2s, class0_prob_grid, levels=levels, cmap=mpl.cm.RdYlBu_r)
plt.colorbar(filled_contours, ticks=ticks)

# # uncomment if you want to see the data and tighten the window
# plt.scatter(samples_x_1, samples_y_1, alpha=0.1, c='gray')
# plt.scatter(samples_x_2, samples_y_2, alpha=0.1, c='gray')
# plt.axis([-1.5,2.5, -1, 1.5])

plt.title("Untrained/unconverted FGN network")
plt.grid(True)
plt.show()

<IPython.core.display.Javascript object>

In [25]:
# convert the network
# Called for its side effect on fgn_model (the return value is not used); presumably it
# copies/translates the trained classic parameters into the FGN — TODO confirm in the library.
fgnl.convert_classic_to_fgn(classic_model, fgn_model)

In [26]:
# Sanity check: performance right after the conversion should still be good.
fgn_test_res_traindata_post = fgnh.test(fgn_model, my_dataloader,
                        classical_cross_ent_loss, pred_func=fgnh.cross_ent_pred_accuracy, verbose=True)

# The test-data result gets its own name (mirroring the *_pre pair) so it does not
# overwrite the training-data result stored just above.
fgn_test_res_testdata_post = fgnh.test(fgn_model, my_test_dataloader,
                        classical_cross_ent_loss, pred_func=fgnh.cross_ent_pred_accuracy, verbose=True)

Test set - Average loss: 0.2855, Accuracy: 3716/4000 (93%)
Test set - Average loss: 0.2825, Accuracy: 929/1000 (93%)


In [27]:
# compute predictions of the converted FGN on the heat-map grid (random eval off)
fgn_model.eval()
# This heat map is the non-random one (the random-eval version is plotted separately later),
# so make the mode explicit instead of relying on what an earlier cell left behind.
fgn_model.set_random_eval(False)
# Inference only, so no autograd graph is needed.
with torch.no_grad():
    converted_fgn_heatmap_preds_post = fgn_model(heatmap_inputs.to(device))
converted_fgn_heatmap_preds_post = converted_fgn_heatmap_preds_post.cpu().detach().numpy()
# Row-wise softmax for class probabilities. The row maximum is subtracted before exponentiating,
# so np.exp cannot overflow and no platform-dependent float128 cast is needed.
fgn_post_logits = converted_fgn_heatmap_preds_post.astype(np.float64)
fgn_post_exp = np.exp(fgn_post_logits - fgn_post_logits.max(axis=1, keepdims=True))
converted_fgn_heatmap_preds_post_softmax = fgn_post_exp / fgn_post_exp.sum(axis=1, keepdims=True)

In [28]:
# Filled contour map of the class-0 probability of the converted FGN.
levels = np.arange(-0.0, 1.0 + 0.001, 10 ** (-2))
ticks = levels[::5]

class0_prob_grid = converted_fgn_heatmap_preds_post_softmax[:, 0].reshape(X1s.shape)
filled_contours = plt.contourf(X1s, X2s, class0_prob_grid, levels=levels, cmap=mpl.cm.RdYlBu_r)
plt.colorbar(filled_contours, ticks=ticks)

# # uncomment if you want to see the data and tighten the window
# plt.scatter(samples_x_1, samples_y_1, alpha=0.1, c='gray')
# plt.scatter(samples_x_2, samples_y_2, alpha=0.1, c='gray')
# plt.axis([-1.5,2.5, -1, 1.5])

plt.title("Converted FGN network")
plt.grid(True)
plt.show()

<IPython.core.display.Javascript object>

In [29]:
# switch to random eval
# shouldn't change because of the large sigma/small inv_covar
fgn_model.set_random_eval(True)
# Re-run the same evaluation on the training data with random eval enabled ...
fgn_test_res_traindata_post_randomeval = fgnh.test(fgn_model, my_dataloader,
                        classical_cross_ent_loss, pred_func=fgnh.cross_ent_pred_accuracy, verbose=True)

# ... and on the test data.
# NOTE(review): the variable name reads "testndata" (likely a typo for "testdata"); the same
# name is assigned again by the later post-adjustment check, which overwrites this result.
fgn_test_res_testndata_post_randomeval = fgnh.test(fgn_model, my_test_dataloader,
                        classical_cross_ent_loss, pred_func=fgnh.cross_ent_pred_accuracy, verbose=True)

Test set - Average loss: 0.2855, Accuracy: 3716/4000 (93%)
Test set - Average loss: 0.2825, Accuracy: 929/1000 (93%)


In [30]:
# compute predictions of the converted FGN on the heat-map grid (random eval on)
# Set eval mode and random eval explicitly so this cell does not depend on the state
# left behind by whichever cell happened to run before it.
fgn_model.eval()
fgn_model.set_random_eval(True)
# Inference only, so no autograd graph is needed.
with torch.no_grad():
    converted_fgn_heatmap_preds_post = fgn_model(heatmap_inputs.to(device))
converted_fgn_heatmap_preds_post = converted_fgn_heatmap_preds_post.cpu().detach().numpy()
# Row-wise softmax for class probabilities. The row maximum is subtracted before exponentiating,
# so np.exp cannot overflow and no platform-dependent float128 cast is needed.
fgn_randomeval_logits = converted_fgn_heatmap_preds_post.astype(np.float64)
fgn_randomeval_exp = np.exp(fgn_randomeval_logits - fgn_randomeval_logits.max(axis=1, keepdims=True))
converted_fgn_heatmap_preds_post_softmax = fgn_randomeval_exp / fgn_randomeval_exp.sum(axis=1, keepdims=True)

In [31]:
# plot heatmap: class-0 probability of the converted FGN under random eval
levels = np.arange(-0.0, 1.0 + 0.001, 10 ** (-2))
ticks = levels[::5]

class0_prob_grid = converted_fgn_heatmap_preds_post_softmax[:, 0].reshape(X1s.shape)
filled_contours = plt.contourf(X1s, X2s, class0_prob_grid, levels=levels, cmap=mpl.cm.RdYlBu_r)
plt.colorbar(filled_contours, ticks=ticks)

# # uncomment if you want to see the data and tighten the window
# plt.scatter(samples_x_1, samples_y_1, alpha=0.1, c='gray')
# plt.scatter(samples_x_2, samples_y_2, alpha=0.1, c='gray')
# plt.axis([-1.5,2.5, -1, 1.5])

plt.title("Converted FGN network - random eval")
plt.grid(True)
plt.show()

<IPython.core.display.Javascript object>

In [32]:
# adjust the sigmas
# - should be fast (one epoch)
# - shouldn't impact accuracy

In [33]:
# method one: train with CENT loss but only adjust sigmas

In [34]:
# only train inv_covar
# Collect the trainable parameters whose name contains 'inv_covar'. A list comprehension is
# used because subscripting zip(...) only works on Python 2, where zip() returns a list.
inv_covar_params = [param for name, param in fgn_model.named_parameters()
                    if param.requires_grad and 'inv_covar' in name]
adjust_sigmas_optimizer = optim.RMSprop(inv_covar_params)
# Alternatives that were tried:
# adjust_sigmas_optimizer = optim.Adam(inv_covar_params, lr=0.001)
# adjust_sigmas_optimizer = optim.Adam(filter(lambda p: p.requires_grad, fgn_model.parameters()), lr=0.001)

# FGN loss with the L2 term disabled and the sigma term weighted at 10x lmbda_l2.
adjust_sigmas_loss = fgnl.def_fgn_cross_ent_loss(lmbda_l2=0.0, lmbda_sigs=1e1*lmbda_l2)

In [35]:
# Put the FGN in training mode and turn random eval off before the sigma-adjustment run.
fgn_model.train()
fgn_model.set_random_eval(False)

# Single epoch with the sigma-adjustment loss and the optimizer restricted to inv_covar;
# the returned dict's 'histories' entry is plotted in the cells that follow.
converted_train_res = fgnh.train(fgn_model, my_dataloader, 
                             adjust_sigmas_loss, adjust_sigmas_optimizer, epochs=1, save_hist=2, verbose=True, 
                             pred_func=fgnh.cross_ent_pred_accuracy, test_loader=my_test_dataloader)

Epoch 0 Train set - Average loss: 3.0728, Accuracy: 3368/4000 (84%)
Test set - Average loss: 0.3988, Accuracy: 940/1000 (94%)


In [36]:
# check accuracy with random eval
fgn_model.set_random_eval(True)
# NOTE(review): both names below were already assigned by the earlier pre-adjustment
# random-eval check, so those earlier results are overwritten here.
fgn_test_res_traindata_post_randomeval = fgnh.test(fgn_model, my_dataloader,
                        classical_cross_ent_loss, pred_func=fgnh.cross_ent_pred_accuracy, verbose=True)

# Same check on the held-out test data (evaluated with the classic loss, not the sigma loss).
fgn_test_res_testndata_post_randomeval = fgnh.test(fgn_model, my_test_dataloader,
                        classical_cross_ent_loss, pred_func=fgnh.cross_ent_pred_accuracy, verbose=True)

Test set - Average loss: 0.3111, Accuracy: 3740/4000 (94%)
Test set - Average loss: 0.3081, Accuracy: 940/1000 (94%)


In [37]:
# check that inv covar has gone up, sigmas down, and trace down if covar_type=='full'

history_to_plot = converted_train_res['histories']


def _plot_history_points(values, title):
    """Draw one recorded history as unconnected dots, titled, with a grid, and show it."""
    plt.plot(values, marker='.', linestyle=' ')
    plt.title(title)
    plt.grid()
    plt.show()


for k in history_to_plot.keys():
    if 'inv_covar' not in k:
        continue
    print(k)
    s = history_to_plot[k].shape
    print(s)
    # Sample up to two distinct neurons; capped so layers with a single neuron do not raise.
    neurons = np.random.choice(range(s[1]), min(2, s[1]), replace=False)
    print("plotting neurons:", neurons)
    if covar_type == 'diag':
        # for each neuron: one figure for the inverse covariance, one for its reciprocal
        for i in neurons:
            # flatten everything after the (step, neuron) axes into one axis per step
            inv_covar_history = history_to_plot[k][:,i].reshape(s[0], -1)
            _plot_history_points(inv_covar_history, 'Neuron {}: Inverse Covariance'.format(i))
            _plot_history_points(1.0/inv_covar_history, 'Neuron {}: Sigmas'.format(i))

    elif covar_type == 'full':
        # One figure per neuron, so every plot carries the title of the neuron it shows
        # (title/grid/show are inside the loop rather than after it).
        for i in neurons:
            # sum of squared entries of each recorded matrix p, i.e. trace(p p^T)
            trace = [np.einsum('ik,ik->', p, p) for p in history_to_plot[k][:,i]]
            _plot_history_points(trace, 'Neuron{}: Trace'.format(i))

    else:
        # covar_type == 'sphere'
        _plot_history_points(history_to_plot[k], 'Inverse Covariance')
        _plot_history_points(1.0/history_to_plot[k], 'Sigma')

fl.inv_covar
(126, 2, 32)
plotting neurons: [1 0]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

hidden_layers.1.inv_covar
(126, 32, 2)
plotting neurons: [23 13]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [38]:
# Plot the recorded centers history of two randomly chosen neurons for every layer
# whose history key contains 'centers' (for full FGN).
centers_histories = converted_train_res['histories']

for layer_key in centers_histories.keys():
    if 'centers' not in layer_key:
        continue
    print(layer_key)
    plt.figure(figsize=(6,6))
    layer_history = centers_histories[layer_key]
    # choose 2 random neurons in the layer to plot
    neurons = np.random.choice(range(np.shape(layer_history)[1]), 2, replace=False)
    print("plotting neurons:", neurons)

    # one subplot row per chosen neuron, stacked vertically in the same figure
    for subplot_row, neuron in enumerate(neurons, 1):
        plt.subplot(2, 1, subplot_row)
        plt.plot(layer_history[:, neuron, :], marker='.', linestyle='-')
        # plt.axis([-3,3, -3, 3])
        plt.grid()
    plt.show()

hidden_layers.1.centers
plotting neurons: [ 3 11]


<IPython.core.display.Javascript object>

fl.centers
plotting neurons: [0 1]


<IPython.core.display.Javascript object>

In [39]:
# Heat-map predictions of the converted, sigma-adjusted FGN (random eval on).
fgn_model.eval()
fgn_model.set_random_eval(True)
# compute predictions; inference only, so no autograd graph is needed
with torch.no_grad():
    converted_fgn_heatmap_preds_post = fgn_model(heatmap_inputs.to(device))
converted_fgn_heatmap_preds_post = converted_fgn_heatmap_preds_post.cpu().detach().numpy()
# Row-wise softmax for class probabilities. The row maximum is subtracted before exponentiating,
# so np.exp cannot overflow and no platform-dependent float128 cast is needed.
fgn_adjusted_logits = converted_fgn_heatmap_preds_post.astype(np.float64)
fgn_adjusted_exp = np.exp(fgn_adjusted_logits - fgn_adjusted_logits.max(axis=1, keepdims=True))
converted_fgn_heatmap_preds_post_softmax = fgn_adjusted_exp / fgn_adjusted_exp.sum(axis=1, keepdims=True)

In [40]:
# plot heatmap: class-0 probability of the converted, sigma-adjusted FGN under random eval
levels = np.arange(-0.0, 1.0 + 0.001, 10 ** (-2))
ticks = levels[::5]

class0_prob_grid = converted_fgn_heatmap_preds_post_softmax[:, 0].reshape(X1s.shape)
filled_contours = plt.contourf(X1s, X2s, class0_prob_grid, levels=levels, cmap=mpl.cm.RdYlBu_r)
plt.colorbar(filled_contours, ticks=ticks)

# # uncomment if you want to see the data and tighten the window
# plt.scatter(samples_x_1, samples_y_1, alpha=0.1, c='gray')
# plt.scatter(samples_x_2, samples_y_2, alpha=0.1, c='gray')
# plt.axis([-1.5,2.5, -1, 1.5])

plt.title("Converted and sigma adjusted FGN network - random eval")
plt.grid(True)
plt.show()

<IPython.core.display.Javascript object>

In [41]:
# old, uses a function
# fgnl.adjust_sigma_pred_func(converted_fgn_model, my_dataloader, fgnh.cross_ent_pred_accuracy, verbose=True)

In [42]:
# # make sure performance after isnt crap
# fgn_test_res_post = fgnh.test(converted_fgn_model, my_dataloader, 
#                      fgn_cross_ent_loss, verbose=True, 
#                      pred_func=fgnh.cross_ent_pred_accuracy)

In [43]:
# # compute predictions
# converted_fgn_model.eval()
# converted_fgn_heatmap_preds_post = converted_fgn_model(heatmap_inputs.to(device))
# converted_fgn_heatmap_preds_post = converted_fgn_heatmap_preds_post.cpu().detach().numpy()
# # apply softmax for probs
# converted_fgn_heatmap_preds_post_softmax = np.array([np.exp(x)/sum(np.exp(x)) for x in converted_fgn_heatmap_preds_post.astype('float128')])

In [44]:
# levels = np.arange(-0.0, 1.0+0.001, 10**(-2))
# ticks = levels[::5]

# plt.contourf(X1s, X2s, np.reshape(converted_fgn_heatmap_preds_post_softmax[:,0], np.shape(X1s) ),levels=levels, cmap= mpl.cm.RdYlBu_r)
# plt.colorbar(ticks=ticks)

# # # uncomment if you want to see the data and tighten the window
# # plt.scatter(samples_x_1, samples_y_1, alpha=0.1, c='gray')
# # plt.scatter(samples_x_2, samples_y_2, alpha=0.1, c='gray')
# # plt.axis([-1.5,2.5, -1, 1.5])


# plt.grid(True)
# plt.show()