In [1]:
# Comparison of training between to similar nets: one classical feedforward and one FGN
# here you can see that a similar FGN (same size per hidden units) can perform as well as classical feedforward network

In [2]:
from __future__ import print_function

In [3]:
import matplotlib as mpl
# set this 'backend' when using jupyter; do this before importing pyplot
mpl.use('nbagg')
import matplotlib.pyplot as plt

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import numpy as np

import sys
sys.path.append('/home/felix/Research/Adversarial Research/FGN---Research/')
import Finite_Gaussian_Network_lib as fgnl
import Finite_Gaussian_Network_lib.fgn_helper_lib as fgnh

In [5]:
# Define what device we are using
print("CUDA Available: ",torch.cuda.is_available())
use_cuda = True
device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")
print("Using device:", device)

# manualy set cuda device
torch.cuda.set_device(1)

CUDA Available:  True
Using device: cuda


In [6]:
# # random seeds
# torch.manual_seed(999)
# np.random.seed(999)

# torch.backends.cudnn.deterministic = True
# torch.cuda.manual_seed_all(999)

In [7]:
# MNIST dataset and dataloader declaration
# transforms does both the conversion from 0-255 to 0-1
# and normalizes by the precomputed mean and std

batch_size = 2048

mnist_train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../../MNIST-dataset', train=True, download=False, 
                   transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))
            ])), 
        batch_size=batch_size, shuffle=True)

mnist_test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../../MNIST-dataset', train=False, download=False, 
                   transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))
            ])), 
        batch_size=batch_size, shuffle=True)

In [8]:
# show an example
print("Shape:", mnist_train_loader.dataset.train_data.shape)
x = mnist_train_loader.dataset.train_data[0]
print("type:", type(x.numpy()))
print("shape:", x.shape)
# print("sample:", x.numpy())

y = mnist_train_loader.dataset.train_labels[1]
print("Label:",y.numpy())
print("type:", type(y))

plt.imshow(x, cmap=plt.cm.get_cmap('Greys'))
plt.colorbar()
plt.show()

Shape: torch.Size([60000, 28, 28])
type: <type 'numpy.ndarray'>
shape: torch.Size([28, 28])
Label: 0
type: <class 'torch.Tensor'>


<IPython.core.display.Javascript object>

In [9]:
# loss functions for the classic net
lmbda_l2 = (4.0*0.1/len(mnist_train_loader.dataset))
print(lmbda_l2)

def classical_cross_ent_loss(model, output, target):
    cent_loss = F.cross_entropy(output, target.long())
    l2 = fgnh.l2_loss(model)
    return cent_loss + lmbda_l2*l2

6.66666666667e-06


In [10]:
# Initialize the classic network
hidden_l_nums = [100,100,100]
drop_p= 0.01

In [11]:
classic_model = fgnl.Feedforward_Classic_net(in_feats=28*28, out_feats=10, hidden_l_nums=hidden_l_nums, drop_p=drop_p).to(device)

In [12]:
classic_optimizer = optim.RMSprop(filter(lambda p: p.requires_grad, classic_model.parameters()),momentum=0.5)

In [13]:
# train the model
epochs = 5

In [14]:
classic_train_res = fgnh.train(classic_model, device, mnist_train_loader, 
                             classical_cross_ent_loss, classic_optimizer, epochs, save_hist=2, verbose=True, 
                             pred_func=fgnh.cross_ent_pred_accuracy, test_loader=mnist_test_loader)

Epoch 0 Train set - Average loss: 0.8819, Accuracy: 43644/60000 (73%)
Test set - Average loss: 0.4997, Accuracy: 8368/10000 (84%)
Epoch 1 Train set - Average loss: 0.3291, Accuracy: 54033/60000 (90%)
Test set - Average loss: 0.5247, Accuracy: 8399/10000 (84%)
Epoch 2 Train set - Average loss: 0.2680, Accuracy: 55100/60000 (92%)
Test set - Average loss: 0.2553, Accuracy: 9234/10000 (92%)
Epoch 3 Train set - Average loss: 0.2309, Accuracy: 55831/60000 (93%)
Test set - Average loss: 0.2564, Accuracy: 9220/10000 (92%)
Epoch 4 Train set - Average loss: 0.2101, Accuracy: 56171/60000 (94%)
Test set - Average loss: 0.3398, Accuracy: 8975/10000 (90%)


In [15]:
### INTERLUDE

In [16]:
### Loss Functions for the FGN

# importances of the constraints
lmbda_l2 = (4.0*0.1/len(mnist_train_loader.dataset))
lmbda_sigs = 1.01*lmbda_l2
# sig_params = sum(p.numel() for n,p in fgn_model.named_parameters() if 'sigs' in n)
# lmbda_sigs = (1.0/sig_params)

# lmbda_l2 = 0.0
# lmbda_sigs = 0.0
print("lambda for l2 loss", lmbda_l2)
print("lambda for sigs^2 loss", lmbda_sigs)

def fgn_cross_ent_loss(model, output, target):
    
    # normal Cent loss
    cent_loss = F.cross_entropy(output, target.long())
    # normal l2 loss
    l2 = fgnh.l2_loss(model)
    #sigma loss
    sig_loss = fgnl.sigmas_loss(model)
    
    return cent_loss + lmbda_l2*l2 + lmbda_sigs*sig_loss


lambda for l2 loss 6.66666666667e-06
lambda for sigs^2 loss 6.73333333333e-06


In [17]:
# Initialize the fgn network
print("hidden layers",hidden_l_nums)
print("drop_p=", drop_p)
fgn_model = fgnl.Feedforward_FGN_net(in_feats=28*28, out_feats=10, hidden_l_nums=hidden_l_nums, drop_p=drop_p).to(device)

hidden layers [100, 100, 100]
drop_p= 0.01


In [18]:
fgn_optimizer = optim.RMSprop(filter(lambda p: p.requires_grad, fgn_model.parameters()),momentum=0.5)

In [19]:
fgn_train_res = fgnh.train(fgn_model, device, mnist_train_loader, 
                             fgn_cross_ent_loss, fgn_optimizer, epochs, save_hist=2, verbose=True, 
                             pred_func=fgnh.cross_ent_pred_accuracy, test_loader=mnist_test_loader)

Epoch 0 Train set - Average loss: 2.3451, Accuracy: 23241/60000 (39%)
Test set - Average loss: 4.8287, Accuracy: 1188/10000 (12%)
Epoch 1 Train set - Average loss: 0.7438, Accuracy: 49362/60000 (82%)
Test set - Average loss: 1.7888, Accuracy: 4297/10000 (43%)
Epoch 2 Train set - Average loss: 0.4959, Accuracy: 53858/60000 (90%)
Test set - Average loss: 0.6156, Accuracy: 8573/10000 (86%)
Epoch 3 Train set - Average loss: 0.4217, Accuracy: 55168/60000 (92%)
Test set - Average loss: 0.5995, Accuracy: 8684/10000 (87%)
Epoch 4 Train set - Average loss: 0.3782, Accuracy: 55875/60000 (93%)
Test set - Average loss: 0.4227, Accuracy: 9170/10000 (92%)


In [20]:
# acc and loss hist
plt.plot(fgn_train_res['train_loss_hist'], marker='.', linestyle=' ', label='fgn train')
plt.plot(fgn_train_res['test_loss_hist'], marker='.', linestyle=' ', label='fgn test')
plt.plot(classic_train_res['train_loss_hist'], marker='.', linestyle=' ', label='classic train')
plt.plot(classic_train_res['test_loss_hist'], marker='.', linestyle=' ', label='classic test')
plt.grid()
plt.legend()
plt.title('Loss')
plt.show()

plt.plot(fgn_train_res['train_acc_hist'], marker='.', linestyle=' ', label='fgn train')
plt.plot(fgn_train_res['test_acc_hist'], marker='.', linestyle=' ', label='fgn test')
plt.plot(classic_train_res['train_acc_hist'], marker='.', linestyle=' ', label='classic train')
plt.plot(classic_train_res['test_acc_hist'], marker='.', linestyle=' ', label='classic test')
plt.grid()
plt.legend()
plt.title('Accuracy')
plt.show()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [21]:
# plot centers history for only some hidden layer neuron

for k in fgn_train_res['histories'].keys():
    if 'centers' in k:
        print(k)
        plt.figure(figsize=(6,6))
        # print(np.shape(fgn_fgn_train_res['histories'][k]))
        # choose 4random neurons in the layer to print
        neurons =  np.random.choice(range(np.shape(fgn_train_res['histories'][k])[1]),2, replace=False)
        print("plotting neurons:", neurons)
        # plt.figure(figsize=(2,2))

        for idx,n in enumerate(neurons):
            x = fgn_train_res['histories'][k][:,n,:]
            # print(np.shape(x))
            plt.subplot(2,1,idx+1)
            plt.plot(x, marker='.', linestyle='-')
            # plt.axis([-3,3, -3, 3])
            plt.grid()
        plt.show()
            
            

hidden_layers.4.centers
plotting neurons: [99 28]


<IPython.core.display.Javascript object>

hidden_layers.7.centers
plotting neurons: [95 57]


<IPython.core.display.Javascript object>

hidden_layers.1.centers
plotting neurons: [23 55]


<IPython.core.display.Javascript object>

fl.centers
plotting neurons: [1 4]


<IPython.core.display.Javascript object>

In [22]:
# check that sigma has shrunk
# plot sigma history
for k in fgn_train_res['histories'].keys():
    if 'sigmas' in k:
        print(k)
        for i in range(len(fgn_train_res['histories'][k][0])):
            plt.plot(fgn_train_res['histories'][k][:,i], marker='.', linestyle=' ')
        plt.grid()
        plt.show()

hidden_layers.4.sigmas


<IPython.core.display.Javascript object>

hidden_layers.7.sigmas


<IPython.core.display.Javascript object>

hidden_layers.1.sigmas


<IPython.core.display.Javascript object>

fl.sigmas


<IPython.core.display.Javascript object>