In [1]:
# comparison of how well a FGN model converted from a classic model performs

# note that to get the exact same results, a tanh(x) needs to be added after each FGN layer.

In [2]:
from __future__ import print_function

In [3]:
import matplotlib as mpl
# set this 'backend' when using jupyter; do this before importing pyplot
mpl.use('nbagg')
import matplotlib.pyplot as plt

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import numpy as np

import sys
sys.path.append('/home/felix/Research/Adversarial Research/FGN---Research/')
import Finite_Gaussian_Network_lib as fgnl
import Finite_Gaussian_Network_lib.fgn_helper_lib as fgnh


In [5]:
# shell escape: run the external `gpustat` tool and show its report in the cell output
!gpustat

[1mcrescent[0m  Wed Sep 18 16:33:58 2019
[0;36m[0][0m [0;34mGeForce GTX 1080[0m |[1;31m 71'C[0m, [1;32m 76 %[0m | [0;36m[1;33m 6495[0m / [0;33m 8119[0m MB | [1;30msalami[0m([0;33m2935M[0m) [1;30mfelix[0m([0;33m625M[0m) [1;30msalami[0m([0;33m2923M[0m)
[0;36m[1][0m [0;34mGeForce GTX 1080[0m |[1;31m 81'C[0m, [1;32m 88 %[0m | [0;36m[1;33m 3417[0m / [0;33m 8119[0m MB | [1;30mfelix[0m([0;33m563M[0m) [1;30mdenys[0m([0;33m2281M[0m) [1;30mfelix[0m([0;33m561M[0m)


In [5]:
# Define what device we are using
print("CUDA Available: ",torch.cuda.is_available())
use_cuda = True
# index of the GPU to make current when CUDA is usable and that GPU exists
cuda_device_index = 1

# CUDA is used only when it is both requested and actually available
cuda_ok = use_cuda and torch.cuda.is_available()
device = torch.device("cuda" if cuda_ok else "cpu")
print("Using device:", device)

# manually set the current cuda device.
# Guarded: calling torch.cuda.set_device unconditionally fails on a CPU-only
# host, and an index past the last GPU is invalid, so in both cases the
# default device selection is left untouched instead of crashing the cell.
if cuda_ok and cuda_device_index < torch.cuda.device_count():
    torch.cuda.set_device(cuda_device_index)

CUDA Available:  True
Using device: cuda


In [6]:
# # random seeds
# torch.manual_seed(999)
# np.random.seed(999)

# torch.backends.cudnn.deterministic = True
# torch.cuda.manual_seed_all(999)

In [7]:
# load MNIST data: the helper returns three loaders, unpacked as train / val / test
batch_size = 32
mnist_train_loader, mnist_val_loader, mnist_test_loader = fgnh.mnist_dataloaders(batch_size)

In [9]:
### PART 1: Train classic feedforward model

In [10]:
# loss function for the classic net
# the lambda coefficient is scaled by the number of samples in the training set
n_train_samples = len(mnist_train_loader.dataset)
lmbda_l2 = (4.0*0.1/n_train_samples)
print(lmbda_l2)

# presumably lmbda_l2 weights an L2 penalty inside the loss -- confirm in fgn_helper_lib
classic_loss_func = fgnh.def_classical_cross_ent_loss(lmbda_l2)

8e-06


In [11]:
# Hyper-parameters used to build both the classic network and its FGN counterpart
hidden_l_nums = [32] * 2  # one entry per hidden layer
drop_p = 1.0 / 5          # presumably the dropout probability -- confirm in the lib

In [12]:
# Build the classic feed-forward net and move it to the selected device.
# NOTE(review): the output recorded for this cell is a TypeError raised by
# __init__, so these keyword names may not match the library's constructor
# signature -- confirm against Feedforward_Classic_net before relying on it.
classic_model = fgnl.Feedforward_Classic_net(
    in_feats=28*28,
    out_feats=10,
    hidden_l_nums=hidden_l_nums,
    drop_p=drop_p
).to(device)

TypeError: __init__() takes at least 4 arguments (4 given)

In [13]:
# RMSprop over only those parameters of the classic model that require gradients
classic_optimizer = optim.RMSprop(
    (weight for weight in classic_model.parameters() if weight.requires_grad),
    momentum=0.5
)

In [14]:
# train the model
# epoch count handed to fgnh.train for the classic network
epochs = 5

In [15]:
# train the classic model; the test loader is passed along as well
# (the recorded output reports test-set metrics after each epoch)
classic_train_res = fgnh.train(
    classic_model,
    mnist_train_loader,
    classic_loss_func,
    classic_optimizer,
    epochs,
    save_hist=2,
    verbose=True,
    pred_func=fgnh.cross_ent_pred_accuracy,
    test_loader=mnist_test_loader
)

Epoch 0 Train set - Average loss: 1.0559, Accuracy: 38805/60000 (65%)
Test set - Average loss: 0.6186, Accuracy: 8141/10000 (81%)
Epoch 1 Train set - Average loss: 0.4649, Accuracy: 52087/60000 (87%)
Test set - Average loss: 0.4582, Accuracy: 8633/10000 (86%)
Epoch 2 Train set - Average loss: 0.3788, Accuracy: 53593/60000 (89%)
Test set - Average loss: 0.3759, Accuracy: 8943/10000 (89%)
Epoch 3 Train set - Average loss: 0.3356, Accuracy: 54348/60000 (91%)
Test set - Average loss: 0.3286, Accuracy: 9088/10000 (91%)
Epoch 4 Train set - Average loss: 0.3136, Accuracy: 54644/60000 (91%)
Test set - Average loss: 0.3601, Accuracy: 8993/10000 (90%)


In [16]:
### PART 2: Convert model to FGN

In [17]:
# step 1: Initialize the fgn network with the same layout as the classic one
# NOTE(review): the output recorded for this cell shows different values than
# the hyper-parameter cell sets, so the saved outputs look stale -- re-run the
# notebook from a fresh kernel to confirm both models share one architecture.
print("hidden layers",hidden_l_nums)
print("drop_p=", drop_p)
fgn_model = fgnl.Feedforward_FGN_net(
    in_feats=28*28,
    out_feats=10,
    hidden_l_nums=hidden_l_nums,
    drop_p=drop_p
).to(device)

hidden layers [16, 16, 16]
drop_p= 0.0


In [18]:
# step 2: convert
# The return value is discarded, so the conversion presumably updates
# fgn_model in place from classic_model -- confirm in the library.
fgnl.convert_classic_to_fgn(classic_model, fgn_model)

In [19]:
# test models
# NOTE(review): the evaluation runs on mnist_train_loader, not the test loader,
# although the helper labels its report "Test set" -- confirm this is intended.
classic_test_res = fgnh.test(
    classic_model,
    mnist_train_loader,
    classic_loss_func,
    verbose=True,
    pred_func=fgnh.cross_ent_pred_accuracy
)

Test set - Average loss: 0.3590, Accuracy: 53800/60000 (90%)


In [20]:
# evaluate the freshly converted FGN model with the same loader and loss
# as the classic model so the two results are directly comparable
fgn_test_res = fgnh.test(
    fgn_model,
    mnist_train_loader,
    classic_loss_func,
    verbose=True,
    pred_func=fgnh.cross_ent_pred_accuracy
)

Test set - Average loss: 89.2898, Accuracy: 5315/60000 (9%)


In [21]:
# unless tanh is added to the FGN model, this will not reproduce the results perfectly

In [22]:
# next idea: use grad descent to adjust sigmas and centers 

In [23]:
# print every (name, parameter) pair of the converted FGN model
for named_param in fgn_model.named_parameters():
    print(named_param)

('hidden_layers.0.weights', Parameter containing:
tensor([[ 0.0773,  0.0626,  0.0701,  ...,  0.0633,  0.0840,  0.1018],
        [-0.2348, -0.2817, -0.2912,  ..., -0.2770, -0.2528, -0.2230],
        [-0.0738, -0.0583, -0.0571,  ..., -0.0774, -0.0383, -0.0819],
        ...,
        [-0.0728, -0.0492, -0.1077,  ..., -0.0598, -0.0412, -0.0971],
        [-0.0670, -0.0777, -0.0697,  ..., -0.0434, -0.0541, -0.0796],
        [ 0.2034,  0.1624,  0.1747,  ...,  0.1764,  0.2032,  0.1492]],
       device='cuda:1', requires_grad=True))
('hidden_layers.0.centers', Parameter containing:
tensor([[6.4480e-05, 5.2232e-05, 5.8427e-05,  ..., 5.2752e-05, 7.0050e-05,
         8.4870e-05],
        [5.5173e-04, 6.6196e-04, 6.8414e-04,  ..., 6.5074e-04, 5.9408e-04,
         5.2399e-04],
        [6.1141e-05, 4.8304e-05, 4.7279e-05,  ..., 6.4111e-05, 3.1722e-05,
         6.7803e-05],
        ...,
        [1.0075e-04, 6.8096e-05, 1.4914e-04,  ..., 8.2749e-05, 5.7008e-05,
         1.3440e-04],
        [7.7081e-05,

In [25]:
# Optimizer for fine-tuning the converted FGN model without updating its weights.
# NOTE(review): no cell in this notebook defines fgn_sigmas_optimizer, so on a
# fresh kernel this name raised NameError. It is rebuilt here from the repr
# recorded for this cell (RMSprop, lr=0.01 which is the default, momentum=0.5).
# The parameter selection is an assumption: every trainable parameter whose
# name does not end in 'weights' (e.g. the '...centers' entries) -- confirm
# against the parameter names of Feedforward_FGN_net.
fgn_sigmas_optimizer = optim.RMSprop(
    [param for name, param in fgn_model.named_parameters()
     if param.requires_grad and not name.endswith('weights')],
    momentum=0.5
)
# bare expression so the notebook displays the optimizer configuration
fgn_sigmas_optimizer

RMSprop (
Parameter Group 0
    alpha: 0.99
    centered: False
    eps: 1e-08
    lr: 0.01
    momentum: 0.5
    weight_decay: 0
)

In [26]:
# loss function for the fgn
# the sigma coefficient is ten times the coefficient used for the classic loss
lmbda_sigmas = 10.0*lmbda_l2
print(lmbda_sigmas)

fgn_loss_func = fgnl.def_fgn_cross_ent_loss(
    lmbda_l2,
    lmbda_sigmas
)

6.66666666667e-05


In [27]:
# train the converted model, but not the weights
# (only the parameters registered in fgn_sigmas_optimizer get updated)
epochs = 10
fgn_train_res = fgnh.train(
    fgn_model,
    mnist_train_loader,
    fgn_loss_func,
    fgn_sigmas_optimizer,
    epochs,
    save_hist=2,
    verbose=True,
    pred_func=fgnh.cross_ent_pred_accuracy,
    test_loader=mnist_test_loader
)

Epoch 0 Train set - Average loss: 67.1128, Accuracy: 49121/60000 (82%)
Test set - Average loss: 92.5522, Accuracy: 1009/10000 (10%)
Epoch 1 Train set - Average loss: 66.9296, Accuracy: 49352/60000 (82%)
Test set - Average loss: 84.8864, Accuracy: 1012/10000 (10%)
Epoch 2 Train set - Average loss: 66.8038, Accuracy: 49463/60000 (82%)
Test set - Average loss: 80.4849, Accuracy: 1045/10000 (10%)
Epoch 3 Train set - Average loss: 66.6935, Accuracy: 49697/60000 (83%)
Test set - Average loss: 77.8852, Accuracy: 1158/10000 (12%)
Epoch 4 Train set - Average loss: 66.5930, Accuracy: 49799/60000 (83%)
Test set - Average loss: 77.2787, Accuracy: 1113/10000 (11%)
Epoch 5 Train set - Average loss: 66.4955, Accuracy: 49975/60000 (83%)
Test set - Average loss: 75.8134, Accuracy: 1302/10000 (13%)
Epoch 6 Train set - Average loss: 66.4025, Accuracy: 50185/60000 (84%)
Test set - Average loss: 74.8636, Accuracy: 1353/10000 (14%)
Epoch 7 Train set - Average loss: 66.3118, Accuracy: 50351/60000 (84%)
Test 

In [28]:
# re-evaluate the fine-tuned FGN model with the classic loss on the train loader,
# i.e. the same setup as the evaluation done right after the conversion
fgn_test_res = fgnh.test(
    fgn_model,
    mnist_train_loader,
    classic_loss_func,
    verbose=True,
    pred_func=fgnh.cross_ent_pred_accuracy
)

Test set - Average loss: 7.5570, Accuracy: 9759/60000 (16%)
