In [1]:
# tests for convert_Classic2FGN.py

In [2]:
from convert_Classic2FGN import convert_Classic2FGN

In [3]:
from __future__ import print_function

import numpy as np
import torch
import torch.nn.functional as F
from torchvision import datasets, transforms

from Classic_MNIST_Net import Classic_MNIST_Net
from Feedforward_FGN_net import Feedforward_FGN_net
from test import test

In [4]:
# Pick the compute device: CUDA when it is both requested and present, else CPU.
use_cuda = True
print("CUDA Available: ", torch.cuda.is_available())
if use_cuda and torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

CUDA Available:  True


In [5]:
# MNIST datasets and dataloaders.
# ToTensor converts pixel values from 0-255 to 0-1; Normalize then standardizes
# them with the precomputed mean and std.

batch_size = 32


def _mnist_loader(train, shuffle):
    """Build a DataLoader over the local MNIST copy for the requested split."""
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    split = datasets.MNIST('../../MNIST-dataset', train=train, download=False,
                           transform=preprocessing)
    return torch.utils.data.DataLoader(split, batch_size=batch_size, shuffle=shuffle)


mnist_train_loader = _mnist_loader(train=True, shuffle=True)
mnist_test_loader = _mnist_loader(train=False, shuffle=False)

In [6]:
# create models to be converted
# Both constructors are given [3]; for Feedforward_FGN_net, 28*28 and 10 are
# presumably the flattened MNIST input size and the number of classes
# (TODO confirm against the constructor signature).
classic_model = Classic_MNIST_Net(hidden_l_nums=[3])
classic_model.to(device)
fgn_model = Feedforward_FGN_net(28*28,10,[3])
# Kept as the cell's last expression so the notebook displays the module repr.
fgn_model.to(device)

Feedforward_FGN_net(
  (id): Dropout(p=0.0)
  (hidden_layers): ModuleList(
    (0): FGN_layer()
    (1): BatchNorm1d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (fl): FGN_layer()
)

In [7]:
# NLL loss wrapper for the classic network
def classic_nll_loss_func(model, output, target):
    """Return the negative log-likelihood loss of `output` against `target`.

    `model` is accepted but never used; the three-argument form is presumably
    what the `loss_func` hook of `test` expects (confirm against test.py).
    """
    loss = F.nll_loss(output, target)
    return loss

In [8]:
# NLL loss wrapper for the FGN network
def fgn_nll_loss_func(model, output, target):
    """Return the negative log-likelihood loss of `output` against `target`.

    `model` is accepted but never used. The output is consumed as-is; it is not
    unpacked into (prediction, likelihood) parts.

    NOTE(review): F.nll_loss expects log-probabilities as input; confirm that
    the FGN network's final layer actually produces them.
    """
    loss = F.nll_loss(output, target)
    return loss

In [9]:
# count of correct predictions for the classic net
def classic_pred_func(output, target):
    """Count the rows of `output` whose largest entry sits at the target index.

    `target` is cast to long and reshaped to match the predicted indices before
    comparison. Returns the count as a plain Python number.
    """
    _, pred = output.max(1, keepdim=True)  # index of the max log-probability
    hits = pred == target.long().view_as(pred)
    return hits.sum().item()

In [10]:
# count of correct predictions for the fgnet
def fgn_pred_func(output, target):
    """Count the rows of `output` whose largest entry sits at the target index.

    The output is used directly; it is not unpacked into (prediction,
    likelihood) parts. Returns the count as a plain Python number.
    """
    pred = output.max(1, keepdim=True)[1]  # index of the max log-probability
    labels = target.long().view_as(pred)
    n_correct = (pred == labels).sum()
    return n_correct.item()

In [11]:
# before conversion: baseline evaluation of both models on the MNIST test set
classic_test_res = test(
    classic_model,
    device,
    mnist_test_loader,
    loss_func=classic_nll_loss_func,
    verbose=True,
    pred_func=classic_pred_func
)
fgn_test_res = test(
    fgn_model,
    device,
    mnist_test_loader,
    loss_func=fgn_nll_loss_func,
    verbose=True,
    pred_func=fgn_pred_func
)

Test set - Average loss: 2.3767, Accuracy: 1178/10000 (12%)
Test set - Average loss: 0.1097, Accuracy: 982/10000 (10%)


In [12]:
# some fgnet dict values
# Prints the complete state dict of fgn_model before the conversion call, as a
# baseline for the same print after the conversion.
print(fgn_model.state_dict())

OrderedDict([('hidden_layers.0.weights', tensor([[ 22.5116, -12.7173,   3.1298,  ...,   9.2547,  17.2639,   8.1681],
        [-13.0032,   9.9939,  -5.6512,  ...,  24.4244, -27.2463,   3.4933],
        [-13.1889,  13.6434, -21.8499,  ...,  -1.0773,   1.7977, -18.1188]],
       device='cuda:0')), ('hidden_layers.0.centers', tensor([[-0.5512,  0.1094, -0.5906,  ..., -0.2467, -1.6058,  1.1626],
        [-2.5823, -0.7790, -0.6558,  ...,  0.6129,  0.6411, -0.8280],
        [-0.4470,  0.4859, -0.5332,  ...,  0.9407, -3.4504,  0.9148]],
       device='cuda:0')), ('hidden_layers.0.sigs', tensor([9.6147, 9.6147, 9.6147], device='cuda:0')), ('hidden_layers.1.weight', tensor([0.5414, 0.1278, 0.4758], device='cuda:0')), ('hidden_layers.1.bias', tensor([0., 0., 0.], device='cuda:0')), ('hidden_layers.1.running_mean', tensor([0., 0., 0.], device='cuda:0')), ('hidden_layers.1.running_var', tensor([1., 1., 1.], device='cuda:0')), ('hidden_layers.1.num_batches_tracked', tensor(0, device='cuda:0')), ('fl

In [13]:
# CONVERT CALL
# NOTE(review): the output recorded for this cell is a RuntimeError: loading the
# state dict into Feedforward_FGN_net reports missing "hidden_layers.1.*" keys
# (hidden_layers.1 is the BatchNorm1d layer in the model repr shown earlier).
# The cells after this one have no execution count.
convert_Classic2FGN(classic_model=classic_model, fgn_model=fgn_model)

RuntimeError: Error(s) in loading state_dict for Feedforward_FGN_net:
	Missing key(s) in state_dict: "hidden_layers.1.running_var", "hidden_layers.1.bias", "hidden_layers.1.weight", "hidden_layers.1.running_mean". 

In [None]:
# after conversion: re-run the same evaluation on both models
classic_test_res = test(
    classic_model,
    device,
    mnist_test_loader,
    loss_func=classic_nll_loss_func,
    verbose=True,
    pred_func=classic_pred_func
)
fgn_test_res = test(
    fgn_model,
    device,
    mnist_test_loader,
    loss_func=fgn_nll_loss_func,
    verbose=True,
    pred_func=fgn_pred_func
)

In [None]:
# some fgnet dict values (should have changed)
# Same full state-dict print as before the conversion call, for comparison
# against the earlier output.
print(fgn_model.state_dict())

In [None]:
# Expected: after conversion, the two models' test results should be close to identical,
# without needing to reload the fgn_net.