In [1]:
# dev
# 1 train a classical model
# 2 convert to fgn
# 3 retrain

In [2]:
from __future__ import print_function

In [3]:
import copy

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import numpy as np
# import sys
# sys.path.append('/home/felix/Research/Adversarial Research/FGN---Research')
import torch_helper_lib as th

In [4]:
# Define what device we are using: CUDA when it is both requested (use_cuda)
# and available, otherwise fall back to the CPU.
print("CUDA Available: ",torch.cuda.is_available())
use_cuda = True
device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")
print("Using device:", device)

# Manually set the CUDA device, but only when CUDA was actually selected above;
# calling torch.cuda.set_device on a machine without CUDA raises and would make
# the CPU fallback unusable.
if device.type == "cuda":
    torch.cuda.set_device(0)

CUDA Available:  True
Using device: cuda


In [5]:
# # random seeds
# torch.manual_seed(999)
# np.random.seed(999)

# torch.backends.cudnn.deterministic = True
# torch.cuda.manual_seed_all(999)

In [6]:
# MNIST train / test data loaders.
# ToTensor converts pixel values from 0-255 to 0-1; Normalize then standardises
# them with the precomputed mean (0.1307) and std (0.3081).

batch_size = 1024

# One preprocessing pipeline, shared by the train and test splits
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

mnist_train_set = datasets.MNIST('../MNIST-dataset', train=True, download=False,
                                 transform=mnist_transform)
mnist_test_set = datasets.MNIST('../MNIST-dataset', train=False, download=False,
                                transform=mnist_transform)

mnist_train_loader = torch.utils.data.DataLoader(
    mnist_train_set, batch_size=batch_size, shuffle=True)

mnist_test_loader = torch.utils.data.DataLoader(
    mnist_test_set, batch_size=batch_size, shuffle=True)

In [7]:
# loss functions for the classic net
# L2 regularisation coefficient: 4.0 * 0.1 divided by the number of training samples
num_train_samples = len(mnist_train_loader.dataset)
lmbda_l2 = 4.0 * 0.1 / num_train_samples
print(lmbda_l2)
      
def l2_loss(model):
    """Sum of squared entries over the model's weight and bias parameters.

    Only parameters whose name contains 'weight' or 'bias' contribute, so the
    result can be used as an L2 regulariser term.

    Args:
        model: object exposing ``named_parameters()`` that yields
            ``(name, tensor)`` pairs (e.g. a ``torch.nn.Module``).

    Returns:
        A 0-dim tensor holding the accumulated sum of squares. If no parameter
        name matches, a zero tensor is returned instead of failing.
    """
    l2 = None
    for name, param in model.named_parameters():
        if ('weight' in name) or ('bias' in name):
            term = torch.sum(param**2)
            # Explicit first-iteration handling instead of catching the
            # NameError of an unassigned accumulator with a bare except,
            # which would also hide genuine errors from the computation.
            l2 = term if l2 is None else l2 + term
    if l2 is None:
        # No matching parameters: nothing to penalise
        return torch.zeros(())
    return l2


def classical_cross_ent_loss(model, output, target):
    """Cross-entropy of ``output`` against ``target`` plus the scaled L2 penalty of ``model``.

    The target is cast to long before being handed to ``F.cross_entropy``; the
    penalty is ``lmbda_l2 * l2_loss(model)``.
    """
    data_term = F.cross_entropy(output, target.long())
    return data_term + lmbda_l2*l2_loss(model)

6.66666666667e-06


In [8]:
# Network params (shared by the classic and the FGN network)
drop_p = 0.0               # passed as drop_p to both network constructors
hidden_l_nums = [32, 32]   # passed as hidden_l_nums to both network constructors

In [9]:
# create the classical network (28*28 input features, 10 outputs) and move it to the chosen device
classic_model = th.Feedforward_Classic_net(
    in_feats=28*28,
    out_feats=10,
    hidden_l_nums=hidden_l_nums,
    drop_p=drop_p,
).to(device)

In [10]:
# network optimizer: RMSprop (momentum 0.5) over the parameters that require gradients
trainable_params = [p for p in classic_model.parameters() if p.requires_grad]
classic_optimizer = optim.RMSprop(trainable_params, momentum=0.5)

In [14]:
# training params: number of epochs handed to th.train in the next cell
epochs = 5

In [15]:
# train the classic model with the regularised cross-entropy loss,
# passing the MNIST test loader as test_loader
classic_train_res = th.train(
    classic_model,
    device,
    mnist_train_loader,
    classical_cross_ent_loss,
    classic_optimizer,
    epochs,
    save_hist=2,
    verbose=True,
    pred_func=th.cross_ent_pred_accuracy,
    test_loader=mnist_test_loader,
)

Epoch 0 Train set - Average loss: 0.6503, Accuracy: 47631/60000 (79%)
Test set - Average loss: 0.4206, Accuracy: 8805/10000 (88%)
Epoch 1 Train set - Average loss: 0.3520, Accuracy: 53915/60000 (90%)
Test set - Average loss: 0.3659, Accuracy: 8950/10000 (90%)
Epoch 2 Train set - Average loss: 0.2987, Accuracy: 54841/60000 (91%)
Test set - Average loss: 0.3191, Accuracy: 9108/10000 (91%)
Epoch 3 Train set - Average loss: 0.2736, Accuracy: 55378/60000 (92%)
Test set - Average loss: 0.3387, Accuracy: 9059/10000 (91%)
Epoch 4 Train set - Average loss: 0.2576, Accuracy: 55616/60000 (93%)
Test set - Average loss: 0.2795, Accuracy: 9243/10000 (92%)


In [16]:
### Convert to FGN

In [17]:
# create the FGN model that the classic network will be converted into
# (same constructor arguments as the classic net)
fgn_model = th.Feedforward_FGN_net(
    in_feats=28*28,
    out_feats=10,
    hidden_l_nums=hidden_l_nums,
    drop_p=drop_p,
).to(device)

In [31]:
# Inspect the top-level submodules of the classic model, last-registered first.
# The earlier draft of this cell also tried to recurse and convert layers inline,
# but it did not parse and relied on names that were never defined; the actual
# replacement logic lives in convert_layers, so this cell only prints.
for name, module in reversed(list(classic_model.named_children())):
    print(name, module)

fl Linear(in_features=32, out_features=10, bias=True)
hidden_layers ModuleList(
  (0): Linear(in_features=784, out_features=32, bias=True)
  (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): Linear(in_features=32, out_features=32, bias=True)
  (3): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
id Dropout(p=0.0)


In [11]:
def convert_layers(model, layer_type_old, layer_type_new, convert_weights=False, verbose=True):
    """Recursively replace submodules of type ``layer_type_old`` by ``layer_type_new``.

    The model is modified IN PLACE and the same object is returned; pass a copy
    if the original must be preserved.

    Args:
        model: module whose (nested) children are scanned.
        layer_type_old: exact type to replace (subclasses are not matched).
        layer_type_new: callable invoked as ``layer_type_new(in_features, out_features)``
            to build each replacement.
        convert_weights: when true, the old layer's ``weight`` and ``bias`` are
            assigned onto the new layer under those same attribute names.
        verbose: when true (default), print the traversal trace.

    Returns:
        ``model`` itself, after conversion.

    Raises:
        AttributeError: if a matched layer has no ``in_features`` / ``out_features``.
    """
    # Iterate over a snapshot so that reassigning entries cannot disturb the traversal
    for name, module in reversed(list(model._modules.items())):
        if verbose:
            print(name, type(module))
        if module is None:
            # nothing registered under this name, so nothing to convert
            continue

        if len(list(module.children())) > 0:
            # recurse into containers
            if verbose:
                print("recursing")
            model._modules[name] = convert_layers(module, layer_type_old, layer_type_new,
                                                  convert_weights, verbose)

        if type(module) is layer_type_old:
            if verbose:
                print("changing type")
            layer_old = module
            # Size the replacement after the layer it replaces rather than using
            # one fixed shape for every layer in the network.
            layer_new = layer_type_new(layer_old.in_features, layer_old.out_features)

            # Keep the replacement on the same device as the layer it replaces,
            # otherwise a model living on CUDA ends up with CPU layers.
            ref_param = next(layer_old.parameters(), None)
            if ref_param is not None:
                layer_new = layer_new.to(ref_param.device)

            if convert_weights:
                # NOTE(review): this only initialises the new layer if it also
                # names its parameters `weight` / `bias`; the state_dict error
                # recorded at the end of this notebook suggests th.FGN_layer
                # uses `weights` / `centers` instead - confirm before relying on it.
                layer_new.weight = layer_old.weight
                layer_new.bias = layer_old.bias

            model._modules[name] = layer_new

    return model

In [12]:
# convert_layers swaps layers in place, so convert a deep copy: classic_model
# must stay an untouched classical network because later cells still use it.
converted_model = convert_layers(copy.deepcopy(classic_model), nn.Linear, th.FGN_layer)

fl <class 'torch.nn.modules.linear.Linear'>
changing type
hidden_layers <class 'torch.nn.modules.container.ModuleList'>
recursing
3 <class 'torch.nn.modules.batchnorm.BatchNorm1d'>
2 <class 'torch.nn.modules.linear.Linear'>
changing type
1 <class 'torch.nn.modules.batchnorm.BatchNorm1d'>
0 <class 'torch.nn.modules.linear.Linear'>
changing type
id <class 'torch.nn.modules.dropout.Dropout'>


In [52]:
# Show the module tree after conversion to check which layers were replaced
print(converted_model)

Feedforward_Classic_net(
  (id): Dropout(p=0.0)
  (hidden_layers): ModuleList(
    (0): FGN_layer()
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): FGN_layer()
    (3): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (fl): FGN_layer()
)


In [18]:
# convert: hand the trained classic network and the FGN model to the library helper
th.convert_Classic2FGN(
    fgn_model=fgn_model,
    classic_model=classic_model,
)
print("Model converted")

RuntimeError: Error(s) in loading state_dict for Feedforward_FGN_net:
	Missing key(s) in state_dict: "hidden_layers.1.running_var", "hidden_layers.1.bias", "hidden_layers.1.weight", "hidden_layers.1.running_mean", "hidden_layers.3.running_var", "hidden_layers.3.bias", "hidden_layers.3.weight", "hidden_layers.3.running_mean". 
	size mismatch for hidden_layers.2.weights: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([32, 32]).
	size mismatch for hidden_layers.2.centers: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([32, 32]).
	size mismatch for fl.weights: copying a param with shape torch.Size([32, 32]) from checkpoint, the shape in current model is torch.Size([10, 32]).
	size mismatch for fl.centers: copying a param with shape torch.Size([32, 32]) from checkpoint, the shape in current model is torch.Size([10, 32]).