In [1]:
# dev code for making FGNets works on batches of data instead of a single element

In [2]:
from __future__ import print_function

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import numpy as np
from scipy import stats

# graph viz tool
# import sys
# sys.path.append('/home/felix/Research/Adversarial Research/FGN---Research')
import torch_helper_lib as th



In [4]:
# define the FGN layer class
import math 

class FGN_layer(nn.Module):
    r""" Applies a Finite Gaussian Neuron layer to the incoming data
    
    Args:
    
    Shape:
    
    Attributes:
    
    Examples:
        
        >>> l=FGN_layer(20,30)
    
    """
    def __init__(self, in_features, out_features):
        super(FGN_layer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        
        # learnable parameters
        # regular NN weights (transposed at the start, see order of Tensor(dims))
        self.weights = nn.Parameter(torch.Tensor(out_features, in_features), requires_grad= True)
        # centers of FGNs
        self.centers = nn.Parameter(torch.Tensor(out_features, in_features), requires_grad = True)
        # size of FGNs
        self.sigs = nn.Parameter(torch.Tensor(out_features,), requires_grad = True)
        
        # parameter init call
        self.reset_parameters()
    
    # parameter init definition
    def reset_parameters(self):
        s = np.sqrt(self.in_features)
        # regular NN init
        self.weights.data.uniform_(-s, s)
        # centers init
        self.centers.data.uniform_(-s, s)
        # size init 
        self.sigs.data.uniform_(100*s, 100*s)
        
    def forward(self, input):
        
        # linear part is the same as normal NNs
        l = nn.functional.linear(input, self.weights, bias=None)
        # optional, apply tanh here
        # l = torch.tanh(l)
#         print("size of L", l.size())
#         print(l)


        # gaussian component
        # unsqueeze the inputs to allow broadcasting
        # compute distance to centers
        g = (input.unsqueeze(1)-self.centers)**2
#         print("size of G", g.size())
        # print(g)
        g = g.sum(dim=2)
#         print("size of G", g.size())

        # for future, use any norm?
        # g2 = torch.norm(self.input.unsqueeze(1)-centers), p=2, dim=2)

        # apply sigma
        g = -g/(self.sigs**2)
#         print("size of G", g.size())
        # apply exponential
        g = torch.exp(g)
#         print("size of G", g.size())

        # combine gaussian with linear
        res = l*g
        # optional, flatten res
        # res = F.tanh(res)
#         print("size of L*G", res.size())

        # clip res to +1
        res  = torch.clamp(res, min=-1.0, max=1.0)
        
        return res
    

In [5]:
# # dev - is the computation correct?
# inputs = torch.randint(low=0, high=10, size=(2,3))
# print("inputs:\n", inputs)
# weights = torch.randint(low=-1, high=1, size=(4,3))
# print("weights:\n", weights)
# centers = torch.randint(low=-1, high=1, size=(4,3))
# print("centers:\n", centers)
# sigs = torch.randint(low=-1, high=1, size=(4,))
# print("sigs:\n", sigs)

# # linear part is the same as normal NNs
# l = nn.functional.linear(inputs, weights, bias=None)
# # optional, apply tanh here
# # l = torch.tanh(l)
# print("size of L", l.size())
# print(l)


# # gaussian component
# # unsqueeze the inputs to allow broadcasting
# # compute distance to centers
# g = (inputs.unsqueeze(1)-centers)**2
# print("size of G", g.size())
# # print(g)
# g = g.sum(dim=2)

# # for future, use any norm?
# # g2 = torch.norm(inputs.unsqueeze(1)-centers), p=2, dim=2)


# print("size of G", g.size())
# # apply sigma
# g = -g/(sigs**2)
# print("size of G", g.size())
# # apply exponential
# g = torch.exp(g)
# print("size of G", g.size())
# # # g = g.view(l.size())
# # print("size of G", g.size())
# # print(g)

# # g = torch.exp(-((input-self.centers)**2).sum(dim=1)/(self.sigs**2))

# # combine
# res = l*g
# # optional, flatten res
# # res = F.tanh(res)
# #         print("size of L*G", res.size())

# # clip res to +1
# res  = torch.clamp(res, min=-1.0, max=1.0)
# print(res)

In [6]:
# MNIST dataset and dataloader declaration
# transforms does both the conversion from 0-255 to 0-1
# and normalizes by the precomputed mean and std

batchsize = 30000

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../MNIST-dataset', train=True, download=False, 
                   transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))
            ])), 
        batch_size=256, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../MNIST-dataset', train=False, download=False, 
                   transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))
            ])), 
        batch_size=256, shuffle=False)

In [7]:
# Define a Finite Gaussian Neural Network

class FGNet(nn.Module):
    def __init__(self):
        super(FGNet, self).__init__()
        self.l1 = FGN_layer(28*28,100)
        self.l2 = FGN_layer(100,10)

    def forward(self, x):
        # flatten the image first
        x = x.view(-1, 28*28)
#         print(x)
        x = self.l1(x)
        x = self.l2(x)
#         print(x)
        x = F.log_softmax(x, dim=-1)
        
#         final_r = []

#         for sample in x:
#             r = sample.view(-1,28*28)
#             r = self.l1(r)
#             r = self.l2(r)

#             final_r.append(r)

        return x

In [8]:
def train(model, device, train_loader, optimizer, epoch):
    
    losses = th.AverageMeter()
    
    # set model to trainable mode
    model.train()
    # load a batch
    for batch_idx, (data, target) in enumerate(train_loader):
        # load batch data, targets to device
        data, target = data.to(device), target.to(device)
        # reset optimizer gradients
        optimizer.zero_grad()
        # compute predictions
        output = model(data)
        # compute loss
        loss = F.nll_loss(output, target)
        # print(loss)
        # update rolling average
        losses.update(loss.item(), data.size(0) )
        # propagate gradients
        loss.backward()
#         print("max grad centers:", torch.max(model.l1.centers.grad) )
#         print("max grad sigs:", torch.max(model.l1.sigs.grad) )

        # apply stored gradients to parameters
        optimizer.step()
        if batch_idx % 10000 == 0:
#             print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
#                 epoch, batch_idx * len(data), len(train_loader.dataset),
#                 100. * batch_idx / len(train_loader), loss.item()))
            print("Loss", losses.avg)

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, size_average=False ).item() # sum up batch loss
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    


In [9]:
# Define what device we are using
print("CUDA Available: ",torch.cuda.is_available())
torch.manual_seed(666)
use_cuda = True
device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")

# Initialize the network
model = FGNet().to(device)

optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.01, momentum=0.5)

# train the model 
for epoch in range(1, 50):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

CUDA Available:  True
Loss 2.58909392357

Test set: Average loss: 2.6373, Accuracy: 1110/10000 (11%)

Loss 2.69168257713

Test set: Average loss: 2.6358, Accuracy: 1109/10000 (11%)

Loss 2.6107776165

Test set: Average loss: 2.6345, Accuracy: 1109/10000 (11%)

Loss 2.51916527748

Test set: Average loss: 2.6333, Accuracy: 1112/10000 (11%)

Loss 2.65301418304

Test set: Average loss: 2.6321, Accuracy: 1113/10000 (11%)

Loss 2.56910300255

Test set: Average loss: 2.6309, Accuracy: 1115/10000 (11%)

Loss 2.68103909492

Test set: Average loss: 2.6295, Accuracy: 1117/10000 (11%)

Loss 2.63765311241

Test set: Average loss: 2.6283, Accuracy: 1118/10000 (11%)

Loss 2.66585612297

Test set: Average loss: 2.6272, Accuracy: 1119/10000 (11%)

Loss 2.66963744164

Test set: Average loss: 2.6261, Accuracy: 1119/10000 (11%)

Loss 2.58609890938

Test set: Average loss: 2.6250, Accuracy: 1119/10000 (11%)

Loss 2.55832338333

Test set: Average loss: 2.6238, Accuracy: 1120/10000 (11%)

Loss 2.62238049507
