GaborNet with FC gabor parameters optimization

In [1]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch
import numpy as np
import cv2
import matplotlib.pyplot as plt
from numpy.random import rand, randn
import torchvision.datasets as dset
import torchvision.transforms as transforms

In [68]:
def getRandomGaborParams(size):
    theta = np.pi*(2*rand() - 1)
    psi = np.pi*rand()
    sigma = 3*np.abs(randn()) + 10e-3
    lambd = 5*np.abs(randn()) + 10e-3
    gamma = np.pi*rand()
    return {'ksize':size, 'sigma':sigma, 'theta': theta, 'lambd':lambd, 'gamma':gamma, 'psi': psi}

def calcGaborKernel(params):
    return cv2.getGaborKernel(**params)

def getParams(params):
    return np.array([[params['sigma']], [params['theta']], [params['lambd']], [params['gamma']], [params['psi']]])

In [20]:
Win = 36
K = 3
S = 1
P = 0

Kp = 4
Sp = 1
Pp = 0
D = 1

for i in range(1):
    Win = (Win - K + 2*P)/S + 1
    print(Win)
    Win = (Win + 2*Pp - D*(Kp-1) - 1)/Sp + 1
    print('pooling', Win)

34.0
pooling 31.0


In [5]:
Hin = [94]
K = 4
S = 1
P = 0
D = 1
for i in Hin:
    print((i + 2*P - D*(K-1) - 1)/S + 1)

91.0


In [53]:
class ChildNet(nn.Module):
    def __init__(self):
        
        super(ChildNet, self).__init__()
        self.params = nn.Linear(1,15)
        self.fc1 = nn.Linear(15, 50)
        self.fc2 = nn.Linear(50, 100)
        self.fc3 = nn.Linear(100, 200)
        self.fc4 = nn.Linear(200, 400)
        self.fc5 = nn.Linear(400, 500)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        return x

In [128]:
class ParentNet(nn.Module):
    def __init__(self, out_filters = 1):
        super(ParentNet, self).__init__()
        
        self.g1 = nn.Conv2d(1,1, kernel_size=15, padding=7, bias = False)
        self.g2 = nn.Conv2d(1,1, kernel_size=15, padding=7, bias = False)
        self.g3 = nn.Conv2d(1,1, kernel_size=15, padding=7, bias = False)
        self.fc4 = nn.Linear(28*28*1, 500)
        self.fc5 = nn.Linear(500, 10)

    def forward(self, x):
        with torch.no_grad():
            x = F.relu(self.g1(x))
            x = F.relu(self.g2(x))
            x = F.relu(self.g3(x))
        x = x.view(-1, 28*28)
        x = F.relu(self.fc4(x))
        x = F.softmax(self.fc5(x))
        return x
    
    def update_gabor(self, params):
        p = 0
        for layer in [self.g1, self.g2, self.g3]:
            for i in range(layer.in_channels):
                for j in range(layer.out_channels):
                    if p < len(params):
                        layer.weight[0][0] = torch.tensor(calcGaborKernel({'ksize':layer.weight[0][0].data.numpy().shape, 'sigma':params[0], 'theta': params[1], 'lambd':params[2], 'gamma':params[3], 'psi': params[4]}))
                        p += 5

In [129]:
transform = transforms.Compose([transforms.ToTensor()])

train_set = dset.MNIST(root='', train=True, download=True, transform=transform)
test_set = dset.MNIST(root='', train=False, download=True, transform=transform)

batch_size = 128

train_loader = torch.utils.data.DataLoader(
                 dataset=train_set,
                 batch_size=batch_size,
                 shuffle=True)
test_loader = torch.utils.data.DataLoader(
                dataset=test_set,
                batch_size=batch_size,
                shuffle=False)

In [130]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x2a589a8d1d0>

In [131]:
dataiter = iter(train_loader)
images, labels = dataiter.next()

In [132]:
print('==>>> total trainning batch number: {}'.format(len(train_loader)))
print('==>>> total testing batch number: {}'.format(len(test_loader)))

==>>> total trainning batch number: 469
==>>> total testing batch number: 79


In [135]:
child = ChildNet()
parent = ParentNet()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(parent.parameters(), lr=0.001, momentum=0.9)
child_optimizer = optim.SGD(child.parameters(), lr=0.01, momentum=0.9)

params = getRandomGaborParams(size = (15,15))
parent.g1.weight[0][0] = torch.tensor(calcGaborKernel(params))
child.params.weight[0:5] = torch.tensor(getParams(params))

params = getRandomGaborParams(size = (15,15))
parent.g2.weight[0][0] = torch.tensor(calcGaborKernel(params))
child.params.weight[5:10] = torch.tensor(getParams(params))

params = getRandomGaborParams(size = (15,15))
parent.g3.weight[0][0] = torch.tensor(calcGaborKernel(params))
child.params.weight[10:15] = torch.tensor(getParams(params))

for epoch in range(50):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = parent(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        child_optimizer.step()
        
        
        parent.update_gabor(child.params.weight.view(15))

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 199:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            print(child.params.weight)
            running_loss = 0.0

print('Finished Training')



[1,   200] loss: 0.216
Parameter containing:
tensor([[ 4.6909],
        [-0.1307],
        [ 2.8460],
        [ 2.5066],
        [ 0.6619],
        [ 2.7616],
        [ 0.5156],
        [ 2.0610],
        [ 2.0864],
        [ 3.0508],
        [ 1.0943],
        [ 2.0181],
        [ 4.7917],
        [ 0.5694],
        [ 2.5380]], grad_fn=<CopySlices>)
[2,   200] loss: 0.191
Parameter containing:
tensor([[ 4.6909],
        [-0.1307],
        [ 2.8460],
        [ 2.5066],
        [ 0.6619],
        [ 2.7616],
        [ 0.5156],
        [ 2.0610],
        [ 2.0864],
        [ 3.0508],
        [ 1.0943],
        [ 2.0181],
        [ 4.7917],
        [ 0.5694],
        [ 2.5380]], grad_fn=<CopySlices>)


KeyboardInterrupt: 

In [152]:
parent.zero_grad()
child = ChildNet()
parent = ParentNet()
parent.zero_grad()

In [153]:
print(parent.fc4.weight.grad)

None


In [70]:
getParams(getRandomGaborParams(size = (15,15))).shape

(5, 1)

In [31]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
        self.fc3.weight.requires_grad = False 
        self.fc3.bias.requires_grad = False 

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        #with torch.no_grad():
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


net = Net()
print(net)

input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
tensor([[ 0.0616, -0.0975,  0.0262,  0.0136,  0.0476, -0.0530, -0.0519, -0.0139,
          0.0166,  0.0120]], grad_fn=<ThAddmmBackward>)


In [32]:
net.zero_grad()

In [410]:
input = torch.randn(1, 1, 32, 32)
output = net(input)
target = torch.randn(10)  # a dummy target, for example
target = target.view(1, -1)  # make it the same shape as output
criterion = nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

loss = criterion(output, target)
print('loss', loss)

net.zero_grad()     # zeroes the gradient buffers of all parameters

print('conv1.bias.grad before backward')
print(net.fc2.bias.grad)
print(net.fc2.bias[5])
old = net.fc2.bias

output.backward(torch.randn(1, 10))

print('conv1.bias.grad after backward')
print(net.fc2.bias.grad)
print(net.fc2.bias[5])

optimizer.step()

loss tensor(0.9099, grad_fn=<MseLossBackward>)
conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
tensor(-0.0783, grad_fn=<SelectBackward>)
conv1.bias.grad after backward
tensor([ 0.1894,  0.0000,  0.1095,  0.0000,  0.0000,  0.0000, -0.2229,  0.1932,
         0.3759,  0.0000,  0.1393,  0.2879, -0.0572,  0.0110,  0.0274, -0.2433,
         0.2750,  0.0000, -0.3357, -0.1028,  0.0000, -0.0323,  0.0664, -0.0533,
         0.2978,  0.0000, -0.1701, -0.0313,  0.0775,  0.0000, -0.2076, -0.2414,
         0.0000, -0.1707,  0.0000,  0.0000, -0.2074,  0.0514,  0.0544,  0.2150,
         0.0000,  0.0000,  0.2230, -0.0681, -0.0122,  0.0000,  0.0000, 

In [417]:
print(torch.__version__)

0.4.1
