In [2]:
import numpy as np

b = 1
c = 4
w = 5
h = 5
o = 8
s = 3

img = np.random.rand(b, c, w, h)
filter = np.random.rand(o, c, s, s)
output = np.zeros((b, o, w, h))

def conv_img(img, filter):
    c, w, h = img.shape
    o, c, s, _ = filter.shape
    shift = s // 2
    output = np.zeros((o, w, h))
    padded_img = np.pad(img, [(0, 0), (shift, shift), (shift, shift)], 'constant')
    
    for i in range(shift, w + shift):
        for j in range(shift, h + shift):
            for k in range(c):
                for l in range(s):
                    for m in range(s):
                        for n in range(o):
                            output[n, i - shift, j - shift] += padded_img[k, i + l - shift, j + m - shift] * filter[n, k, l, m]
    return output

for i in range(b):
    output[i, :, :, :] = conv_img(img[i], filter)

b = 1
c = 4
w = 5
h = 5
o = 8
s = 3

img = np.random.rand(b, c, w, h)
filter = np.random.rand(o, c, s, s)
output = np.zeros((b, o, w, h))
print(img.shape)
print(filter.shape)

print(output.shape)

(1, 4, 5, 5)
(8, 4, 3, 3)
(1, 8, 5, 5)


In [3]:
def calc_size(input_tensor, output_c, kernel_size, stride, padding):
    b, c, w, h = input_tensor.shape
    new_w  = (w - kernel_size + 2 * padding) // stride + 1
    new_h  = (h - kernel_size + 2 * padding) // stride + 1
    
    return (b, output_c, new_w, new_h)

In [30]:
def unfold(img, kernel_size, stride, padding):
    img = np.pad(img, [(0, 0), (0, 0), (padding, 1), (padding, padding)], 'constant')
    b, c, h, w = img.shape
    output_w  = (w - kernel_size) // stride + 1
    output_h  = (h - kernel_size) // stride + 1
    shift = kernel_size // 2
    patches = output_w * output_h
    values_per_patch = kernel_size * kernel_size * c
    output = np.zeros((b, values_per_patch, patches))

    for batch in range(b):
        for i in range(shift, w - shift, stride):
            for j in range(shift, h - shift, stride):
                for k in range(kernel_size):
                    for l in range(kernel_size):
                        for m in range(c):
                            id0 = batch
                            id1 = kernel_size * kernel_size * m + l * kernel_size + k
                            id2 = (i - shift) * output_w + (j - shift)
                            output[id0, id1, id2] = img[batch, m, i + k - shift, j + l - shift]

    return output


In [2]:
import torch
import torch.nn.functional as F
from torch import nn


class Conv2D(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0):
        super(Conv2D, self).__init__()
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.out_channels = out_channels
        w_init = torch.randn(in_channels * kernel_size * kernel_size, out_channels)
        # w_init = torch.randn(out_channels, in_channels * kernel_size * kernel_size)
        self.W = nn.Parameter(w_init)
        self.W_r = self.W.transpose(0, 1).reshape(out_channels, in_channels, kernel_size, kernel_size)

    def forward(self, input_batch):
        x = F.unfold(input_batch, kernel_size=self.kernel_size, stride=self.stride, padding=self.padding)
        x = x.transpose(1, 2).reshape(-1, self.W.shape[0])
        x = torch.matmul(x, self.W)
        b, _, h, w = self.get_expected_shape(input_batch)
        x = x.reshape(b, h, w, self.out_channels).transpose(2, 3).transpose(1, 2)
        return x

    def get_expected_shape(self, input_tensor):
        b, _, w, h = input_tensor.shape
        new_h  = (h - self.kernel_size + 2 * self.padding) // self.stride + 1
        new_w  = (w - self.kernel_size + 2 * self.padding) // self.stride + 1
        
        return (b, self.out_channels, new_w, new_h)

in_channels = 3
out_channels = 5
kernel_size = 3
stride = 1
padding = kernel_size // 2

conv = Conv2D(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding)
input_batch = torch.randn(1, 3, 16, 16)
output_batch = conv(input_batch)
out_c = torch.nn.functional.conv2d(input_batch, conv.W_r, stride=stride, padding=padding)


torch.Size([1, 27, 36])


In [4]:
import torch
import pandas as pd
import torch.optim as optim

import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 4


LR = 0.0001
EPOCH = 10
MOMENTUM = 0.9
BATCH_SIZE = 4
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = Conv2D(3, 6, 5)
        self.conv2 = Conv2D(6, 16, 5)
        # self.conv1 = nn.Conv2d(3, 6, 5)
        # self.conv2 = nn.Conv2d(6, 16, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
        self.pool_res = nn.MaxPool2d(5, 2)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()


results = {
    'Epoch': [],
    'Loss': [],
    'Accuracy': [],
    'Method': [],
    'Run': []
}

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
                                        shuffle=True, num_workers=0)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                    download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE,
                                        shuffle=False, num_workers=0)
criterion = nn.CrossEntropyLoss() 
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=MOMENTUM)

for i in range(10):
    for layer in net.children():
        if hasattr(layer, 'reset_parameters'):
            layer.reset_parameters()
    for epoch in range(EPOCH):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0): # get the inputs; data is a list of [inputs, labels] 
            inputs, labels = data # zero the parameter gradients
            optimizer.zero_grad() # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
                running_loss = 0.0
        correct = 0.0
        total = 0.0
        # since we're not training, we don't need to calculate the gradients for our outputs
        with torch.no_grad():
            for data in testloader:
                images, labels = data
                # calculate outputs by running images through the network
                outputs = net(images)
                # the class with the highest energy is what we choose as prediction
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print(f'EPOCH {epoch + 1}')
        print(f'Accuracy of the network on the 10000 test images: {100 * correct / total}%')
        print(f'Loss of the network on the 10000 test images: {running_loss:.3f}')

        # results['Loss'].append(running_loss)
        # results['Epoch'].append(epoch)
        # results['Accuracy'].append(correct / total)
        # results['Method'].append('Residual')
        # results['Run'].append(i)

# df= pd.DataFrame.from_dict(results)
# df.to_csv('data_12_residual.csv')

Files already downloaded and verified
Files already downloaded and verified
[1,  2000] loss: 2.382
[1,  4000] loss: 2.233
[1,  6000] loss: 2.198
[1,  8000] loss: 2.169
[1, 10000] loss: 2.118
[1, 12000] loss: 2.101
EPOCH 1
Accuracy of the network on the 10000 test images: 21.88%
Loss of the network on the 10000 test images: 1038.670
[2,  2000] loss: 2.074
[2,  4000] loss: 2.068
[2,  6000] loss: 2.045
[2,  8000] loss: 2.042
[2, 10000] loss: 2.028
[2, 12000] loss: 2.016
EPOCH 2
Accuracy of the network on the 10000 test images: 25.75%
Loss of the network on the 10000 test images: 990.091
[3,  2000] loss: 2.004
[3,  4000] loss: 1.995
[3,  6000] loss: 1.985
[3,  8000] loss: 1.971
[3, 10000] loss: 1.979
[3, 12000] loss: 1.973
EPOCH 3
Accuracy of the network on the 10000 test images: 28.03%
Loss of the network on the 10000 test images: 983.774


KeyboardInterrupt: 

In [84]:
import torch
import torch.nn.functional as F
from torch import nn


class Conv2DFunc(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input_batch, kernel, stride=1, padding=1):
        o, _, k, _ = kernel.shape
        b, _, new_h, new_w = Conv2DFunc.get_expected_shape(input_batch, k, padding, stride)
        w = kernel.reshape(o, -1).transpose(0, 1)
        ctx.input_shape = input_batch.shape
        ctx.kernel_shape = kernel.shape
        ctx.conv_params = (stride, padding)
        ctx.w = w

        x = F.unfold(input_batch, kernel_size=k, stride=stride, padding=padding)
        x = x.transpose(1, 2)
        ctx.folded_shape = x.shape
        x = x.reshape(-1, w.shape[0])
        ctx.u = x
        x = torch.matmul(x, w)
        x = x.reshape(b, new_h, new_w, o)
        x = x.transpose(2, 3).transpose(1, 2)
        return x

    @staticmethod
    def backward(ctx, grad_output):
        grad = grad_output.transpose(1, 2).transpose(2, 3)
        grad = grad.reshape(-1, ctx.kernel_shape[0])
        w_grad = torch.matmul(ctx.u.transpose(0, 1), grad)
        w_grad = w_grad.transpose(0, 1)
        w_grad = w_grad.reshape(ctx.kernel_shape)

        grad = torch.matmul(grad, ctx.w.transpose(0, 1)) # U
        grad = grad.reshape(ctx.folded_shape) # U'
        grad = grad.transpose(1, 2) # U''

        out_size = (ctx.input_shape[2], ctx.input_shape[3])
        kernel_size = ctx.kernel_shape[2]
        stride = ctx.conv_params[0]
        padding = ctx.conv_params[1]
        input_grad = F.fold(grad, output_size=out_size, kernel_size=kernel_size, stride=stride, padding=padding)

        return input_grad, w_grad, None, None

    @staticmethod
    def get_expected_shape(input_tensor, kernel_size, padding, stride):
        b, _, w, h = input_tensor.shape
        new_h  = (h - kernel_size + 2 * padding) // stride + 1
        new_w  = (w - kernel_size + 2 * padding) // stride + 1
        
        return (b, out_channels, new_w, new_h)

torch.manual_seed(2137)

in_channels = 3
out_channels = 5
kernel_size = 3
stride = 1
padding = kernel_size // 2

kernel = torch.rand(out_channels, in_channels, kernel_size, kernel_size, requires_grad=True)
conv = Conv2DFunc
input_batch = torch.randn(1, 3, 16, 16, requires_grad=True)

output_batch = torch.nn.functional.conv2d(input_batch, kernel, stride=stride, padding=padding)
loss = torch.sum(output_batch)
loss.backward()
grad1 = kernel.grad
# print(grad1.shape)
# print(kernel.grad)

output_batch = conv.apply(input_batch, kernel, stride, padding)
loss = torch.sum(output_batch)
loss.backward()
grad2 = kernel.grad
# print(grad2)
print(kernel.grad.shape)
print(torch.sum(grad1 - grad2))

# output_grad = torch.ones(output_batch.shape)
# conv.backward(output_grad)
# torch.autograd.gradcheck(torch.nn.functional.conv2d, {"input": input_batch, "weight": kernel, "stride": stride, "padding": padding})
# print(torch.max(torch.abs(out_c - output_batch)))

torch.Size([5, 3, 3, 3])
tensor(0.)
