<a href="https://colab.research.google.com/github/jeffreyboschman/FiveMinuteMachineLearning/blob/main/deep_learning_basics/convolutions_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Code

In [4]:
import numpy as np
import torch

In [148]:
class convolution_2d_basic():
    def __init__(self, out_channels, in_channels, kernel_size, use_bias = True):
        self.kernel_size = int(kernel_size) #e.g., 3
        self.in_channels = int(in_channels) #e.g., 10
        self.out_channels = int(out_channels) #e.g., 20

        self.filters = np.random.rand(out_channels, in_channels, kernel_size, kernel_size) #shape (C_out, C_in, k, k)
        
        self.use_bias = use_bias
        if self.use_bias:
            self.bias =  np.random.rand(out_channels)

        #for this basic convolution, we are setting no padding and stride 1
        self.padding = 0
        self.stride = 1

    def forward(self, input):
        '''
        Applies a 2d convolution on a set of input activations (e.g., an image) 
            using the current self.filters and self.bias 
            to get a set of output activations 

        input:  activations with shape (N, C_in, H_in, W_in)
        output: activations with shape (N, C_out, H_out, W_out)

        where N is batch size, C_in is the number of channels in the input, H_in is height of input (in pixels), W_in is width of input (in pixels),
                            C_out is the number of channels in the output, H_out is height of output (in pixels), W_out is width of output (in pixels)
        '''
        if len(input.shape) == 3:
            input = np.expand_dims(input, axis = 0)
        batch_size = input.shape[0]
        in_height, in_width = input.shape[-2], input.shape[-1]
        out_height = int(((in_height - self.kernel_size + 2*self.padding) / self.stride) + 1)
        out_width = int(((in_width - self.kernel_size + 2*self.padding) / self.stride) + 1)
        output = np.zeros((batch_size, self.out_channels, out_height, out_width))

        for channel in range(self.out_channels):
            for row in range(out_width):
                for col in range(out_height):
                
                    current_inputs = input[:, :, row:row+self.kernel_size, col:col+self.kernel_size] #shape (N, C_in, k, k)

                    output[:, channel, row, col] = np.sum(np.multiply(current_inputs, self.filters[channel, :, :, :]), axis = (1,2,3)) #filters shape (C_out, C_in, k, k)
                    print(output) #shape (N, C_out, H_out, W_out)

            if self.use_bias:
                output[:, channel, :, :] = np.add(output[:, channel, :, :], self.bias[channel])
                    
            print(output) #shape (N, C_out, H_out, W_out)



# Demonstration

In [149]:
conv_test = convolution_2d_basic(2, 1, 3) #filter shape (C_out, C_in, k)
print(conv_test.filters)

[[[[0.51373927 0.17902503 0.4902696 ]
   [0.77948908 0.392648   0.58496166]
   [0.37538249 0.46349659 0.06209062]]]


 [[[0.90971835 0.3670629  0.13318029]
   [0.05503488 0.46727954 0.27184208]
   [0.50395048 0.42800169 0.89241728]]]]


In [150]:
input = np.random.rand(2, 1, 5, 5) #input shape (N, C_in, H_in, W_in)
#input = torch.tensor(input)

In [151]:
conv_test.forward(input)

[[[[1.78000559 0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]

  [[0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]]


 [[[1.56706437 0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]

  [[0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]]]
[[[[1.78000559 2.29163576 0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]

  [[0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]]


 [[[1.56706437 1.90604981 0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]

  [[0.         0.         0.        ]
   [0.         0.         0.        ]
   [0.         0.         0.        ]]]]
[[[[1.78000559 2.29163576 1.72716317]
   [0.         0.         0.

# Other resources

- https://arxiv.org/pdf/1603.07285.pdf
- https://github.com/vdumoulin/conv_arithmetic
- https://www.quora.com/How-can-I-calculate-the-size-of-output-of-convolutional-layer