<a href="https://colab.research.google.com/github/djdumpling/Machine-Learning-Prince-/blob/main/10_3_2D_Convolution.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Yippee! We're hand-coding a 2D convolution
# My guess is that it's going to be a lot of matrix multiplication or something similar

In [None]:
import numpy as np
import torch

# Print three decimals in both PyTorch and NumPy output.
# (Side note: "" and '' are interchangeable in Python.)
torch.set_printoptions(precision=3)
np.set_printoptions(floatmode="fixed", precision=3)

In [None]:
# Let's do a convolution in PyTorch to use as the reference result.
# padding: the kernel cannot be centred on perimeter pixels without running off the image, so we pad around the border
# stride: how far we shift the kernel window between output positions
def conv_pytorch(image, conv_weights, stride = 1, pad = 1):
  """Run a 2D convolution through PyTorch and return the result as a NumPy array.

  Args:
    image: NumPy array handed to `torch.nn.functional.conv2d` as its input.
    conv_weights: NumPy array handed to `conv2d` as its weight.
    stride: how far the kernel window shifts between output positions.
    pad: amount of padding `conv2d` adds around the image.

  Returns:
    The convolution output converted back to a NumPy array.
  """
  # conv2d only accepts tensors, so wrap both arrays first.
  as_tensors = [torch.from_numpy(array) for array in (image, conv_weights)]
  # The optional conv2d arguments (bias, dilation, groups) keep their defaults.
  result = torch.nn.functional.conv2d(*as_tensors, stride = stride, padding = pad)
  return result.numpy()

# Conv 1 (no extra stride, channels, batch_size)

In [None]:
def conv_numpy_1(image, weights, pad = 1):
  """Hand-rolled 2D convolution with stride 1, first image and first channel only.

  Args:
    image: 4-D array unpacked as (batch, channels, height, width).
    weights: 4-D array unpacked as (channels out, channels in, kernel height,
      kernel width).
    pad: number of zeros added on every side of the height and width axes.

  Returns:
    float32 array of shape (batch, channels out, height out, width out). Only
    the [0, 0] slice is computed; every other entry stays zero.
  """
  if pad != 0:
    # Zero-pad the two spatial axes; the batch and channel axes are untouched.
    untouched, border = (0, 0), (pad, pad)
    image = np.pad(image, (untouched, untouched, border, border), 'constant')

  n_images, _, height_in, width_in = image.shape
  n_filters, _, kernel_h, kernel_w = weights.shape

  # Count the kernel positions that fit along each axis. The +1 is the usual
  # inclusive count: there are 6 - 3 + 1 = 4 integers from 3 to 6.
  height_out = height_in - kernel_h + 1
  width_out = width_in - kernel_w + 1

  out = np.zeros((n_images, n_filters, height_out, width_out), dtype = np.float32)

  # The outer pair places the kernel on the image; the inner pair walks the
  # kernel and accumulates pixel * weight into the matching output entry.
  for row, col in np.ndindex(height_out, width_out):
    for k_row, k_col in np.ndindex(kernel_h, kernel_w):
      # Batch index and channel index are fixed at 0 in this first version.
      out[0, 0, row, col] += image[0, 0, row + k_row, col + k_col] * weights[0, 0, k_row, k_col]

  return out

In [None]:
# Fix the seed so the random image and kernel are identical on every run
np.random.seed(1)
n_batch = 1
image_height = 4
image_width = 6
channels_in = 1
channels_out = 1
kernel_size = 3

# random input image
input_image = np.random.normal(size=(n_batch, channels_in, image_height, image_width))
# random conv kernel weights
conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))

# conv in PyTorch (reference result)
conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride = 1, pad = 1)
print("PyTorch Results")
print(conv_results_pytorch)

# conv in numpy
print("Your results")
conv_results_numpy = conv_numpy_1(input_image, conv_weights)
print(conv_results_numpy)

# Compare programmatically instead of by eye. The tolerance allows for the
# float32 output of the hand-coded version versus PyTorch's float64 output.
np.testing.assert_allclose(conv_results_numpy, conv_results_pytorch, rtol=1e-4, atol=1e-4)

PyTorch Results
[[[[-0.929 -2.760  0.716  0.114  0.560 -0.387]
   [-1.515  0.283  1.008  0.466 -1.094  2.004]
   [-1.634  3.555 -2.154 -0.892 -1.856  2.299]
   [ 0.565 -0.947 -0.629  2.996 -1.811 -0.533]]]]
Your results
[[[[-0.929 -2.760  0.716  0.114  0.560 -0.387]
   [-1.515  0.283  1.008  0.466 -1.094  2.004]
   [-1.634  3.555 -2.154 -0.892 -1.856  2.299]
   [ 0.565 -0.947 -0.629  2.996 -1.811 -0.533]]]]


# Conv 2 (Conv 1 + Stride)

In [None]:
def conv_numpy_2(image, weights, stride, pad = 1):
  """Hand-rolled strided 2D convolution, first image and first channel only.

  Args:
    image: 4-D array unpacked as (batch, channels, height, width).
    weights: 4-D array unpacked as (channels out, channels in, kernel height,
      kernel width).
    stride: number of pixels the kernel moves between output positions.
    pad: number of zeros added on every side of the height and width axes.

  Returns:
    float32 array of shape (batch, channels out, height out, width out). Only
    the [0, 0] slice is computed; every other entry stays zero.
  """
  if pad != 0:
    # Only the height and width axes receive the zero border.
    image = np.pad(image, [(0, 0), (0, 0), (pad, pad), (pad, pad)], 'constant')

  n_images, _, height_in, width_in = image.shape
  n_filters, _, kernel_h, kernel_w = weights.shape

  # One output pixel per kernel position when stepping `stride` pixels at a time.
  height_out = np.floor((height_in - kernel_h) / stride + 1).astype(int)
  width_out = np.floor((width_in - kernel_w) / stride + 1).astype(int)

  out = np.zeros((n_images, n_filters, height_out, width_out), dtype = np.float32)

  for out_row in range(height_out):
    # Top edge of the kernel window in the padded image.
    top = stride * out_row
    for out_col in range(width_out):
      # Left edge of the kernel window in the padded image.
      left = stride * out_col
      for k_row, k_col in np.ndindex(kernel_h, kernel_w):
        # Accumulate pixel * weight into the current output entry.
        out[0, 0, out_row, out_col] += image[0, 0, top + k_row, left + k_col] * weights[0, 0, k_row, k_col]

  return out

In [None]:
# Set random seed so we always get same answer
np.random.seed(1)
n_batch = 1
image_height = 12
image_width = 10
channels_in = 1
kernel_size = 3
channels_out = 1
stride = 2

# Create random input image
input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))
# Create random convolution kernel weights
conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))

# Perform convolution using PyTorch (reference result)
conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride, pad=1)
print("PyTorch Results")
print(conv_results_pytorch)

# Perform convolution in numpy
print("Your results")
conv_results_numpy = conv_numpy_2(input_image, conv_weights, stride, pad=1)
print(conv_results_numpy)

# Compare programmatically instead of by eye. The tolerance allows for the
# float32 output of the hand-coded version versus PyTorch's float64 output.
np.testing.assert_allclose(conv_results_numpy, conv_results_pytorch, rtol=1e-4, atol=1e-4)

PyTorch Results
[[[[-0.809 -4.550 -5.486 -9.506 -4.512]
   [-0.055  1.145 -5.388 -3.910  0.097]
   [-0.186  0.660  1.630  2.275  4.874]
   [ 2.386 -0.225  3.288 -4.239 -1.403]
   [ 0.825  1.710 -3.246  3.246  1.709]
   [ 0.809  3.695  3.491 -2.113 -2.714]]]]
Your results
[[[[-0.809 -4.550 -5.486 -9.506 -4.512]
   [-0.055  1.145 -5.388 -3.910  0.097]
   [-0.186  0.660  1.630  2.275  4.874]
   [ 2.386 -0.225  3.288 -4.239 -1.403]
   [ 0.825  1.710 -3.246  3.246  1.709]
   [ 0.809  3.695  3.491 -2.113 -2.714]]]]


# Conv 3 (Conv 2 + Channels)

In [None]:
def conv_numpy_3(image, weights, stride, pad = 1):
  """Hand-rolled strided 2D convolution with multiple channels, first image only.

  Args:
    image: 4-D array unpacked as (batch, channels in, height, width).
    weights: 4-D array unpacked as (channels out, channels in, kernel height,
      kernel width). The input-channel count used by the loops is taken from
      this array.
    stride: number of pixels the kernel moves between output positions.
    pad: number of zeros added on every side of the height and width axes.

  Returns:
    float32 array of shape (batch, channels out, height out, width out). Only
    batch entry 0 is computed; any other batch entries stay zero.
  """
  if pad != 0:
    # Only the height and width axes receive the zero border.
    border = (pad, pad)
    image = np.pad(image, ((0, 0), (0, 0), border, border), 'constant')

  n_images, _, height_in, width_in = image.shape
  n_filters, n_channels, kernel_h, kernel_w = weights.shape

  # One output pixel per kernel position when stepping `stride` pixels at a time.
  height_out = int(np.floor(1 + (height_in - kernel_h) / stride))
  width_out = int(np.floor(1 + (width_in - kernel_w) / stride))

  out = np.zeros((n_images, n_filters, height_out, width_out), dtype = np.float32)

  # Each output pixel of each output channel sums over every input channel and
  # every kernel position, in (channel in, kernel row, kernel column) order.
  for c_out, out_row, out_col in np.ndindex(n_filters, height_out, width_out):
    top, left = stride * out_row, stride * out_col
    for c_in, k_row, k_col in np.ndindex(n_channels, kernel_h, kernel_w):
      pixel = image[0, c_in, top + k_row, left + k_col]
      # The weight depends on both the output and the input channel.
      out[0, c_out, out_row, out_col] += pixel * weights[c_out, c_in, k_row, k_col]

  return out

In [None]:
# Fix the seed so the random image and kernel are identical on every run
np.random.seed(1)
n_batch = 1
image_height = 4
image_width = 6
channels_in = 5
kernel_size = 3
channels_out = 2

# Create random input image
input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))
# Create random convolution kernel weights
conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))

# Perform convolution using PyTorch (reference result)
conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride=1, pad=1)
print("PyTorch Results")
print(conv_results_pytorch)

# Perform convolution in numpy
print("Your results")
conv_results_numpy = conv_numpy_3(input_image, conv_weights, stride=1, pad=1)
print(conv_results_numpy)

# Compare programmatically instead of by eye. The tolerance allows for the
# float32 output of the hand-coded version versus PyTorch's float64 output.
np.testing.assert_allclose(conv_results_numpy, conv_results_pytorch, rtol=1e-4, atol=1e-4)

PyTorch Results
[[[[ -0.785   5.463  -2.480   5.026  -3.594   7.785]
   [ -6.744   2.534  -0.664   7.149  -9.839   7.849]
   [ -4.794  14.074  -1.060   2.706 -10.182   2.004]
   [  1.809   0.287   4.648  -1.840   3.259   1.073]]

  [[  4.150   5.372   1.699   0.500   0.589   4.361]
   [ -4.123   5.136   4.677  -3.895  -4.990   2.546]
   [  3.991   5.768  -2.315   8.473   1.752   2.766]
   [  1.529   0.318  11.518  -5.444  -2.293   1.270]]]]
Your results
[[[[ -0.785   5.463  -2.480   5.026  -3.594   7.785]
   [ -6.744   2.534  -0.664   7.149  -9.839   7.849]
   [ -4.794  14.074  -1.060   2.706 -10.182   2.004]
   [  1.809   0.287   4.648  -1.840   3.259   1.073]]

  [[  4.150   5.372   1.699   0.500   0.589   4.361]
   [ -4.123   5.136   4.677  -3.895  -4.990   2.546]
   [  3.991   5.768  -2.315   8.473   1.752   2.766]
   [  1.529   0.318  11.518  -5.444  -2.293   1.270]]]]


# Conv 4 (Conv 3 + Multiple Images)

In [None]:
def conv_numpy_4(image, weights, stride, pad = 1):
  """Hand-rolled strided 2D convolution over a batch with multiple channels.

  Args:
    image: 4-D array unpacked as (batch, channels in, height, width).
    weights: 4-D array unpacked as (channels out, channels in, kernel height,
      kernel width).
    stride: number of pixels the kernel moves between output positions.
    pad: number of zeros added on every side of the height and width axes.

  Returns:
    float32 array of shape (batch, channels out, height out, width out).

  Raises:
    ValueError: if image and weights disagree on the number of input channels.
  """
  if pad != 0:
    # Only the height and width axes receive the zero border.
    image = np.pad(image, ((0,0), (0,0), (pad, pad), (pad,pad)), 'constant')

  batchSize, imageChannels, imageHeightIn, imageWidthIn = image.shape
  channelsOut, channelsIn, kernelHeight, kernelWidth = weights.shape

  # Unpacking both shapes into the same name would hide a mismatch: extra image
  # channels would be skipped silently and the result would simply be wrong.
  if imageChannels != channelsIn:
    raise ValueError(
        "image has %d channels but weights expect %d input channels"
        % (imageChannels, channelsIn))

  # One output pixel per kernel position when stepping `stride` pixels at a
  # time; integer floor division keeps the arithmetic exact.
  imageHeightOut = (imageHeightIn - kernelHeight) // stride + 1
  imageWidthOut = (imageWidthIn - kernelWidth) // stride + 1

  out = np.zeros((batchSize, channelsOut, imageHeightOut, imageWidthOut), dtype = np.float32)

  for c_batch in range(batchSize):
    for c_y in range(imageHeightOut):
      for c_x in range(imageWidthOut):
        for c_channel_out in range(channelsOut):
          for c_channel_in in range(channelsIn):
            for c_kernel_y in range(kernelHeight):
              for c_kernel_x in range(kernelWidth):
                # The pixel comes from the current image and input channel,
                # offset by the strided window origin.
                this_pixel_value = image[c_batch, c_channel_in, stride * c_y + c_kernel_y, stride * c_x + c_kernel_x]
                # The weight depends on both the output and the input channel.
                this_weight = weights[c_channel_out, c_channel_in, c_kernel_y, c_kernel_x]

                # Accumulate into the entry for this image and output channel.
                out[c_batch, c_channel_out, c_y, c_x] += this_pixel_value * this_weight

  return out

In [None]:
# Set random seed so we always get same answer
np.random.seed(1)
n_batch = 2
image_height = 4
image_width = 6
channels_in = 5
kernel_size = 3
channels_out = 2

# Create random input image
input_image= np.random.normal(size=(n_batch, channels_in, image_height, image_width))
# Create random convolution kernel weights
conv_weights = np.random.normal(size=(channels_out, channels_in, kernel_size, kernel_size))

# Perform convolution using PyTorch (reference result)
conv_results_pytorch = conv_pytorch(input_image, conv_weights, stride=1, pad=1)
print("PyTorch Results")
print(conv_results_pytorch)

# Perform convolution in numpy
print("Your results")
conv_results_numpy = conv_numpy_4(input_image, conv_weights, stride=1, pad=1)
print(conv_results_numpy)

# Compare programmatically instead of by eye. The tolerance allows for the
# float32 output of the hand-coded version versus PyTorch's float64 output.
np.testing.assert_allclose(conv_results_numpy, conv_results_pytorch, rtol=1e-4, atol=1e-4)

PyTorch Results
[[[[ -3.633  -1.644   0.169  -1.167  -3.865   6.045]
   [ -9.004   7.303   4.414   0.361  -6.739   3.939]
   [ -1.391  13.502   3.807  -9.379   3.991   5.442]
   [  2.805   6.874  -9.287  -4.468  -1.501   4.607]]

  [[  1.940  -1.410   2.397  -0.235  -0.394  -1.483]
   [  5.049  -3.335  -7.596  -1.586   3.049  -1.857]
   [  3.514   0.475  -1.952  -1.291  -0.589  -0.948]
   [  6.524  -0.020  -3.298  -1.248   3.249  -2.680]]]


 [[[  4.154  -4.764  11.635   0.506  -4.012  -2.081]
   [ -1.125  -0.677  16.749  -7.030  -5.978  -2.629]
   [  0.778  -3.984 -10.284   1.575  -8.888   1.163]
   [  0.556  -2.290   1.407  -3.088   2.227  -5.403]]

  [[  1.048   4.322  -0.901  -5.820   3.998   2.281]
   [ -1.313   8.409  -0.100  14.625   1.223  -3.572]
   [  1.411   1.617  -4.078  -8.107   3.705   0.229]
   [ -3.540  -5.292  -5.619  -4.039  -4.048  -3.446]]]]
Your results
[[[[ -3.633  -1.644   0.169  -1.167  -3.865   6.045]
   [ -9.004   7.303   4.414   0.361  -6.739   3.939]
   [ -