In [None]:
from PIL import Image
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from urllib.request import urlretrieve
from IPython.display import display
from skimage import color
import albumentations

%matplotlib inline

In [None]:
# Download the sample photo into the current working directory as golden.jpg.
urlretrieve(
    url='https://www.gannett-cdn.com/presto/2020/02/03/PROC/23928de8-75f4-4cbd-aa93-a6f68ad330f7-20200201-_MS_0622.jpg',
    filename='golden.jpg',
)

In [None]:
# Open the downloaded photo, scale it to 244x244 pixels, and end the cell
# with the image itself so the notebook renders it.
img = Image.open('golden.jpg')
img = img.resize((244, 244))
img

In [None]:
# Convert the PIL image into a NumPy array; later cells slice it,
# convert it to grayscale and turn it into a tensor.
img_arr = np.array(img)

In [None]:
def apply_filter(region, filter):
    """
    Exercise stub: apply `filter` to `region`.

    Replace the body with your own implementation; until then, calling
    this function raises NotImplementedError.
    """
    # Your code here. Delete the placeholder error below once implemented.
    raise NotImplementedError

In [None]:
# Sample 3x3 region of pixel values.
r = np.array([
    [5, 0, 8],
    [1, 9, 5],
    [6, 0, 2]
])

# Filter with a single 1 in the centre and zeros everywhere else.
f = np.array([
    [0, 0, 0],
    [0, 1, 0],
    [0, 0, 0]
])

# Check your apply_filter: the expected answer is the centre value of r, 9.
assert apply_filter(r, f) == 9, 'Incorrect, please try again!'

In [None]:
#@title Double-click this cell for the solution
def apply_filter(region, filter):
    """
    Apply a filter to an array of the same shape.

    Multiplies `region` and `filter` element by element and returns the
    sum of those products. Fails an assertion if the shapes differ.
    """
    shapes_match = region.shape == filter.shape
    assert shapes_match, f"""
    There is a mismatch between the filter shape and region shape.
    Filter shape: {filter.shape}
    Region shape: {region.shape}
    """

    products = region * filter
    return products.sum()

In [None]:
def zero_pad(arr):
    """
    Exercise stub: zero-pad an array of 2 or 3 dimensions.

    Write your implementation here; the placeholder below raises
    NotImplementedError until you do.
    """
    raise NotImplementedError

In [None]:
# Check your zero_pad: the first two dimensions should each grow by 2,
# while a trailing channel dimension keeps its size.
assert zero_pad(np.random.randn(3, 3)).shape == (5, 5)
assert zero_pad(np.random.randn(3, 3, 3)).shape == (5, 5, 3)

In [None]:
#@title Double-click this cell for the solution
def zero_pad(arr):
    """
    Surround an image array with a one-pixel border of zeros.

    arr: array of shape (rows, cols) or (rows, cols, channels)

    Returns a new float array of shape (rows+2, cols+2[, channels]) holding
    `arr` in its interior; the channel axis, if present, is not padded.
    Raises ValueError if `arr` is not 2- or 3-dimensional.
    """
    if arr.ndim not in (2, 3):
        raise ValueError(
            f"zero_pad expects a 2- or 3-dimensional array, got {arr.ndim} dimension(s)"
        )

    n_rows, n_cols = arr.shape[:2]
    # The builtin `float` replaces the `np.float` alias, which was removed
    # in NumPy 1.24; both mean float64.
    padded = np.zeros((n_rows + 2, n_cols + 2) + arr.shape[2:], dtype=float)
    # Assigning into the interior copies the values, leaving the border at zero.
    padded[1:-1, 1:-1] = arr
    return padded

In [None]:
def output_array(arr):
    """
    Exercise stub: build the array that will hold the output feature map
    for `arr`. Raises NotImplementedError until you write it.
    """
    raise NotImplementedError("Write this function!")

In [None]:
# Check your output_array: the first two dimensions should each shrink by 2,
# while a trailing channel dimension keeps its size.
assert output_array(np.random.randn(16, 16)).shape == (14, 14)
assert output_array(np.random.randn(16, 16, 3)).shape == (14, 14, 3)

In [None]:
#@title Double-click this cell for solution
def output_array(arr):
    """
    Allocate the zero-filled float array that receives the output
    feature map of an unpadded 3x3 filter applied to `arr`.

    arr: array of shape (rows, cols) or (rows, cols, channels)

    Returns zeros of shape (rows-2, cols-2[, channels]).
    Raises ValueError if `arr` is not 2- or 3-dimensional.
    """
    if arr.ndim not in (2, 3):
        raise ValueError(
            f"output_array expects a 2- or 3-dimensional array, got {arr.ndim} dimension(s)"
        )

    n_rows, n_cols = arr.shape[:2]
    # The first two dimensions each shrink by 2; any channel axis is kept.
    # The builtin `float` replaces the `np.float` alias, which was removed
    # in NumPy 1.24; both mean float64.
    return np.zeros((n_rows - 2, n_cols - 2) + arr.shape[2:], dtype=float)

In [None]:
def convolve(arr, filter, pad=True):
    """
    Exercise stub: apply `filter` over the image array `arr`, optionally
    zero-padding first. Raises NotImplementedError until you write it.
    """
    raise NotImplementedError

In [None]:
# Check your convolve with a 3x3 filter: with padding the output keeps the
# input's shape; without padding each dimension shrinks by 2.
assert convolve(np.random.randn(244, 244), np.random.randn(3, 3)).shape == (244, 244)
assert convolve(np.random.randn(244, 244), np.random.randn(3, 3), pad=False).shape == (242, 242)

In [None]:
#@title Double-click this cell for solution
def convolve(arr, filter, pad=True):
    """
    Slide a filter over an image array and return the feature map.
    Expects images to be 1-channel images (h, w) or
    3-channel images (h, w, c).

    arr: image array
    filter: also an array, filter you want to apply; its first two
        dimensions are the window height and width
    pad: whether to apply zero padding (a one-pixel border via zero_pad)

    Returns a float array whose first two dimensions count the window
    positions, (rows - filter_rows + 1, cols - filter_cols + 1), measured
    on the padded image when pad=True. For a 3x3 filter that is the input
    size with padding and 2 smaller per dimension without. A channel axis
    on the image is kept; each window produces one number, which is written
    to every channel of that output pixel.

    Raises ValueError if `arr` is not 2- or 3-dimensional or if the filter
    is larger than the (padded) image.
    """
    if arr.ndim not in (2, 3):
        raise ValueError(
            f"convolve expects a 2- or 3-dimensional image, got {arr.ndim} dimension(s)"
        )

    # Neither branch is modified below, so the unpadded image needs no copy.
    image = zero_pad(arr) if pad else arr

    filter_rows, filter_cols = filter.shape[:2]
    # Size the output from the actual filter, not a hard-coded 3x3 window.
    n_rows = image.shape[0] - filter_rows + 1
    n_cols = image.shape[1] - filter_cols + 1
    if n_rows < 0 or n_cols < 0:
        raise ValueError(
            f"Filter of shape {filter.shape} does not fit in image of shape {image.shape}"
        )

    output = np.zeros((n_rows, n_cols) + image.shape[2:], dtype=float)
    for i in range(n_rows):
        for j in range(n_cols):
            window = image[i:i+filter_rows, j:j+filter_cols]
            # One scalar per window; for 3-channel images it fills all channels.
            output[i, j] = (window * filter).sum()

    return output

In [None]:
# Initially, let's work with a grayscale image.
# This will help make some of the concepts easier.
# Later, we will go back to working with the color image.
# NOTE(review): rgb2gray is expected to return a 2-D float array rather
# than uint8 — the next cell prints the shape to confirm.
grayscale_img = color.rgb2gray(img_arr)

In [None]:
# Inspect the shape of the grayscale array.
grayscale_img.shape

In [None]:
# Show the grayscale image with the 'Greys_r' colormap and hide the tick marks.
plt.imshow(grayscale_img, cmap='Greys_r')
plt.xticks([])
plt.yticks([])

In [None]:
# Hand-written 3x3 edge filters. Each holds one +1 and one -1, so its
# response is the difference between two opposite neighbours of the centre.

# This filter will create large values when
# there's a high value on the upper-left
# and a low value on the lower-right
diag_edges = np.array([
    [1, 0, 0],
    [0, 0, 0],
    [0, 0, -1]
])

# This filter will create large values when
# there's a high value on the upper-right
# and a low value on the lower-left
other_diag_edges = np.array([
    [0, 0, 1],
    [0, 0, 0],
    [-1, 0, 0]
])

# This filter will create large values when
# there's a high value on the top
# and a low value on the bottom
horizontal_edges = np.array([
    [0, 1, 0],
    [0, 0, 0],
    [0, -1, 0]
])

# This filter will create large values when
# there's a high value on the left
# and a low value on the right
# (transposing horizontal_edges moves the +1 to the left and the -1 to the right)
vert_edges = horizontal_edges.T

In [None]:
# Play around with the different filters to see if you
# can get a feel for how they're applied to images
# and the type of features they can generate
plt.figure(figsize=(10, 10))
# pad=False: with a 3x3 filter the feature map is 2 pixels smaller per dimension
conv_image = convolve(grayscale_img, diag_edges, pad=False)
print(conv_image.shape)
plt.imshow(conv_image, cmap='Greys_r')
# The colorbar shows the range of values in the feature map
plt.colorbar()
plt.xticks([])
plt.yticks([])

In [None]:
# Apply four random 3x3 filters to the grayscale image in a 2x2 grid:
# the top row without zero padding, the bottom row with it.
fig = plt.figure(figsize=(12, 12))
np.random.seed(8)
for i, ax in enumerate(fig.subplots(2, 2).flat):
    filter = np.random.randn(3, 3)
    pad = i > 1
    print(f"Filter {i}:", '\n', filter, '\n')
    conv_image = convolve(grayscale_img, filter, pad=pad)
    ax.imshow(conv_image, cmap='Greys_r')
    ax.set_title(f'Shape: {conv_image.shape} (zero_pad = {str(pad)})')
    ax.set(xticks=[], yticks=[])

In [None]:
# Apply four random 3x3x3 filters to the scaled colour image in a 2x2 grid:
# the top row without zero padding, the bottom row with it.
fig = plt.figure(figsize=(12, 12))
np.random.seed(8)
for i in range(4):
    ax = fig.add_subplot(2, 2, 1+i)
    filter = np.random.randn(3, 3, 3)
    pad = i>1
    # Uncomment to print the 27 values of each filter:
    # print(f"Filter {i}:", '\n', filter, '\n')
    conv_image = convolve(img_arr/255, filter, pad=pad)
    # convolve writes the same number to every channel of an output pixel.
    # imshow treats an (h, w, 3) float array as RGB: it ignores cmap and
    # clips values to [0, 1]. Plot one channel so the whole range of
    # responses is mapped through the colormap instead.
    feature_map = conv_image[..., 0] if conv_image.ndim == 3 else conv_image
    ax.imshow(feature_map, cmap='Greys_r')
    ax.set_title(f'Shape: {conv_image.shape} (zero_pad = {str(pad)})')
    ax.set_xticks([])
    ax.set_yticks([])

In [None]:
# Instantiate a convolutional layer
# We can have any number of outputs - we chose 4 here
# just because we wanted a number of feature maps that are easy to visualize.
# The layer's weights are initialised randomly, so seed torch first (the
# NumPy cells use the same seed) to get the same feature maps on every run.
torch.manual_seed(8)
conv_layer = nn.Conv2d(in_channels=3, out_channels=4, kernel_size=3, stride=1, padding=1)

In [None]:
# One-line version of the step-by-step cell that follows:
# moveaxis(2, 0) moves the channel axis to the front, like permute(2, 0, 1).
img_tensor = torch.tensor(img_arr/255).float().moveaxis(2, 0).unsqueeze(0)

In [None]:
# Change our image to a scaled tensor with shape
# (batch_size, n_channels, n_rows, n_cols), the layout passed to conv_layer
img_tensor = (
    torch.tensor(img_arr/255) # Divide the image array by 255 and turn it into a tensor
    .float() # Convert it to torch's default float type
    .permute(2, 0, 1) # Move the channels axis first: (rows, cols, channels) -> (channels, rows, cols)
    .unsqueeze(0) # Give it a batch dimension of size 1
)
# Display the resulting shape
img_tensor.shape

In [None]:
# Get the outputs of our 4 channels
# squeeze(0) removes only the batch axis; a bare squeeze() would also drop
# any other axis of size 1 (for example a layer with a single output channel).
outputs = conv_layer(img_tensor).squeeze(0)

In [None]:
# Draw each output channel of the layer as its own panel in a 2x2 grid.
fig = plt.figure(figsize=(12, 12))
for i, o in enumerate(outputs):
    ax = fig.add_subplot(2, 2, i + 1)
    # detach() drops the autograd graph so the tensor can become a NumPy array
    ax.imshow(o.detach().numpy(), cmap='Greys_r')
    ax.set(title=f"Output shape: {o.shape}", xticks=[], yticks=[])

In [None]:
# Shape of the layer's first parameter tensor:
# (n_filters x n_channels x filter_height x filter_width)
next(conv_layer.parameters()).shape