In [24]:
import numpy as np
from PIL import Image
import torch
from torch import nn
import torch.nn.functional as F
import torchvision.transforms.functional as TF
import urllib.request

In [5]:
# A 2-D convolution layer: 3 input channels -> 15 output channels, using a
# 3x3 kernel with stride 1 and 1 pixel of padding on every side.
conv = nn.Conv2d(
    in_channels=3,
    out_channels=15,
    kernel_size=3,
    stride=1,
    padding=1,
)
conv

Conv2d(3, 15, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

In [6]:
# Inspect the dimensions of the layer's learnable kernel tensor.
conv.weight.size()

torch.Size([15, 3, 3, 3])

In [7]:
# Fetch the sample photo from Wikimedia Commons into the working directory,
# then open the saved file with PIL.
IMAGE_URL = "https://upload.wikimedia.org/wikipedia/commons/thumb/2/26/Boat_in_the_beach_Chacachacare.jpg/640px-Boat_in_the_beach_Chacachacare.jpg"
IMAGE_PATH = "boat.png"
urllib.request.urlretrieve(IMAGE_URL, IMAGE_PATH)
img = Image.open(IMAGE_PATH)

In [8]:
# View the PIL image as a NumPy array to inspect how its pixels are laid out.
# In this form the channel axis comes last (height, width, channels); the
# tensor that torchvision builds from the same image puts the channels first.
np.asarray(img).shape

(378, 640, 3)

In [18]:
# Preview the downloaded image. PIL's show() hands the picture to an external
# viewer utility instead of rendering it inline, which is why no output is
# recorded for this cell.
img.show()

In [9]:
# PIL reports image dimensions as (width, height).
(img.width, img.height)

(640, 378)

In [10]:
# Turn the PIL image into a float tensor; the channel axis moves to the front.
x = TF.to_tensor(pic=img)
x.size()

torch.Size([3, 378, 640])

In [11]:
# Add a leading batch dimension so the image is a (batch, channels, height,
# width) tensor for the convolution below.
# unsqueeze_ mutates x in place, so without the guard every re-execution of
# this cell would prepend yet another singleton dimension and break the later
# conv2d call. Only unsqueeze while x is still the 3-D (C, H, W) image.
if x.dim() == 3:
    x.unsqueeze_(0)
x

tensor([[[[0.7373, 0.7373, 0.7333,  ..., 0.8549, 0.8549, 0.8549],
          [0.7333, 0.7333, 0.7333,  ..., 0.8627, 0.8588, 0.8549],
          [0.7333, 0.7294, 0.7294,  ..., 0.8627, 0.8588, 0.8549],
          ...,
          [0.8000, 0.8039, 0.7961,  ..., 0.8627, 0.7922, 0.8275],
          [0.7961, 0.8196, 0.8549,  ..., 0.7804, 0.7020, 0.6863],
          [0.8627, 0.8471, 0.8392,  ..., 0.7451, 0.7059, 0.7333]],

         [[0.8431, 0.8431, 0.8392,  ..., 0.9176, 0.9176, 0.9176],
          [0.8392, 0.8392, 0.8392,  ..., 0.9255, 0.9216, 0.9176],
          [0.8392, 0.8353, 0.8353,  ..., 0.9255, 0.9216, 0.9176],
          ...,
          [0.7176, 0.7137, 0.6941,  ..., 0.7804, 0.7098, 0.7451],
          [0.7137, 0.7294, 0.7569,  ..., 0.6980, 0.6196, 0.6039],
          [0.7804, 0.7569, 0.7412,  ..., 0.6627, 0.6235, 0.6510]],

         [[0.9490, 0.9490, 0.9451,  ..., 0.9765, 0.9765, 0.9765],
          [0.9451, 0.9451, 0.9451,  ..., 0.9843, 0.9804, 0.9765],
          [0.9451, 0.9412, 0.9412,  ..., 0

In [12]:
# Confirm the batch axis is now present: (batch, channels, height, width).
x.size()

torch.Size([1, 3, 378, 640])

In [22]:
# Hand-crafted 3x3 kernel with a heavily weighted centre; applied to an image
# it boosts the contrast sharply.
kernel = torch.tensor(
    [
        [-10.0, 10.0, -10.0],
        [10.0, 100.0, 10.0],
        [-10.0, 10.0, 10.0],
    ]
)
# Replicate the same kernel for each of the 3 input channels, giving a single
# output filter of shape (1, 3, 3, 3), and wrap it as a Parameter.
img_filter = torch.nn.Parameter(kernel.repeat(1, 3, 1, 1))

In [23]:
# Display the filter to confirm that the same 3x3 kernel was copied into each
# of the three input-channel slices.
img_filter

Parameter containing:
tensor([[[[-10.,  10., -10.],
          [ 10., 100.,  10.],
          [-10.,  10.,  10.]],

         [[-10.,  10., -10.],
          [ 10., 100.,  10.],
          [-10.,  10.,  10.]],

         [[-10.,  10., -10.],
          [ 10., 100.,  10.],
          [-10.,  10.,  10.]]]], requires_grad=True)

In [27]:
# Convolve the image batch with the single hand-made filter. With padding=1
# and stride=1 the spatial size is preserved, so the result is a
# 1x1x378x640 tensor (one image, one output channel).
z = F.conv2d(x, img_filter, padding=1, stride=1)
# img_filter is a Parameter (requires_grad=True), so the result must be
# detached before converting to NumPy. Indexing [0][0] drops the batch and
# channel axes, leaving the 2-D float response map.
z = z.detach().numpy()[0][0]
# Image.fromarray on a float32 array builds a mode "F" image, which cannot be
# saved as PNG/JPEG and is only viewable because the viewer converts it to
# 8-bit on the fly. Clip to the 0..255 range and cast to uint8 explicitly so
# contrast_img is a regular 8-bit grayscale image.
# This produces a sharp highlighted grayscale image.
contrast_img = Image.fromarray(np.clip(z, 0, 255).astype(np.uint8))
contrast_img.show()

In [29]:
# The filtered image keeps the original (width, height).
(contrast_img.width, contrast_img.height)

(640, 378)