# Point Processing - Tutorial 1

ARI2129 - Principles of Computer Vision for AI Francesca Maria Mizzi - 118201L

The first step in creating the project is to import all the required libraries, which in this case are OpenCV (cv2), NumPy (np), matplotlib and os.

In [1]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
import os

The images which are going to be modified are then loaded into a dictionary keyed by file name. I will primarily be using the first photo, "moon-tree.jpg", to demonstrate my work; however, any photos added to the Images folder will also work.

In [2]:
# Load every readable file in the "Images" folder into a dict keyed by file name.
images = {}
for x in os.listdir("Images"):
    loaded = cv2.imread(os.path.join("Images", x))
    # cv2.imread returns None (it does not raise) for files it cannot decode,
    # e.g. stray non-image files; skip those so the filters never receive None.
    if loaded is not None:
        images[x] = loaded

In [3]:
# Main demonstration image used by the cells below.
raw = cv2.imread('Images/moon-tree.jpg')
# cv2.imread signals failure by returning None rather than raising, so fail
# here with a clear message instead of at the first use of `raw`.
if raw is None:
    raise FileNotFoundError("Could not read image 'Images/moon-tree.jpg'")

In [5]:
# Display the image and block until any key is pressed.
cv2.imshow("Photo", raw)
cv2.waitKey(0)
# HighGUI windows only process events inside waitKey, so close the window
# explicitly instead of leaving it on screen after the cell finishes.
cv2.destroyAllWindows()

-1

## Exercise 1 - Sliding Window

In order to generate a sliding window, I created a class "Window" which will be used to define the location of the sliding window and then shift this window according to the determined stride.

Certain parameters must be filled in order to generate the window:
 - image - the image where the window is going to be generated
 - scale - the size of the window (if 3 is entered then a window of 3 pixels by 3 pixels is generated)
 - stride - the number of pixels by which the window moves with each shift
 
 Note that a stride or scale of 1 is not recommended, since processing will take a long time.

In [4]:
class Window:
    """Square window that scans an image left-to-right, top-to-bottom.

    Corner positions are ``(x, y)`` tuples, i.e. (column, row). The window
    starts at the top-left corner of the image and is advanced one stride at
    a time with ``newMovement()``.
    """

    def __init__(self, image, scale, stride):
        """Create a window positioned at the top-left corner of ``image``.

        Args:
            image: NumPy image array of shape (rows, cols) or
                (rows, cols, channels).
            scale: Side length of the square window, in pixels.
            stride: Number of pixels the window moves per shift; the same
                value is used horizontally and vertically.
        """
        # The limits are the image size plus one window size; boundaries()
        # compares the bottom-right corner of the window against them.
        self.xlim = image.shape[1] + scale
        self.ylim = image.shape[0] + scale
        self.top_left = (0, 0)
        self.bottom_right = (scale, scale)
        # Last bottom-right y value reported by checkY().
        self.prevBtmRght = scale
        self.scale = scale
        self.stride = (stride, stride)
        # A 2-D (grayscale) image has no channel axis; treat it as 1 channel.
        self.channels = image.shape[2] if len(image.shape) > 2 else 1

    def pos(self):
        """Return the current ``(top_left, bottom_right)`` corners."""
        return self.top_left, self.bottom_right

    def newMovement(self):
        """Move the window to its next position and return the new corners."""
        self.top_left, self.bottom_right = self.newPos()
        return self.top_left, self.bottom_right

    def newPos(self):
        """Return the corners the window would have after one shift.

        The window itself is not moved. Once the right edge of the window has
        reached the image width, the next position is the start of the next
        row (x reset to 0, y advanced by the vertical stride); otherwise x
        advances by the horizontal stride.
        """
        left, top = self.top_left
        right, bottom = self.bottom_right
        step_x, step_y = self.stride

        # xlim - scale is the image width.
        if right >= self.xlim - self.scale:
            return (0, top + step_y), (self.scale, bottom + step_y)

        return (left + step_x, top), (right + step_x, bottom)

    def boundaries(self, tleft = None, tright = None):
        """Return True while the given corners lie inside the scan limits.

        Args:
            tleft: Top-left corner to test; defaults to the current one.
            tright: Bottom-right corner to test (despite the name); defaults
                to the current bottom-right corner.
        """
        if tleft is None:
            tleft = self.top_left
        if tright is None:
            tright = self.bottom_right

        return (tright[0] <= self.xlim and tright[1] <= self.ylim
                and tleft[0] >= 0 and tleft[1] >= 0)

    def imgBoundaries(self, image):
        """Return the pixels of ``image`` that lie under the window.

        The result always has shape (scale, scale) for single-channel windows
        or (scale, scale, channels) otherwise. Where the window overhangs the
        image, slicing drops the missing rows/columns and ``np.resize`` fills
        the gap by repeating the available pixels (zeros if none are left).
        """
        left, top = self.top_left
        right, bottom = self.bottom_right
        # NumPy slicing clamps to the array bounds, so overhang is dropped.
        region = image[top:bottom, left:right]

        if self.channels == 1:
            return np.resize(region, (self.scale, self.scale))

        return np.resize(region, (self.scale, self.scale, self.channels))

    def checkY(self):
        """Report whether the window has moved to a new row since the last call.

        Returns True (and remembers the new value) when the y coordinate of
        the bottom-right corner differs from the one seen previously.
        """
        if self.prevBtmRght == self.bottom_right[1]:
            return False

        self.prevBtmRght = self.bottom_right[1]
        return True
        

The window class is made up of 6 methods which are used to manipulate the window:
 - pos() - this method returns the current position of the window
 - newMovement() - this method shifts the window according to the new position generated by newPos()
 - newPos() - this method returns the potential position of the window after it shifts. This means either incrementing the X value or, in the case where the window has reached the edge of the photo, incrementing the Y value and resetting the X value
 - boundaries() - this method receives locations and determines whether the given locations are within the boundaries of the image
 - imgBoundaries() - this method returns an array which contains the pixels within the window
 - checkY() - this method checks whether the y value of the bottom right corner of the window has changed since the last check, i.e. whether the window has moved down to a new row

I then created a function which can be used in order to generate a sliding window over any image. This function takes in 4 parameters: the image on which the sliding window is generated, the size of the window, how many pixels are to be skipped between each window and whether the user would like to see the window over the image or have it just be an internal process.

In [5]:
def sliding_window(image, scale, stride, show):
    """Slide a square window across ``image``, optionally animating it.

    A snapshot showing the window at its starting position is written to
    "Output/sliding.png". When ``show`` is true, each window position is
    drawn on screen; rectangles are drawn onto the same frame, so earlier
    positions remain visible as a trail.

    Args:
        image: Image array to scan; it is copied, never drawn on directly.
        scale: Side length of the square window, in pixels.
        stride: Number of pixels the window moves per shift.
        show: If true, display the moving window; otherwise only iterate.
    """
    colour = (204, 153, 255)
    window = Window(image, scale, stride)
    top_left, bottom_right = window.pos()

    # Draw on a private copy so the caller's image is left untouched.
    canvas = cv2.rectangle(image.copy(), top_left, bottom_right, colour)

    # cv2.imwrite cannot create missing folders, so make sure it exists.
    os.makedirs("Output", exist_ok=True)
    cv2.imwrite("Output/sliding.png", canvas, [cv2.IMWRITE_PNG_COMPRESSION, 0])

    while window.boundaries():
        if show:
            canvas = cv2.rectangle(canvas, top_left, bottom_right, colour)
            cv2.imshow("Window", canvas)
            # Roughly 35 frames per second.
            cv2.waitKey(int(1/35*1000))

        top_left, bottom_right = window.newMovement()

    cv2.destroyAllWindows()

In [47]:
# Demo: animate a 150x150 window moving 50 pixels per shift over the main image.
sliding_window(image=raw, scale=150, stride=50, show=True)

## Exercise 2 - Convolution on ROI

In order to perform convolution on the region of interest, a class must first be created for the kernel. The two properties needed in order to create the kernel are the array and the weight.

In [6]:
class Kernel:
    """A convolution kernel (2-D array) together with a scalar weight."""

    def __init__(self, kernel, weight):
        """Store the kernel array and the weight applied to every response.

        Args:
            kernel: 2-D NumPy array of kernel coefficients.
            weight: Scalar multiplied into each filter response.
        """
        self.kernel = kernel
        self.weight = weight

    def filterRoI(self, roi, axis = 0, channels = 1):
        """Apply the kernel to one region of interest.

        ``roi`` must have the same height and width as the kernel.

        Args:
            roi: Array of shape (h, w) when ``channels`` is 1, otherwise
                (h, w, channels).
            axis: 0 uses the kernel as given, 2 combines the kernel and its
                transpose into a magnitude, and any other value uses the
                transposed kernel.
            channels: Number of channels in ``roi``.

        Returns:
            1-D array holding one weighted response per channel.
        """
        # One 2-D plane per channel; a single-channel RoI is used as it is.
        if channels == 1:
            planes = [roi]
        else:
            planes = [roi[:, :, i] for i in range(channels)]

        if axis == 2:
            results = []
            for plane in planes:
                filsum = (self.kernel * plane).sum()
                filsum2 = (self.kernel.T * plane).sum()
                # Magnitude of the two directional responses.
                results.append((((filsum ** 2) + (filsum2 ** 2)) ** (1/2)) * self.weight)
            return np.array(results)

        # Previously the transpose was computed but never assigned, so any
        # axis other than 0 or 2 failed with an unbound `kernel` variable.
        kernel = self.kernel if axis == 0 else self.kernel.T

        return np.array([(kernel * plane).sum() * self.weight for plane in planes])

    def filterWhole(self, image, stride = 1, window = None, axis = 0):
        """Slide the kernel over an image and collect the responses.

        Args:
            image: Image array to filter.
            stride: Number of pixels the kernel moves per shift.
            window: Optional Window; when given, only the pixels currently
                under that window are filtered.
            axis: Passed through to ``filterRoI``.

        Returns:
            Array built from the completed rows; each row holds one
            per-channel response array per kernel position. The row still
            being accumulated when the scan ends is not included.
        """
        if window is not None:
            image = window.imgBoundaries(image)

        # The sliding window is square with side kernel.shape[0];
        # assumes a square kernel - TODO confirm for non-square kernels.
        shift = Window(image, self.kernel.shape[0], stride)

        new = []
        line = []

        top_left, _ = shift.newPos()
        while shift.boundaries(top_left):

            roi = shift.imgBoundaries(image)

            # A change in y means the previous row is complete.
            if shift.checkY():
                new.append(line)
                line = []

            line.append(self.filterRoI(roi, axis, shift.channels))

            shift.newMovement()
            top_left, _ = shift.newPos()

        return np.array(new)

The first method in the Kernel class is the filterRoI() method which has the following parameters:
 - roi - the region of interest from the image
 - axis - the axis of the operation
 - channels - the number of channels in the RoI
 
The method is split up into 2 parts, determined by the axis.

**If the axis is 2**, the method multiplies the pixels of the region of interest in the channel with those of the kernel, then adds them together. It then multiplies the pixels of the RoI in the channel with those in the transpose of the kernel, then adds them together. The magnitude of the added values is found by squaring the values, adding them together and finding the square root. This answer is then multiplied by the weight of the kernel and added to the list "results". This is done for all the channels. The list of results is converted to an array and returned to the user.

**If the axis is not 2**, the method multiplies the pixels in the channel with those of the kernel then adds them together. It then multiplies this result with the weight of the kernel and adds it to the "results" list. This is done for all the channels. The list of results is converted to an array and returned to the user.

It is important that the region of interest and the kernel have the same shape or the method will not work.

The other method in the class is the filterWhole() method. This method has 4 parameters: the image which needs to be filtered, the number of pixels by which the kernel moves at each step (stride), an optional window (needed only if the filter is to be carried out on a region of interest) and the axis of operation.

A check is carried out to see whether the filter is applied to a window or to the whole image. If a window is defined, the pixels within this window are assigned as the image using the imgBoundaries() method defined earlier.

A class was created for the Sobel kernel used to carry out the convolution, storing the array and the weight of the kernel.

In [7]:
class Sobel:
    """Sobel edge filter backed by the shared Kernel class."""

    def __init__(self, weight):
        """Build the 3x3 Sobel kernel and pair it with ``weight``."""
        coefficients = np.array([
            [-1, 0, 1],
            [-2, 0, 2],
            [-1, 0, 1],
        ])
        self.kernel = Kernel(coefficients, weight)

    def filter_s(self, image, stride=1, window=None, axis=2):
        """Run the Sobel kernel over ``image`` (or over ``window`` if given).

        All arguments are forwarded unchanged to ``Kernel.filterWhole``.
        """
        return self.kernel.filterWhole(
            image, stride=stride, window=window, axis=axis
        )

After the Sobel kernel is initialized, a 500x500 pixel region of interest is extracted. This region of interest is then passed through the Sobel filter and presented to the user.

In [8]:
# Sobel filter with a weight of 1.
sobel = Sobel(1)

# 500x500 window at the top-left corner of the image, used to cut out the RoI.
wind = Window(raw, 500, 1)
roi = wind.imgBoundaries(raw)

# axis=2 combines the kernel and its transpose into a gradient magnitude.
sobel_filtered = sobel.filter_s(image=raw, window=wind, axis=2)

# NOTE(review): sobel_filtered is floating point; cv2.imshow maps float
# values in [0, 1] to [0, 255], so larger magnitudes display as white.
# Consider scaling the result before display - confirm the intended look.
cv2.imshow("Original ROI", roi)
cv2.imshow("Sobel Filter", sobel_filtered)
cv2.waitKey(0)
# Close both windows once a key has been pressed.
cv2.destroyAllWindows()


-1

## Exercise 3 - Convolution on the whole image

The filtering of the whole image is done simply by passing the image through the filter, as done prior, but excluding the window.

In [108]:
# No window is passed, so the Sobel magnitude is computed over the whole image.
whole_sobel_filtered = sobel.filter_s(raw, axis=2)

In [None]:
# Show the original next to its Sobel-filtered version until a key is pressed.
cv2.imshow("Original Photo", raw)
cv2.imshow("Sobel Filter", whole_sobel_filtered)
cv2.waitKey(0)
# Close both windows afterwards instead of leaving them on screen.
cv2.destroyAllWindows()

## Exercise 4 - Different Convolution Kernels

In order to filter the images through Gaussian and bilinear kernels, I created two separate classes for the kernels.

In [9]:
class Bilinear:
    """3x3 bilinear smoothing filter backed by the shared Kernel class."""

    def __init__(self, weight):
        """Build the bilinear kernel and pair it with ``weight``.

        The kernel coefficients sum to 16.
        """
        self.kernel = Kernel(np.array([[1, 2, 1],
                                       [2, 4, 2],
                                       [1, 2, 1]]), weight)

    def filter_bil(self, image, pace = 1, window = None, axis = 0):
        """Run the bilinear kernel over ``image`` (or over ``window`` if given).

        Args:
            image: Image array to filter.
            pace: Number of pixels the kernel moves per shift (the stride).
            window: Optional Window restricting filtering to its region.
            axis: Passed through to ``Kernel.filterWhole``.
        """
        # Kernel exposes filterWhole(); it has no FilterImage() method.
        return self.kernel.filterWhole(image, pace, window, axis)

class Gaussian:
    """Square Gaussian smoothing filter backed by the shared Kernel class."""

    def __init__(self, size, weight):
        """Build a ``size`` x ``size`` Gaussian kernel paired with ``weight``.

        The Gaussian is centred on index ``size // 2`` in both directions
        and the same value is used as its full width at half maximum.

        Raises:
            ValueError: If ``size`` is below 2, where ``size // 2`` would be
                zero and the kernel formula would divide by zero.
        """
        if size < 2:
            raise ValueError("size must be at least 2")

        centre = size // 2

        x = np.arange(0, size, 1, float)
        # Column vector, so x and y broadcast to a full 2-D grid.
        y = x[:, np.newaxis]

        squared_distance = (x - centre) ** 2 + (y - centre) ** 2
        self.kernel = Kernel(np.exp(-4 * np.log(2) * squared_distance / centre ** 2), weight)

    def filter_gaus(self, image, pace = 1, window = None, axis = 0):
        """Run the Gaussian kernel over ``image`` (or over ``window`` if given).

        Args:
            image: Image array to filter.
            pace: Number of pixels the kernel moves per shift (the stride).
            window: Optional Window restricting filtering to its region.
            axis: Passed through to ``Kernel.filterWhole``.
        """
        # Kernel exposes filterWhole(); it has no FilterImage() method.
        return self.kernel.filterWhole(image, pace, window, axis)


I initialized the two kernels, defined their weights and added all three filters to a dict.

In [10]:
# The bilinear kernel's coefficients sum to 16, so a weight of 1/16 keeps the
# overall brightness of the filtered image unchanged (1/8 would double it).
bilinear = Bilinear(1/16)
# NOTE(review): the 5x5 Gaussian kernel sums to about 4.52, so a weight of 1/4
# brightens slightly - confirm whether exact normalisation is wanted.
gaussian = Gaussian(5, 1/4)

# Filters keyed by display name; `sobel` is the instance created earlier.
filters = {
    "Sobel": sobel,
    "Bilinear": bilinear,
    "Gaussian": gaussian,
}

I then passed all my images through the filters in order to have a good representation as to the effects of the filters. 

It is to be noted that the x and y values for the Sobel kernel were processed separately.

In [14]:
# Write every result uncompressed into the "Filters" folder.
png_params = [cv2.IMWRITE_PNG_COMPRESSION, 0]
# cv2.imwrite cannot create missing folders, so make sure it exists.
os.makedirs("Filters", exist_ok=True)

for photo in images:
    source = images[photo]
    if source is None:
        # cv2.imread returns None for files it could not decode; skip them.
        continue

    for entry in filters:
        if entry == "Sobel":
            # The x and y responses are computed and saved separately.
            filteredX = filters[entry].filter_s(image=source, axis=0)
            cv2.imwrite(os.path.join("Filters", photo + "FILTERED_SOBELX.png"), filteredX, png_params)
            filteredY = filters[entry].filter_s(image=source, axis=1)
            cv2.imwrite(os.path.join("Filters", photo + "FILTERED_SOBELY.png"), filteredY, png_params)

        elif entry == "Bilinear":
            filteredB = filters[entry].filter_bil(image=source)
            cv2.imwrite(os.path.join("Filters", photo + "FILTERED_BILINEAR.png"), filteredB, png_params)

        else:
            filteredG = filters[entry].filter_gaus(image=source)
            cv2.imwrite(os.path.join("Filters", photo + "FILTERED_GAUSSIAN.png"), filteredG, png_params)

KeyboardInterrupt: 