# Exercise 1: Computer Vision Basics + OpenCV (6 Points)

## <b>Exercise 1.1: Loading images (1 Point)
Load the image Lenna.png using OpenCV and display it side by side as both a grayscale image and acolor image. The result should look like Figure 1. Note:np.concatenatecan combine multiple matrices/ images. You can also use this here by creating a new image with double the width. The final imagemust then have 3 color channels. That means you have to copy the grayscale image to the R / G / B channels. You can get the image size viarows, cols = img.shape[: 2].

In [1]:
import numpy as np
import cv2

# open in RGB and in grayscale
lenna_color = cv2.imread('Lenna.png',1)
lenna_gray = cv2.imread('Lenna.png',0)
# copy grayscale to RGB channels for concatenation
lenna_gray_3C = np.stack((lenna_gray,lenna_gray,lenna_gray), axis=-1)
lenna_joined = np.concatenate((lenna_gray_3C,lenna_color), axis=1)

cv2.imshow("test", lenna_joined)
cv2.waitKey(0)
cv2.destroyAllWindows()

## Exercise 1.2: OpenCV experiments (2 Point)
Experiment with the following - as in the lecture discussed - image processing algorithms in OpenCV: <br>
a) Change of color spaces (HSV, LAB, YUV)<br>
b) Adaptives thresholding in the variants Gaussian and Otsu-Thresholding. <br>
c) Canny edge extraction. <br>
Please use the file <i>01_opencv_experiments.py</i> for your implementation. Implement a change of func-<br>
tionality on key press.


In [3]:
import numpy as np
import cv2

cap = cv2.VideoCapture(0)
mode = 0
while(True):
    # Capture frame-by-frame
    ret, frame = cap.read()

    # wait for key and switch to mode
    ch = cv2.waitKey(1) & 0xFF
    if ch == ord('0'):
        mode = 0
    if ch == ord('1'):
        mode = 1
    if ch == ord('2'):
        mode = 2
    if ch == ord('3'):
        mode = 3
    if ch == ord('4'):
        mode = 4
    if ch == ord('5'):
        mode = 5
    if ch == ord('6'):
        mode = 6
    if ch == ord('q'):
        break
        
    if mode == 0:
        ret, frame = cap.read()
    if mode == 1:
        frame = cv2.GaussianBlur(frame, (5, 5), 0)
    elif mode == 2:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    elif mode == 3:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2YUV)
    elif mode == 4:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frame = cv2.adaptiveThreshold(frame, 255, \
                                      cv2.ADAPTIVE_THRESH_GAUSSIAN_C, \
                                      cv2.THRESH_BINARY, 11, 2)
    elif mode == 6:
        frame = cv2.GaussianBlur(frame, (5, 5), 0)
        frame = cv2.Canny(frame, 50, 85)

    # Display the resulting frame
    cv2.imshow('frame', frame)

# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()

## Exercise 1.3: SIFT in OpenCV (1 Point)

Please use with the SIFT descriptor in OpenCV to implement a video streaming example showing SIFT features and visualize its keypoints as illustrated in Figure 2 (http://docs.opencv.org/3.1.0/da/df5/tutorial_py_sift_intro.html). Please use the file <i>01_features.py</i> for your implementation.
![image.png](attachment:image.png)

In [2]:
import cv2

cap = cv2.VideoCapture(0)
cv2.namedWindow('Learning from images: SIFT feature visualization')
while True:

    # 1. read each frame from the camera (if necessary resize the image)
    #    and extract the SIFT features using OpenCV methods
    #    Note: use the gray image - so you need to convert the image
    # 2. draw the keypoints using cv2.drawKeypoints
    #    There are several flags for visualization - e.g. DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS

    # close the window and application by pressing a key

    # YOUR CODE HERE
    
    ret, frame = cap.read()
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    sift = cv2.xfeatures2d.SIFT_create()
    keypoints = sift.detect(frame, None)
    frame = cv2.drawKeypoints(frame, keypoints, outImage=None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
    cv2.imshow('frame', frame)
    
    ch = cv2.waitKey(1) & 0xFF
    if ch == ord('q'):
        break
# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()

## Exercise 1.4: Convolution (2 Points)

Implement the convolution of an image with a filter mask on a grayscale image without the functions available in OpenCV or scipy. Use a Gaussian blur as filter mask and a Sobel filter (x- and y-direction) matrix as discussed in the lecture to calculate the image gradients. Generate a picture that represents the gradient strengths (Magnitude of Gradients) using your implementation. <b>Note:</b> Try to make the implementation as efficient as possible. The implementation of OpenCV then runs in real time - this is not necessary. Please use the file <i>01_filter.py</i> for your implementation.

In [4]:
import numpy as np
import cv2


def im2double(im):
    """
    Converts uint image (0-255) to double image (0.0-1.0) and generalizes
    this concept to any range.

    :param im:
    :return: normalized image
    """
    min_val = np.min(im.ravel())
    max_val = np.max(im.ravel())
    out = (im.astype('float') - min_val) / (max_val - min_val)
    return out


def make_gaussian(size, fwhm = 3, center=None):
    """ Make a square gaussian kernel.

    size is the length of a side of the square
    fwhm is full-width-half-maximum, which
    can be thought of as an effective radius.
    """

    x = np.arange(0, size, 1, float)
    y = x[:,np.newaxis]

    if center is None:
        x0 = y0 = size // 2
    else:
        x0 = center[0]
        y0 = center[1]

    k = np.exp(-4*np.log(2) * ((x-x0)**2 + (y-y0)**2) / fwhm**2)
    return k / np.sum(k)


def convolution_2d(img, kernel):
    """
    Computes the convolution between kernel and image

    :param img: grayscale image
    :param kernel: convolution matrix - 3x3, or 5x5 matrix
    :return: result of the convolution
    """
    # TODO write convolution of arbritrary sized convolution here
    # Hint: you need the kernelsize

    offset = int(kernel.shape[0]/2)
    newimg = np.zeros(img.shape)

    # YOUR CODE HERE
    clmn_padding = np.zeros((offset,img.shape[1]), dtype=float) 
    row_padding = np.zeros((offset,img.shape[0]+2*offset), dtype=float) 
    img_padded = np.hstack((clmn_padding.T, img, clmn_padding.T))
    img_padded = np.vstack((row_padding, img_padded, row_padding))

    newimg = np.array([[np.sum(img_padded[j:j+kernel.shape[0],i:i+kernel.shape[1]] * kernel) for i in range(img.shape[1])] for j in range(img.shape[0])])
    return newimg


if __name__ == "__main__":

    # 1. load image in grayscale
    img_gray = cv2.imread('Lenna.png', 0)
    
    # 2. convert image to 0-1 image (see im2double)
    img_norm = im2double(img_gray)

    # image kernels
    sobelmask_x = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]])
    sobelmask_y = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]])
    gk = make_gaussian(11)
    
    # 3 .use image kernels on normalized image
    img_norm_gk = convolution_2d(img_norm, gk)
    sobel_x = convolution_2d(img_norm_gk, sobelmask_x)
    sobel_y = convolution_2d(img_norm_gk, sobelmask_y)

    # 4. compute magnitude of gradients
    mog = np.sqrt(sobel_x*sobel_x + sobel_y*sobel_y)

    # Show resulting images
    cv2.imshow("Lenna", img_gray)
    cv2.imshow("sobel_x", sobel_x)
    cv2.imshow("sobel_y", sobel_y)
    cv2.imshow("mog", mog)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Exercise 2: K-Means for color quantization (4 Punkte)
Machine learning methods play an important role in many computer vision applications. Throughout the course we will be discussing some recent algorithms. However many classical problems such as feature matching, image segmentation or color quantization (and many many more) rely heavily on well understood ML methods. One example the k-means algorithm. In this exercise you’ll have to implement a simple color quantization (clustering) scheme based on k-means. In doing so, a pre-known number of k groups is formed from a set of similar data points. This standard method is one of the most commonly used techniques for grouping data points, e.g. RGB pixel values in our case (see illustration?? for more details. <br>
<b>Please read the image caption for a better understanding of the representation.</b>). <br><br>

The idea of Color-based Segmentation based on the k-means algorithm is to quantize / segment the colors in the image to group similar colors in the image into one group. The similarity between pixel values can be determined by the Euclidean distance between RGB, LAB or HSV values. Based on the given source code and the comments, implement the k-means algorithm and test it fordifferentkand different color spaces as shown in Figure 3.  Use either the default colors (cluster_colors) or the colors of the cluster center (mean color). <br>
<b>Calculate the total error for each result and print it on the commandline.</b> <br>
Please interpret your results of the clustering and answer the following questions(<b>please submit your answer as an additional .txt or .md file</b>): <br><br>
a) What are the problems of this clustering algorithm? <br>
b) How can I improve the results?

In [13]:
import numpy as np
import cv2
import math
import sys


############################################################
#
#                       KMEANS
#
############################################################

# implement distance metric - e.g. squared distances between pixels
def distance(a, B):
    # need to prevent integer overflow of numpy
    a = a.astype('int32')
    B = B.astype('int32')
    difference =(a-B)
    square_diff = difference*difference
    square_len = np.sum(square_diff, axis=1)
    length = np.sqrt(square_len)
    return length
# k-means works in 3 steps
# 1. initialize
# 2. assign each data element to current mean (cluster center)
# 3. update mean
# then iterate between 2 and 3 until convergence, i.e. until ~smaller than 5% change rate in the error

def update_mean(img, clustermask):
    """This function should compute the new cluster center, i.e. numcluster mean colors"""
    # YOUR CODE HERE
    cluster_names = np.unique(clustermask)
    cluster_centers = np.zeros((len(cluster_names),3), np.uint8)
    for counter, name in enumerate(cluster_names):
        relevant_pixels = (clustermask==name)[:,:,0]
        cluster_centers[counter,:] = np.mean(img[relevant_pixels], axis=0)
    return cluster_centers

def assign_to_current_mean(img, result, clustermask, cluster_centers, cluster_colors):
    """The function expects the img, the resulting image and a clustermask.
    After each call the pixels in result should contain a cluster_color corresponding to the cluster
    it is assigned to. clustermask contains the cluster id (int [0...num_clusters]
    Return: the overall error (distance) for all pixels to there closest cluster center (mindistance px - cluster center).
    """
    overall_dist = 0
    # YOUR CODE HERE
    for y in range(img.shape[0]):
        for x in range(img.shape[1]):
            distances = distance(img[y,x,:],cluster_centers)
            overall_dist += np.amin(distances)
            clustermask[y,x] = np.argmin(distances)
            result[y,x,:] = np.array(cluster_colors)[clustermask[y,x],:]
#             print("%s \t %s " % (img[y,x,:],cluster_centers.tolist()) )
#             print("%s \t %s \t -> %s " % (img[y,x,:], distances, cluster_centers[clustermask[y,x]]) )
    return result, clustermask, overall_dist



def initialize(img):
    """initialize the current_cluster_centers array for each cluster with a random pixel position"""
    # YOUR CODE HERE
    random_x = np.random.randint(img.shape[1])
    random_y = np.random.randint(img.shape[0])
    return img[random_y, random_x,:]

def kmeans(img, numclusters, cluster_colors, use_mean_colors = False):
    """Main k-means function iterating over max_iterations and stopping if
    the error rate of change is less then 2% for consecutive iterations, i.e. the
    algorithm converges. In our case the overall error might go up and down a little
    since there is no guarantee we find a global minimum.
    """
    max_iter = 30
    max_change_rate = 0.02
    old_overall_dist = 0.0
    dist = sys.float_info.max

    clustermask = np.zeros((h1, w1, 1), np.uint8)
    result = np.zeros((h1, w1, 3), np.uint8)
    # initializes each pixel to a cluster
    # iterate for a given number of iterations or if rate of change is
    # very small
    # YOUR CODE HERE
    
    cluster_centers = np.array([initialize(img) for _ in range(numclusters)])
    for it in range(max_iter):
        if use_mean_colors == True:
            cluster_colors = cluster_centers
        result, clustermask, overall_dist = assign_to_current_mean(img, result, \
                                                                   clustermask, \
                                                                   cluster_centers, \
                                                                   cluster_colors)
        cluster_centers = update_mean(img, clustermask)
        if old_overall_dist != 0.0:
            change = abs((old_overall_dist - overall_dist)/old_overall_dist)
            print("Iteration: #%d | Change Rate: %f" % (it,change))
            if change < max_change_rate:
                break
        old_overall_dist = overall_dist
    print("-> Number of iterations: ", it+1) 
    print("* * *")
    return result


# corresponding colors for each cluster
cluster_colors = [[255, 0, 0], \
                  [0, 255, 0], \
                  [0, 0, 255], \
                  [0, 255, 255], \
                  [255, 255, 255], \
                  [0, 0, 0], \
                  [128, 128, 128]]
# initialize current cluster centers (i.e. the pixels that represent a cluster center)
#current_cluster_centers = np.zeros((numclusters, 1, 3), np.float32)

# load image
imgraw = cv2.imread('./Lenna.png')
scaling_factor = 0.5
imgraw = cv2.resize(imgraw, None, fx=scaling_factor, fy=scaling_factor, interpolation=cv2.INTER_AREA)
h1, w1 = imgraw.shape[:2]

# compare different color spaces and their result for clustering
# YOUR CODE HERE or keep going with loaded RGB colorspace img = imgraw
images = [('LAB',cv2.cvtColor(imgraw, cv2.COLOR_BGR2LAB)), ('HSV',cv2.cvtColor(imgraw, cv2.COLOR_BGR2HSV)), ('RGB',imgraw)]
results = []
# execute k-means over the image
# it returns a result image where each pixel is colored with one of the cluster_colors
# depending on its cluster assignment
# num of cluster

numsclusters = [3,6]
for k in numsclusters:
    print("### Processing k = ", k)
    for title, image in images:
        print("# Processing Color Space", title)
        res = kmeans(image, k, cluster_colors, use_mean_colors = False)

        h1, w1 = res.shape[:2]
        h2, w2 = image.shape[:2]
        vis = np.zeros((max(h1, h2), w1 + w2, 3), np.uint8)
        vis[:h1, :w1] = res
        vis[:h2, w1:w1 + w2] = image
        results.append((k,title,vis))
        
numsclusters = [4,16,32,64]
total = imgraw
for k in numsclusters:
    print("### Processing k = ", k)
    res = kmeans(image, k, cluster_colors, use_mean_colors = True)
    total = np.concatenate((total,res), axis=1)

for k, title, vis in results:
    cv2.imshow(title+' | k = '+str(k), vis)
cv2.imshow('Own Colors with k = '+str(numsclusters), total)
cv2.waitKey(0)
cv2.destroyAllWindows()


### Processing k =  3
### Processing Color Space  LAB
Iteration: #1 | Change Rate: 0.177128
Iteration: #2 | Change Rate: 0.132138
Iteration: #3 | Change Rate: 0.077704
Iteration: #4 | Change Rate: 0.029876
Iteration: #5 | Change Rate: 0.018093
-> Number of iterations:  6
* * *
### Processing Color Space  HSV
Iteration: #1 | Change Rate: 0.257628
Iteration: #2 | Change Rate: 0.001094
-> Number of iterations:  3
* * *
### Processing Color Space  RGB
Iteration: #1 | Change Rate: 0.356322
Iteration: #2 | Change Rate: 0.088113
Iteration: #3 | Change Rate: 0.021607
Iteration: #4 | Change Rate: 0.008129
-> Number of iterations:  5
* * *
### Processing k =  6
### Processing Color Space  LAB
Iteration: #1 | Change Rate: 0.191017
Iteration: #2 | Change Rate: 0.121496
Iteration: #3 | Change Rate: 0.061655
Iteration: #4 | Change Rate: 0.028374
Iteration: #5 | Change Rate: 0.018082
-> Number of iterations:  6
* * *
### Processing Color Space  HSV
Iteration: #1 | Change Rate: 0.329036
Iteration: #2 

In [9]:
import numpy as np
import cv2

img = cv2.imread('./Lenna.png')
h1, w1 = img.shape[:2]
clustermask = np.random.randint(3, size=(h1, w1, 0))
print((clustermask==1).shape)
print(img.shape)
#print(img[(clustermask==1)[:,:,0]])

random_x = np.random.randint(img.shape[1])
random_y = np.random.randint(img.shape[0])
cv2.imshow("Color-based Segmentation Kmeans-Clustering", np.array([[img[random_y, random_x,:] for j in range(101)] for i in range(101)]))
cv2.waitKey(0)
cv2.destroyAllWindows()

(512, 512, 0)
(512, 512, 3)


In [28]:
a = np.array([np.random.randint(255) for x in range(3)])
b = np.array([[np.random.randint(255) for x in range(3)] for y in range(3)])


print("%s - %s " % (str(a), str(b)))

difference =(a-b)
print(difference)
square_diff = difference*difference
print(square_diff)

square_len = np.sum(square_diff, axis=1)
print(square_len)
lenght = np.sqrt(square_len)
print(lenght)

[181   1 137] - [[ 89 235  89]
 [234 109  27]
 [116  36  86]] 
[[  92 -234   48]
 [ -53 -108  110]
 [  65  -35   51]]
[[ 8464 54756  2304]
 [ 2809 11664 12100]
 [ 4225  1225  2601]]
[65524 26573  8051]
[255.97656143 163.01226948  89.72736483]


In [15]:
a=np.array([116, 130, 221])
b=np.array([[54,  8, 76],[1,1,1]])

difference =(a-b)
print(difference)
square_diff = difference*difference
print(square_diff)

square_len = np.sum(square_diff, axis=1)
print(square_len)
lenght = np.sqrt(square_len)
print(lenght)

[[ 62 122 145]
 [115 129 220]]
[[ 3844 14884 21025]
 [13225 16641 48400]]
[39753 78266]
[199.38154378 279.76061195]


In [None]:
    difference =(a-b)
    square_diff = difference*difference
    square_len = np.sum(square_diff, axis=1)
    return np.sqrt(square_len)