# Geometric Transformations

In [7]:
import numpy as np # for building and manipulating matrices
import cv2 # for matrix transformations
from ipywidgets import interact
from IPython.display import display, Math, Markdown

# for graphics:
from matplotlib import pyplot as plt
# global default for every axes created afterwards: axisbelow=True
plt.rc('axes', axisbelow=True)

# interactive notebook backend: the cells below register 'button_press_event'
# callbacks on their figures to collect clicked points
%matplotlib notebook
# NOTE(review): this option belongs to the inline backend; presumably it has no
# effect while the notebook backend above is active -- confirm
%config InlineBackend.figure_format='retina'

## Affine Transformation

In [8]:
# read the source picture from disk
im = plt.imread('images/einstein.png')

# 2 x N array of clicked coordinates (row 0 = x, row 1 = y); starts with no columns
pointsBefore = np.zeros(shape=(2, 0))

# function handling button clicks
def onclick(event):
    """Record a click on the original image as a new column of `pointsBefore`.

    Parameters
    ----------
    event : matplotlib mouse event
        Only `inaxes`, `xdata`, `ydata` and `canvas` are used.

    Side effects
    ------------
    Appends [[x], [y]] to the global `pointsBefore`, updates the title of the
    clicked axes with the running count, and marks the click with a red dot.
    Clicks that land outside any axes are ignored.
    """
    global pointsBefore
    ax = event.inaxes
    # outside the axes matplotlib reports xdata/ydata as None; storing those
    # would turn pointsBefore into an object array and break the solver later
    if ax is None or event.xdata is None or event.ydata is None:
        return
    pointsBefore = np.hstack((pointsBefore, [[event.xdata], [event.ydata]]))
    # draw on the axes that was actually clicked rather than on pyplot's
    # "current" axes, which may belong to a figure created by a later cell
    ax.set_title(f"Original Image ({pointsBefore.shape[1]} points clicked)")
    ax.scatter([event.xdata], [event.ydata], c='r', s=40)
    event.canvas.draw_idle()

# open a fresh figure, hook up the click handler, then draw the image and
# ask the user to pick the 3 reference points
fig = plt.figure()
cid = fig.canvas.mpl_connect('button_press_event', onclick)  # connection id of the click callback
plt.imshow(im)
plt.title("Original Image (Click on the three points)")
plt.show()

<IPython.core.display.Javascript object>

In [9]:
# clicked coordinates so far: row 0 holds the x values, row 1 the y values, one column per click
print(pointsBefore)

[[120.56119792 179.00275636 300.21487757]
 [277.79241071 111.12574405 220.43310335]]


#### Warping

The code snippet below randomly generates an affine transformation and warps the image using this transformation.

In the new warped image, click the three targets in the same order you clicked the three targets in the original. These warped points will be stored as columns in a new `pointsAfter` matrix.

In [10]:
# random 2 x 2 linear part [[a, b], [d, e]] (drawn in the order a, b, d, e)
a = 1 + 0.25*(np.random.randn()+3)
b = 0.25*(np.random.randn()+1)
d = 0.25*(np.random.randn()+2)
e = 1 + 0.25*(np.random.randn()+3)
# random 2 x 1 translation [[c], [f]]
c = 20*np.random.rand()
f = 20*np.random.rand()

# assemble the 2 x 3 affine matrix
tform1 = np.array([[a, b, c],
                   [d, e, f]])
height, width, _ = im.shape

# output canvas: where the corner (width, height) lands, plus a 100 px margin
out_width = np.int32(width*a + height*b + c) + 100
out_height = np.int32(width*d + height*e + f) + 100
imT1 = cv2.warpAffine(im, tform1, (out_width, out_height))

# 2 x N array of coordinates clicked on the warped image; starts with no columns
pointsAfter = np.zeros(shape=(2, 0))

# function handling button clicks
def onclick(event):
    """Record a click on the warped image as a new column of `pointsAfter`.

    Parameters
    ----------
    event : matplotlib mouse event
        Only `inaxes`, `xdata`, `ydata` and `canvas` are used.

    Side effects
    ------------
    Appends [[x], [y]] to the global `pointsAfter`, updates the title of the
    clicked axes with the running count, and marks the click with a red dot.
    Clicks that land outside any axes are ignored.
    """
    global pointsAfter
    ax = event.inaxes
    # outside the axes matplotlib reports xdata/ydata as None; storing those
    # would turn pointsAfter into an object array and break the solver later
    if ax is None or event.xdata is None or event.ydata is None:
        return
    pointsAfter = np.hstack((pointsAfter, [[event.xdata], [event.ydata]]))
    # this handler serves the warped figure, so the title must say so
    # (it previously read "Original Image"); draw on the clicked axes rather
    # than on pyplot's "current" axes
    ax.set_title(f"Warped image ({pointsAfter.shape[1]} points clicked)")
    ax.scatter([event.xdata], [event.ydata], c='r', s=40)
    event.canvas.draw_idle()

# open a fresh figure, hook up the click handler, then draw the warped image;
# the user picks the same 3 points, in the same order as on the original
fig = plt.figure()
cid = fig.canvas.mpl_connect('button_press_event', onclick)  # connection id of the click callback
plt.imshow(imT1)
plt.title("Warped image (Click on the 3 points)")
plt.show()

<IPython.core.display.Javascript object>

In [11]:
# show both point sets (original first, warped second), one array per line block
print(pointsBefore, pointsAfter, sep='\n')

[[120.56119792 179.00275636 300.21487757]
 [277.79241071 111.12574405 220.43310335]]
[[271.93785828 346.1775769  591.49142971]
 [658.43298076 355.01847859 664.88860846]]


#### Formulate and Solve Equation
We want to solve for the $6$ unknown entries $a,b,c,d,e,f$ of a $2\times 3$ affine transformation matrix $T$ that maps each point (column) in `pointsAfter` back to the corresponding point (column) in `pointsBefore`.

$$
\begin{align}
\begin{pmatrix}
x'\\
y'
\end{pmatrix}
&=
\underbrace{\begin{pmatrix}
a & b & c\\
d & e & f
\end{pmatrix}}_{T}
\begin{pmatrix}
x\\
y\\
1
\end{pmatrix}
\end{align}.
$$




First express on paper a fully constrained linear system $A\mathbf{t} = \mathbf{b}$, where the unknown vector $\mathbf{t}=(a,b,c,d,e,f)$ contains the 6 entries of the affine matrix $T$. Each pair of points provides two constraints, and you have three pairs, resulting in $6$ constraints total. You have $6$ unknowns, so $A$ is a $6\times 6$ matrix. We can now solve the problem:

In [12]:
def add_affine_point_constraints(A, i, pointsAfter):
    """Write the two affine constraint rows for point `i` into `A`.

    With (x, y) taken from column `i` of `pointsAfter`, row 2*i receives
    (x, y, 1) in columns 0-2 and row 2*i+1 receives (x, y, 1) in columns 3-5.
    All other entries of `A` are left as they were.

    `A` is modified in place and also returned.
    """
    x, y = pointsAfter[0][i], pointsAfter[1][i]
    coeffs = (x, y, 1)
    A[2*i][0:3] = coeffs      # equation for the x' component: a*x + b*y + c
    A[2*i + 1][3:6] = coeffs  # equation for the y' component: d*x + e*y + f
    return A

def compute_transform( pointsBefore, pointsAfter):
    """Estimate the 2x3 affine matrix that maps `pointsAfter` onto `pointsBefore`.

    Every correspondence (x, y) -> (x', y') gives two linear equations

        a*x + b*y + c = x'
        d*x + e*y + f = y'

    in the unknowns t = (a, b, c, d, e, f). With exactly three
    correspondences the 6x6 system is solved exactly; with more than three
    it is over-determined and solved in the least-squares sense.

    Parameters
    ----------
    pointsBefore : array-like, 2 x N with N >= 3
        Target coordinates, one point per column (row 0 = x', row 1 = y').
    pointsAfter : array-like, 2 x M with M >= N
        Source coordinates in the same column order. Only the first N
        columns are used.

    Returns
    -------
    numpy.ndarray of shape (2, 3)
        [[a, b, c],
         [d, e, f]]
        where the 2x2 linear part is [[a, b], [d, e]] and the translation
        vector is [[c], [f]].

    Raises
    ------
    ValueError
        If fewer than three target points are supplied.
    numpy.linalg.LinAlgError
        May be raised in the three-point case when the system is singular
        (e.g. the three source points lie on one line).
    """
    target = np.asarray(pointsBefore)
    n_points = target.size // 2
    if n_points < 3:
        raise ValueError(
            f"an affine transform needs at least 3 point pairs, got {n_points}")

    # two constraint rows per correspondence, six unknowns
    A = np.zeros((2 * n_points, 6))
    for i in range(n_points):
        A = add_affine_point_constraints(A, i, pointsAfter)

    # column-major flattening gives (x'_0, y'_0, x'_1, y'_1, ...), matching
    # the row order of A
    rhs = np.reshape(target, 2 * n_points, order='F')

    if n_points == 3:
        t = np.linalg.solve(A, rhs)
    else:
        t, *_ = np.linalg.lstsq(A, rhs, rcond=None)
    return np.reshape(t, (2, 3))

Check if the function successfully restored the warped image back to its original appearance.

In [13]:
# estimate the matrix that maps the warped image back onto the original
tback = compute_transform( pointsBefore, pointsAfter )

# apply transformation to restore original image.
# cv2.warpAffine expects dsize as (width, height); passing (height, width)
# transposes the output canvas and crops/pads any non-square image.
imT2 = cv2.warpAffine( imT1, tback, (width, height))

fig = plt.figure()
plt.imshow(imT2)
plt.title('Restored image')
plt.show()

<IPython.core.display.Javascript object>

Additional check. The output should show an approximate identity matrix (depending on how accurately I selected the points) and an image produced by applying this matrix to the original.

In [14]:
# multiply the forward and backward transforms to get something close to the identity matrix.
# Both 2x3 matrices are promoted to 3x3 by appending the row [0, 0, 1] so they can be composed.
iT1 = np.vstack([tback, [0,0,1]])
T1 = np.vstack([tform1, [0,0,1]])
ident = iT1 @ T1
print(f"Approximate Identity Matrix:\n{ident[:2,:2].round(5)}")

# apply the composed (near-identity) affine transform to the original image.
# cv2.warpAffine expects dsize as (width, height), not (height, width).
imT3 = cv2.warpAffine( im, ident[:2,:], (width, height)) # affine
fig = plt.figure()
plt.imshow(imT3)
plt.title('Restored image 2')
plt.show()

Approximate Identity Matrix:
[[0.97207 0.008  ]
 [0.01369 0.97355]]


<IPython.core.display.Javascript object>

## Perspective Transformations

Note: because of how the code below is written, the corners of the license plate must be selected in this order: top-left, top-right, bottom-right, bottom-left.

In [22]:
# read the license-plate photo from disk (replaces the previous `im`)
im = plt.imread('images/license.jpg')

# 2 x N array of clicked corner coordinates (row 0 = x, row 1 = y); starts with no columns
clicked = np.zeros(shape=(2, 0))

# function handling button clicks
def onclick(event):
    """Record a click on the license-plate image as a new column of `clicked`.

    Parameters
    ----------
    event : matplotlib mouse event
        Only `inaxes`, `xdata`, `ydata` and `canvas` are used.

    Side effects
    ------------
    Appends [[x], [y]] to the global `clicked`, updates the title of the
    clicked axes with the running count, and marks the click with a red dot.
    Clicks that land outside any axes are ignored.
    """
    global clicked
    ax = event.inaxes
    # outside the axes matplotlib reports xdata/ydata as None; storing those
    # would turn `clicked` into an object array and break the solver later
    if ax is None or event.xdata is None or event.ydata is None:
        return
    clicked = np.hstack((clicked, [[event.xdata], [event.ydata]]))
    # draw on the axes that was actually clicked rather than on pyplot's
    # "current" axes, which may belong to another figure
    ax.set_title(f"Original Image ({clicked.shape[1]} points clicked)")
    ax.scatter([event.xdata], [event.ydata], c='r', s=40)
    event.canvas.draw_idle()

# open a fresh figure, hook up the click handler, then draw the photo and
# ask the user to pick the 4 plate corners
fig = plt.figure()
cid = fig.canvas.mpl_connect('button_press_event', onclick)  # connection id of the click callback
plt.imshow(im)
plt.title("Original Image (Click 4 points)")
plt.show()

<IPython.core.display.Javascript object>

I can model the apparent 2D distortion that points undergo due to such perspective projection using a $3 \times 3$ *homography* matrix $H$:
$$
\begin{align*}
\underbrace{\begin{pmatrix}
a & b & c\\
d & e & f\\
g & h & 1
\end{pmatrix}}_{H}
\begin{pmatrix}
x\\
y\\
1
\end{pmatrix} &=
\begin{pmatrix}
w'x'\\
w'y'\\
w'
\end{pmatrix}
\end{align*}.
$$
Note a homography matrix is always defined up to scale. For simplicity, assume that the bottom right coefficient of the homography is 1.

I want to deduce the entries of the homography $H$ that maps the four corners I clicked above to the four corners of an undistorted rectangular license plate. The unknowns are the elements of the homography matrix: $a, b, c, d, e, f, g, h$, while, for a given pair of corresponding points, $x, y$ and $x', y'$ are known. $w'$ is not known per se, but it can be deduced easily from the other unknowns and the known quantities. Each pair of points gives me two constraints, and with four such pairs, I have 8 constraints on 8 unknowns.

`compute_homography` below takes the four pairs of points and returns a $3 \times 3$ homography matrix $H$ that maps the first point of each pair to the second one. 

In [23]:
# Subroutine of compute_homography: writes the two constraint rows
# (2*i and 2*i+1) of A that express "point i of p maps to point i of q"
def add_homography_point_constraints( A, i, p, q ):
    """Fill rows 2*i and 2*i+1 of `A` for the i-th correspondence p -> q.

    With (x, y) = column i of `p` and (u, v) = column i of `q`:

        row 2*i   : [x, y, 1, ., ., ., -x*u, -y*u]
        row 2*i+1 : [., ., ., x, y, 1, -x*v, -y*v]

    Entries shown as '.' are left untouched. `A` is modified in place and
    also returned.
    """
    x, y = p[0][i], p[1][i]
    u, v = q[0][i], q[1][i]
    row_u, row_v = A[2*i], A[2*i+1]

    # constraint on the x' coordinate
    row_u[0], row_u[1], row_u[2] = x, y, 1
    row_u[6], row_u[7] = -x * u, -y * u

    # constraint on the y' coordinate
    row_v[3], row_v[4], row_v[5] = x, y, 1
    row_v[6], row_v[7] = -x * v, -y * v
    return A

# estimate homography that maps ps to qs
def compute_homography( ps, qs ):
    """Estimate the 3x3 homography H (with H[2, 2] fixed to 1) mapping `ps` to `qs`.

    Each correspondence contributes two linear equations in the eight
    unknowns (a, b, c, d, e, f, g, h). With exactly four correspondences the
    8x8 system is solved exactly; with more than four it is over-determined
    and solved in the least-squares sense.

    Parameters
    ----------
    ps : array-like, 2 x M with M >= N
        Source points, one per column (row 0 = x, row 1 = y). Only the first
        N columns are used.
    qs : array-like, 2 x N with N >= 4
        Destination points in the same column order.

    Returns
    -------
    numpy.ndarray of shape (3, 3)
        [[a, b, c],
         [d, e, f],
         [g, h, 1]]

    Raises
    ------
    ValueError
        If fewer than four destination points are supplied.
    numpy.linalg.LinAlgError
        May be raised in the four-point case when the system is singular
        (degenerate point configuration).
    """
    target = np.asarray(qs)
    n_points = target.size // 2
    if n_points < 4:
        raise ValueError(
            f"a homography needs at least 4 point pairs, got {n_points}")

    # two constraint rows per correspondence, eight unknowns
    A = np.zeros((2 * n_points, 8))
    for i in range(n_points):
        A = add_homography_point_constraints(A, i, ps, qs)

    # column-major flattening gives (x'_0, y'_0, x'_1, y'_1, ...), matching
    # the row order of A
    rhs = np.reshape(target, 2 * n_points, order='F')

    if n_points == 4:
        h = np.linalg.solve(A, rhs)
    else:
        h, *_ = np.linalg.lstsq(A, rhs, rcond=None)

    # append the fixed bottom-right coefficient and fold into 3x3
    h = np.append(h, 1)
    return np.reshape(h, (3, 3))

Test that the homography properly undistorts the clicked points:

In [24]:
# undistorted coordinates of the license plate (in pixels): the corners of a
# 400 x 200 rectangle, one corner per column
unwarped = np.array([[ 0, 400, 400,   0 ],
                     [ 0,   0, 200, 200 ]])

H = compute_homography( clicked, unwarped )
print(H)

# lift the clicked points to homogeneous coordinates (append a row of ones)
# and push them through H
q = H @ np.vstack((clicked, np.ones((1, 4))))

# dividing by the homogeneous coordinate projects back to 2D ...
print(np.round(q[:2] / q[2]))

# ... which should reproduce the target rectangle
print(unwarped)

[[ 9.91884928e+00  1.22287183e+00 -1.46193201e+03]
 [ 1.32795427e-01  1.85913598e+00 -1.87387739e+02]
 [ 8.26404584e-04  1.99742916e-04  1.00000000e+00]]
[[  0. 400. 400.   0.]
 [  0.   0. 200. 200.]]
[[  0 400 400   0]
 [  0   0 200 200]]


Now, let's test this function by warping the license plate image by the homography and plotting it:

In [25]:
# rectify the plate: warp the photo by H onto a 400 x 200 (width x height) canvas
im2 = cv2.warpPerspective(im, H, dsize=(400, 200))

# show the rectified plate
fig = plt.figure()
plt.imshow(im2)
plt.title('Restored image')
plt.show()

<IPython.core.display.Javascript object>