In [None]:
import matplotlib.pyplot as plt
import cv2
import numpy as np

import functools as ft
import itertools as it
import operator as op

from utilities import my_show, my_gshow, my_read, my_read_g, my_read_cg

img_dir = '../common/'

%matplotlib inline

# Features

In [None]:
# flat space is hard to localize
# edges are hard to localize along the edge
# corners are "easy" to localize

# regions when moved cause "maximal variation" ... 
# i.e., small jitter on a blob/corner results in lots of differences
#       even big jitter on blue sky background has lots of similarities
# finding these is "feature detection"

# building a "good context" around a feature is "feature description"

In [None]:
box = np.zeros((15, 15), dtype=np.int8)
box[4:11, 4:11] = 1.0

fig, axes = plt.subplots(2,3, figsize=(9,6))

def show_asis(ax, arr, title):
    """Draw `arr` on `ax` as-is, with a title and the axis decorations hidden.

    The color scale is pinned to the range [0, 1] and interpolation is
    switched off, so the colors mark where values change without smoothing.
    """
    render_opts = {'vmin': 0, 'vmax': 1, 'interpolation': 'none'}
    ax.imshow(arr, **render_opts)
    ax.axis('off')
    ax.set_title(title)
    
show_asis(axes[0,0], box,           'whole')
show_asis(axes[0,1], box[:4, :4],   'back')
show_asis(axes[0,2], box[5:9, 5:9], 'fore')

show_asis(axes[1,0], box[5:9, 2:6], 'l-edge')
show_asis(axes[1,1], box[2:6, 5:9], 't-edge')

show_asis(axes[1,2], box[2:6, 2:6], 'corner')

fig.tight_layout()

# Harris Corner

In [None]:
# make a chessboard
base_len = 100
black = np.zeros((base_len, base_len), dtype=np.uint8)
white = np.zeros((base_len, base_len), dtype=np.uint8) + 255

top_left = np.c_[np.r_[white,black], np.r_[black,white]]
top_left.shape

board = np.tile(top_left, (4,4))

my_gshow(plt.gca(), board, interpolation=None)

In [None]:
fig, axes = plt.subplots(1,3,figsize=(21,12))

 # block size, aperature in sobel, coeff in Harris
corners = cv2.cornerHarris(board.astype('float32'), 2, 3, 0.04)
my_gshow(axes[0], corners[50:150,50:150])


#result is dilated for marking the corners, not important
corners = cv2.dilate(corners,None)
my_gshow(axes[1], corners[50:150, 50:150])

# Threshold for an optimal value, it may vary depending on the image.
cboard = cv2.cvtColor(board, cv2.COLOR_GRAY2RGB)
cboard[corners>0.01*corners.max()] = [255,0,0]
my_show(axes[2], cboard[50:150, 50:150])

In [None]:
# board.shape is (rows, cols) == (height, width); OpenCV points and sizes
# are given as (x, y) / (width, height), so keep both namings around
y,x = r,c = board.shape
inset = base_len * 2   # how far the two top corners get pulled inward

# corner order: top-left, top-right, bottom-left, bottom-right
pts1 = np.float32([[0,0], [x,0], [0,y], [x,y]])
pts2 = np.float32([[inset,0], [x-inset,0], [0,y], [x,y]])

M = cv2.getPerspectiveTransform(pts1,pts2)
# dsize is (width, height) -- passing board.shape (height, width) only
# works while the board happens to be square
# borderValue fills destination pixels that have no source pixel;
# 255 is the largest (white) value a uint8 image can hold
skewed = cv2.warpPerspective(board, M, (x, y),
                             # flags=cv2.INTER_NEAREST,
                             borderValue=255)

my_gshow(plt.gca(), skewed, interpolation=None)

In [None]:
fig, axes = plt.subplots(1,3,figsize=(15,9))

# args are block size, aperature in sobel, coeff in Harris
corners = cv2.cornerHarris(skewed.astype(np.float32), 2, 3, 0.04)
my_gshow(axes[0], corners)


#result is dilated for marking the corners, not important
corners = cv2.dilate(corners,None)
my_gshow(axes[1], corners)

# Threshold for an optimal value, it may vary depending on the image.
cskewed = cv2.cvtColor(skewed, cv2.COLOR_GRAY2RGB)
cskewed[corners>0.01*corners.max()] = [255,0,0]
my_show(axes[2], cskewed[:250, :250])

# Attributing Corners to Locations Below Pixel Level

In [None]:
# high accuracy corners
fig, axes = plt.subplots(1,4,figsize=(12,8))

my_gshow(axes[0], board[90:110, 90:110], interpolation=None)


corners = cv2.cornerHarris(board.astype(np.float32), 2, 3, 0.04) # block size, aperature in sobel, coeff in Harris

#result is dilated for marking the corners, not important
corners = cv2.dilate(corners,None)
corners = cv2.threshold(corners, 0.01*corners.max(), 255, 0)[1].astype(np.uint8)
my_gshow(axes[1], corners[90:110, 90:110], interpolation=None)


# find centroids
ret, labels, stats, centroids = cv2.connectedComponentsWithStats(corners)

# define the criteria to stop and refine the corners
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.001)
fine_corners = cv2.cornerSubPix(board.astype(np.float32), np.float32(centroids), 
                                (5,5), (-1,-1), criteria)

centroids, fine_corners = centroids.astype(np.int32), fine_corners.astype(np.int32)

# draw located corners on board (color version)
cboard = cv2.cvtColor(board, cv2.COLOR_GRAY2RGB)
r,c = centroids[:,1], centroids[:,0]  # x,y --> r,c
cboard[r,c] = [255,0,0]
my_show(axes[2], cboard[90:110, 90:110], interpolation=None)


r,c = fine_corners[:,1], fine_corners[:,0]
cboard[r,c] = [0,255,0]

my_show(axes[3], cboard[90:110, 90:110], interpolation=None)

# Features: Shi-Tomasi

In [None]:
scene, scene_g = my_read_cg(img_dir+'data/tsukuba.png')

# args: max number of corners, quality level, min distance between corners
corners = cv2.goodFeaturesToTrack(scene_g,25,0.01,10)
# The detector hands back an (N,1,2) array, or None when nothing is found.
# reshape (rather than squeeze) keeps a 2-D (N,2) array even when N == 1,
# so the x,y unpacking below always works.
corners = (np.empty((0,2), np.int32) if corners is None
           else corners.reshape(-1,2).astype(np.int32))
print(corners.shape)

# a plain loop: we only want the drawing side effect, not a list of results
for x,y in corners:
    cv2.circle(scene, (int(x),int(y)), 3, [255,0,0], -1) # -1 -> filled circle

my_show(plt.gca(), scene)

# Features:  Histogram-of-Oriented-Gradients

In [None]:
# examples of code here:
# https://www.learnopencv.com/histogram-of-oriented-gradients/
# https://docs.opencv.org/3.2.0/dd/d3b/tutorial_py_svm_opencv.html

In [None]:
from sklearn import datasets
digits = datasets.load_digits()
print(dir(digits))
print(digits.images.shape) # 1797x8x8 ---> 1797 images that are 8x8

TEST_IMG = 172

my_gshow(plt.gca(), digits.images[TEST_IMG], interpolation=None)
#my_gshow(plt.gca(), digits.images[TEST_IMG], interpolation='bilinear')

true_digit = digits.target_names[digits.target[TEST_IMG]]
plt.gca().set_title("It's a {}".format(true_digit));

In [None]:
import numpy.linalg as nla

def hog(img, unsigned=True, num_angle_bins=10, norm=True):
    """Mini-HOG: treat a small image as one unit cell and histogram its gradients.

    Every pixel votes with its gradient magnitude.  The vote is split
    linearly between the two bin edges that bracket the pixel's gradient
    angle, so an angle sitting exactly on an edge votes only for that edge.

    Parameters
    ----------
    img : 2-D array
        Single-channel image; passed straight to ``cv2.Sobel``.
    unsigned : bool
        If True, opposite directions are treated the same (angles are folded
        into [0, 180)); otherwise the full [0, 360) range is used.
    num_angle_bins : int
        Number of evenly spaced bin *edges* from 0 to 180/360 inclusive.  The
        last edge is the same direction as the first, so the returned
        histogram has ``num_angle_bins - 1`` entries.
    norm : bool
        If True, scale the histogram to unit Euclidean length (an all-zero
        histogram is returned unchanged).

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``num_angle_bins - 1``.
    """
    # calculate gradients, convert to angle, magnitude (polar)
    gx = cv2.Sobel(img, cv2.CV_64F, 1, 0)
    gy = cv2.Sobel(img, cv2.CV_64F, 0, 1)
    mag, ang = cv2.cartToPolar(gx, gy)

    # convert radians (pi like) to degrees (180/360 like)
    ang = np.degrees(ang)
    upper_bin = 180.0 if unsigned else 360.0
    # unsigned: fold opposite directions together; signed: this only guards
    # against an angle that rounded up to exactly 360
    ang %= upper_bin

    # create bin edges by angle; the top edge (180 or 360) is an alias of 0
    bins, bin_size = np.linspace(0, upper_bin, num_angle_bins, retstep=True)
    n_bins = bins.size - 1

    # find the bracketing edges for each angle (clamped against float
    # round-off); intp avoids the 255-bin ceiling of uint8 indices
    lwr_bins = np.minimum((ang // bin_size).astype(np.intp), n_bins - 1)
    upr_bins = lwr_bins + 1

    # weight by angle <-> edge distance: the *closer* edge gets the larger
    # share, so the upper edge's share grows as the angle moves away from
    # the lower edge
    upr_contrib = (ang - bins[lwr_bins]) / bin_size
    lwr_contrib = 1.0 - upr_contrib

    # place upper most bin (180 or 360) into 0 bin
    upr_bins[upr_bins == n_bins] = 0

    # add.at is like += but will keep adding repeated index values
    acc = np.zeros(n_bins)
    np.add.at(acc, lwr_bins, lwr_contrib*mag)
    np.add.at(acc, upr_bins, upr_contrib*mag)

    # normalize the vector so its length (sqrt(a**2 + b**2 ... etc.)) is 1.0;
    # a flat image has no gradients at all -- leave its zeros alone instead
    # of dividing by zero
    if norm and np.any(acc):
        acc /= nla.norm(acc)
    return acc

In [None]:
fig, axes = plt.subplots(1,3)
my_gshow(axes[0], digits.images[TEST_IMG], interpolation=None)

# The HOG values are already one number per orientation bin, so draw them
# as bars.  Re-histogramming them with hist(range(9), weights=...) spreads
# nine values over ten default bins and leaves a misleading empty gap.
this_hog = hog(digits.images[TEST_IMG], norm=False)
axes[1].bar(range(len(this_hog)), this_hog)
axes[1].set_title("Unnormalized\nUnsigned\nHistogram of Gradients")

this_hog = hog(digits.images[TEST_IMG])
axes[2].bar(range(len(this_hog)), this_hog)
axes[2].set_title("Unsigned\nHistogram of Gradients")

fig.tight_layout()

In [None]:
print(hog(digits.images[TEST_IMG]))

In [None]:
# these are almost undocumented; the only one we care about is
# the last argument (use directionality) False 
default_magic = (1, -1.0, 0, 0.2, 1, 64, False)
hog_d = cv2.HOGDescriptor((8,8),        # image size
                        (8,8), (8,8), # frame size, frame steps
                        (8,8),        # cell size
                        9,            # number of bins 
                        *default_magic)
hist = hog_d.compute(digits.images[TEST_IMG].astype(np.uint8))
print(hist.flatten()) 
# pretty different -- no real way to compare short of reading the opencv source code

Note, in "full" HOG, there are two more layers of processing:
  * the normalization we did at the end of our `hog` function (the `if norm:` step, which scales the histogram to unit length) is applied over a frame of cells [we had the equivalent of one cell above]
  * when used for object detection (finding an object in an image) the full image will be broken into many smaller regions-of-interest that will be evaluated with HOG and compared with the target.

# SIFT

In [None]:
# within gaussian pyramid:
# look at "maxima" wrt space (across image) and scale (up-and-down DoG pyramid)
# size of circle is related to the level of Gaussian pyramid at which the keypoint was found
venice, venice_g = my_read_cg(img_dir+'data/venice.jpg')
key_points = cv2.xfeatures2d.SIFT_create().detect(venice_g, None)

# venice = cv2.drawKeypoints(venice, key_points, venice)
venice = cv2.drawKeypoints(venice, key_points, venice, 
                           flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

fig = plt.figure(figsize=(10,10))
my_show(plt.gca(), venice)

In [None]:
# detect finds keypoints
# compute calculates the descriptors for those keypoints
# detectAndCompute does both in one call

In [None]:
venice, venice_g = my_read_cg(img_dir+'data/venice.jpg')
key_points, descriptors = cv2.xfeatures2d.SIFT_create().detectAndCompute(venice_g, None)
# desc is shape:  (kp, 128)

# Features:  SURF

In [None]:
venice, venice_g = my_read_cg(img_dir+'data/venice.jpg')
key_points = cv2.xfeatures2d.SURF_create().detect(venice_g, None)
print(len(key_points))

# FIXME what is the tradeoff of drawing on gray versus full-color
venice = cv2.drawKeypoints(venice, key_points, venice, 
                           flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

fig = plt.figure(figsize=(10,10))
my_show(plt.gca(), venice)

In [None]:
# surf likes blobs
venice, venice_g = my_read_cg(img_dir+'data/venice.jpg')
surf = cv2.xfeatures2d.SURF_create()

print(surf.getHessianThreshold())
surf.setHessianThreshold(5000)

key_points = surf.detect(venice_g, None)
print(len(key_points))

venice_g = cv2.drawKeypoints(venice_g, key_points, venice_g, 
                             flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

fig = plt.figure(figsize=(10,10))
my_show(plt.gca(), venice_g)

In [None]:
venice, venice_g = my_read_cg(img_dir+'data/venice.jpg')
surf = cv2.xfeatures2d.SURF_create()

surf.setHessianThreshold(5000)
surf.setUpright(True)
key_points = surf.detect(venice_g, None)
print(len(key_points))

venice_g = cv2.drawKeypoints(venice_g, key_points, venice_g, 
                             flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

fig = plt.figure(figsize=(10,10))
my_show(plt.gca(), venice_g)

In [None]:
venice, venice_g = my_read_cg(img_dir+'data/venice.jpg')
surf = cv2.xfeatures2d.SURF_create()

surf.setHessianThreshold(5000)
surf.setUpright(True)
surf.setExtended(True) # 128 length descriptors

# detectAndCompute to get keypoints and descriptors at once
key_points, descriptors = surf.detectAndCompute(venice_g, None)
print(len((key_points)))

venice_g = cv2.drawKeypoints(venice_g, key_points, venice_g, 
                             flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

fig = plt.figure(figsize=(10,10))
my_show(plt.gca(), venice_g)

# Features:  FAST

In [None]:
venice, venice_g = my_read_cg(img_dir+'data/venice.jpg')

fig, axes = plt.subplots(1,3,figsize=(12,8))
my_show(axes[0], venice)

fast = cv2.FastFeatureDetector_create()

# find and draw the keypoints
key_points = fast.detect(venice, None)
print(fast.getThreshold(), fast.getNonmaxSuppression(), fast.getType(), len(key_points))
venice_kp = cv2.drawKeypoints(venice.copy(), key_points, None, color=(255,0,0))
my_show(axes[1], venice_kp)


fast.setNonmaxSuppression(False)
key_points = fast.detect(venice, None)
print(fast.getThreshold(), fast.getNonmaxSuppression(), fast.getType(), len(key_points))
venice_kp = cv2.drawKeypoints(venice.copy(), key_points, None, color=(255,0,0))
my_show(axes[2], venice_kp)

# Features:  Descriptors with BRIEF

In [None]:
# often descriptors are converted to strings for use with hamming coding
# brief goes directly to ham-able descriptors
venice = my_read(img_dir+'data/venice.jpg')

# Star = CenSurE (???) & brief
star = cv2.xfeatures2d.StarDetector_create()
brief = cv2.xfeatures2d.BriefDescriptorExtractor_create()

# find the keypoints with STAR & descriptors with BRIEF
key_points = star.detect(venice, None)
key_points, descriptors = brief.compute(venice, key_points)
print(len(key_points))

# ORB:  FAST + BRIEF without Patents!

In [None]:
venice, venice_g = my_read_cg(img_dir+'data/venice.jpg')
key_points = cv2.ORB_create().detect(venice_g, None)

print(cv2.DRAW_MATCHES_FLAGS_DEFAULT, len(key_points))
venice = cv2.drawKeypoints(venice, key_points, venice)

fig = plt.figure(figsize=(10,10))
my_show(plt.gca(), venice)

# Using Features:  Matching

##### Brute Force Matching

The ten lowest-distance matches found using brute force (a low descriptor distance does not guarantee a correct match!).

In [None]:
box = my_read_g(img_dir+'data/box.png')
scene = my_read_g(img_dir+'data/box_in_scene.png')

# find the keypoints and descriptors with ORB
orb = cv2.ORB_create()
key_points_box,   descriptors_box   = orb.detectAndCompute(box,None)
key_points_scene, descriptors_scene = orb.detectAndCompute(scene,None)

# the split .().() calls look odd, but it is gaining traction in python land
# (it comes more from java/C++ land)
# match(query, train):  box descriptors are the query (source) set and
#                       scene descriptors are the train (target) set
matches = (cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
              .match(descriptors_box, descriptors_scene))
# sorted() instead of .sort():  newer OpenCV builds return a tuple from
# match(), and tuples have no .sort
matches = sorted(matches, key=op.attrgetter('distance'))
first_match = matches[0]
print(first_match.distance,  # how far apart are the descriptors (lower = more similar)
      first_match.trainIdx,  # which descriptor in tgt image (scene)?
      first_match.queryIdx,  # which descriptor in source image (box)?
      first_match.imgIdx)    # which training image (if more than one)?


# Draw the 10 lowest-distance matches.
NO_SINGLES = cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS
match_img = cv2.drawMatches(box, key_points_box, scene, key_points_scene,
                            matches[:10], None,
                            flags=NO_SINGLES)
plt.figure(figsize=(15,10))
my_show(plt.gca(), match_img)

Brute force SIFT matches left after applying "ratio test".

In [None]:
box = my_read_g(img_dir+'data/box.png')
scene = my_read_g(img_dir+'data/box_in_scene.png')

sift = cv2.xfeatures2d.SIFT_create()

# find the keypoints and descriptors with SIFT
key_points_box,   descriptors_box   = sift.detectAndCompute(box,None)
key_points_scene, descriptors_scene = sift.detectAndCompute(scene,None)

# only keep k=2 best matches per descriptor and then ratio test
# this "ratio test" shows up in D. Lowe's original SIFT paper
# so we mimic it here
matches = cv2.BFMatcher().knnMatch(descriptors_box, descriptors_scene, k=2)
# ugly: [m] ... drawMatchesKnn wants it
matches = [[m] for m,n in matches if m.distance < 0.75 * n.distance]
match_img = cv2.drawMatchesKnn(box, key_points_box, scene, key_points_scene, 
                               matches, None, 
                               flags=NO_SINGLES)
plt.figure(figsize=(15,10))
my_show(plt.gca(), match_img);

##### KNN Matching using Approximate Nearest Neighbors

In [None]:
# parameters for sift/surf (if you want to experiment with it)
# FLANN_INDEX_KDTREE = 1
# index_params_siftsurf = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)

# parameters for orb (we'll use orb here)
# rec'd parameters in comments see here:
# https://docs.opencv.org/3.0-beta/modules/flann/doc/flann_fast_approximate_nearest_neighbor_search.html#flann-index-t-index

FLANN_INDEX_LSH = 6
index_params_orb = dict(algorithm = FLANN_INDEX_LSH, 
                        table_number = 6,      # 12
                        key_size = 12,         # 20
                        multi_probe_level = 1) # 2

# parameters for search process
search_params = dict(checks=50)   # or pass empty dictionary


box = my_read_g(img_dir+'data/box.png')
scene = my_read_g(img_dir+'data/box_in_scene.png')

# find the keypoints and descriptors with ORB
orb = cv2.ORB_create()
key_points_box,   descriptors_box   = orb.detectAndCompute(box, None)
key_points_scene, descriptors_scene = orb.detectAndCompute(scene,None)

# Flann has randomization and may return fewer than k=2 neighbors for some
# descriptors (seems to happen about 1/5 of the time).  Re-running until
# every descriptor happens to get two neighbors is not guaranteed to ever
# finish, so instead we skip the incomplete results:  without a
# second-best match there is no ratio to test anyway.
knn_matches = (cv2.FlannBasedMatcher(index_params_orb, search_params)
                  .knnMatch(descriptors_box, descriptors_scene, k=2))
# ratio test; ugly: [best] ... drawMatchesKnn wants it
matches = [[pair[0]] for pair in knn_matches
           if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance]

draw_params = dict(matchColor = (0,255,0),
                   singlePointColor = (255,0,0),
                   flags = 0)
match_img = cv2.drawMatchesKnn(box, key_points_box, 
                               scene, key_points_scene, 
                               matches, None, 
                               **draw_params)
plt.figure(figsize=(15,10))
my_show(plt.gca(), match_img);

# Features:  Homography

If we find an alignment between src and target points, we can compute a homography (a perspective transformation) between the two images.  This has many applications:  among them, we could normalize for differences in the perspective of cameras in different scenes.

In [None]:
# undo ugliness:  unwrap the [m] singletons made for drawMatchesKnn.
# Entries that are already bare matches pass through untouched, so this
# cell can be re-run without blowing up on its own output.
matches = [m[0] if isinstance(m, (list, tuple)) else m for m in matches]

# use points to generate a homography; findHomography expects Nx1x2 inputs
# RANSAC:  repeatedly fit a homography to a small random subset of the
#          point pairs and keep the fit that the most pairs agree with;
#          pairs that land within 5.0 pixels of where the fit sends them
#          are "inliers", the rest are treated as bad matches and ignored
box_points   = np.array([key_points_box[m.queryIdx].pt for m in matches]).reshape(-1,1,2)
scene_points = np.array([key_points_scene[m.trainIdx].pt for m in matches]).reshape(-1,1,2)
M, inliers = cv2.findHomography(box_points, scene_points, cv2.RANSAC, 5.0)
if M is None:
    raise RuntimeError("findHomography could not fit a homography to the matches")
inliers = inliers.ravel().tolist()

# now we set up a box in our original (src) and we map it to an
# M transformed box in the target (via perspective transform)
r,c = box.shape
src_bounds = np.array([[0  ,   0],
                       [0  , r-1],
                       [c-1, r-1],
                       [c-1,   0]], dtype=np.float64).reshape(-1,1,2)
dst_bounds = cv2.perspectiveTransform(src_bounds, M)[np.newaxis,:,:,:]

# draw box around match in target scene
# (on a copy:  polylines draws in place and we want `scene` left clean for
#  re-runs; polygon vertices are passed as 32-bit ints)
out_scene = cv2.polylines(scene.copy(), dst_bounds.astype(np.int32), True, 255, 10, cv2.LINE_AA)

# draw the points as well
draw_params = dict(matchColor = (0,255,0),
                   singlePointColor = None,
                   matchesMask = inliers, # draw only inlier connections
                   flags = 0)

match_img = cv2.drawMatches(box, key_points_box,
                            out_scene, key_points_scene,
                            matches, None,
                            **draw_params)
plt.figure(figsize=(15,10))
my_show(plt.gca(), match_img);

Combined example of the above steps:

In [None]:
box = my_read_g(img_dir+'data/box.png')
scene = my_read_g(img_dir+'data/box_in_scene.png')

# find the keypoints and descriptors with ORB
orb = cv2.ORB_create()
key_points_box,   descriptors_box   = orb.detectAndCompute(box,None)
key_points_scene, descriptors_scene = orb.detectAndCompute(scene,None)

# find key point matches using Flann
knn_matches = (cv2.FlannBasedMatcher(index_params_orb, search_params)
                  .knnMatch(descriptors_box, descriptors_scene, k=2))
# ratio test:  keep the best match only when it clearly beats the runner-up.
# Flann sometimes returns fewer than two neighbors for a descriptor; those
# can't be ratio tested (and would break a plain `for m,n in ...`), so
# they are skipped.
matches = [pair[0] for pair in knn_matches
           if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance]

In [None]:
# use points to generate a homography; findHomography expects Nx1x2 inputs
box_points   = np.array([key_points_box[m.queryIdx].pt for m in matches]).reshape(-1,1,2)
scene_points = np.array([key_points_scene[m.trainIdx].pt for m in matches]).reshape(-1,1,2)
M,inliers = cv2.findHomography(box_points, scene_points, cv2.RANSAC, 5.0)
if M is None:
    raise RuntimeError("findHomography could not fit a homography to the matches")
inliers = inliers.ravel().tolist()

# create box in source and target
r,c = box.shape
src_bounds = np.array([[0  ,   0],
                       [0  , r-1],
                       [c-1, r-1],
                       [c-1,   0]],
                       dtype=np.float64).reshape(-1,1,2)
dst_bounds = cv2.perspectiveTransform(src_bounds, M)[np.newaxis,:,:,:]

# draw box around box in target scene
# (on a copy:  polylines draws in place and re-running this cell should not
#  stack outlines on `scene`; polygon vertices are passed as 32-bit ints)
out_scene = cv2.polylines(scene.copy(), dst_bounds.astype(np.int32), True, 255, 10, cv2.LINE_AA)

# draw points as well
draw_params = dict(matchColor = (0,255,0),
                   singlePointColor = None,
                   matchesMask = inliers, # draw only inlier connections
                   flags = NO_SINGLES)
match_img = cv2.drawMatches(box, key_points_box, out_scene, key_points_scene,
                            matches, None, **draw_params)
plt.figure(figsize=(15,10))
my_show(plt.gca(), match_img);

# Evaluation


|      |PredP| PredN | |
|------|-----|-----|-|
|RealP | TP | FN  | TP/(TP+FN)<br>sensitivity, recall<br>TPR |
|RealN | FP | TN  | TN/(FP+TN)<br>specificity<br>TNR |
|      |TP/(TP+FP)<br>precision| |

# Using Features:  A Naive, Handspun Classifier

In [None]:
# let's make a super-simple-problem:
# classify digits
# we'll do it like this:  
# 1:  convert digits to HOGs
# 2:  find the closest HOG to me (that isn't me)
# 3.  say that i'm whatever class that closest HOG is

# note:  this is 1-nearest-neighbors with leave-one-out-cross-validation 
#        (train on everyone else, test on me)

In [None]:
import numpy.linalg as nla
from sklearn import datasets
from sklearn import metrics

default_magic = (1, -1.0, 0, 0.2, 1, 64, False)
hog_d = cv2.HOGDescriptor((8,8),        # image size
                          (8,8), (8,8), # frame size, frame steps
                          (8,8),        # cell size
                          9,            # number of (orientation) bins 
                          *default_magic)

# hog_d=hog_d to: 
#  (1) not recreate every time and 
#  (2) not use global variable
def extract_features(images, hog_d=hog_d):
    """Compute one descriptor per image and stack them into a single array.

    `hog_d` only needs a `compute(img)` method; binding it as a default
    argument means it is built once and the body uses no global.
    Each result is squeezed and the results are stacked along axis 0,
    giving one row per image.
    """
    rows = []
    for img in images:
        descriptor = hog_d.compute(img)
        rows.append(descriptor.squeeze())
    return np.stack(rows, 0)

# we reference the data by image to prevent making many copies
# of the data array that DON'T have the image we're looking for
# NOTE:  this means we must clean up our results to prevent
#        trivial success of predicting our own (known) class
def predict_one(data, tgts, tst_index):
    """Predict the class of row `tst_index` from its nearest *other* row.

    data      -- 2-D array with one feature vector per row
    tgts      -- class labels, aligned with the rows of `data`
    tst_index -- the row to classify

    Returns the entry of `tgts` belonging to the closest row that is not
    `tst_index` itself (1-nearest-neighbor, leaving the test row out).
    """
    # Euclidean distance from the test row to every row (itself included)
    query = data[tst_index]
    distances = nla.norm(query - data, axis=1)

    # the two closest rows, in no particular order; normally one of them is
    # the test row itself at distance zero
    nearest_pair = np.argpartition(distances, 2)[:2]

    # first of the pair that isn't the test row -> best "not-me" neighbor
    prediction_idx = next(idx for idx in nearest_pair if idx != tst_index)
    assert prediction_idx != tst_index               # double-double check

    return tgts[prediction_idx]

In [None]:
digits = datasets.load_digits()
images = digits.images.astype(np.uint8)

In [None]:
# %%timeit -r1
# for reference, this cell takes: ~ 247 ms
# that'll be very interesting to compare with some results from next week
data = extract_features(images)

# ugly, please don't tell anyone we did this:
# predict_one(data, digits.target, 0)
predicted_classes = [predict_one(data, digits.target, idx) for idx in range(len(images))]

In [None]:
print(len(predicted_classes) == len(digits.target))

In [None]:
cm = metrics.confusion_matrix(digits.target, predicted_classes)
print("confusion matrix:\n", cm)

import seaborn as sns
sns.heatmap(cm, annot=True, fmt='3d')
plt.gca().set_ylabel('Actual')
plt.gca().set_xlabel('Predicted');

# YAY!  err, um?  how did we do?

In [None]:
# we can simplify a bit by just considering "zero" as our only class of interest
# so, we have 0 and not-0
expected_classes_z  = np.where(digits.target == 0, 0, 1)  # 1 for "any other digit"
predicted_classes_z = np.where(np.array(predicted_classes) == 0, 0, 1)

cm = metrics.confusion_matrix(expected_classes_z, predicted_classes_z)

sns.heatmap(cm, annot=True, fmt='3d')
plt.gca().set_ylabel('Actual')
plt.gca().set_xlabel('Predicted');

In [None]:
recall = cm[0,0] / cm[0,:].sum()    # correct against row (value for a real 0)
precision = cm[0,0] / cm[:,0].sum() # correct against col (value for a pred 0)
precision, recall

Now we're in a better position to understand the output of the "full" classification report.  This is the comparison of one class taken against all the other for each of the different digits.  Overall, we didn't do too badly.

In [None]:
print(metrics.classification_report(digits.target, predicted_classes))
# note: avg here is not a simple average of the raw precisions:
#       it takes into account the various TPs with respect to the population
#       prevalence (all of those digits) and total predictions

# Exercises

##### Shi Tomasi on a Checkers Board

Since Shi-Tomasi seems like a relatively simple, direct method, let's see if its operation matches our intuition.  What happens when we run Shi-Tomasi on a checkers board?

In [None]:
corners = cv2.goodFeaturesToTrack(board,25,0.01,10)
# (N,1,2) or None comes back; reshape keeps an (N,2) array even for N == 1
corners = (np.empty((0,2), np.int32) if corners is None
           else corners.reshape(-1,2).astype(np.int32))
scene = cv2.cvtColor(board, cv2.COLOR_GRAY2RGB)
for x,y in corners:
    cv2.circle(scene, (int(x),int(y)), 10, [255,0,0], -1) # -1 -> filled circle
my_show(plt.gca(), scene)

# argh, why didn't that find all the corners?  can you find an argument we need to tweek?

In [None]:
# Student section here 


##### Features for Matching

If you go back to the outdoors image from week one, we can use that as another example of direct matching of features in an image.  Try it out.  That is, try to match the rider to the position in the scene.  While you're at it, try to factor the "script-cell" into a function that takes in an overall scene and a target/roi/box as input and returns the matched features and the match image that we displayed above.  Then, we can use that function instead of just cutting and pasting that code many times when you want to do the matching.

In [None]:
outdoors = my_read(img_dir+'data/farm-drop.jpg')
roi = outdoors[81:800, 1001:1500]

In [None]:
def match_helper(scene, box):
    """Match ORB features of `box` against `scene` with a Flann (LSH) matcher.

    Parameters
    ----------
    scene : image
        The overall image to search in.
    box : image
        The target / region-of-interest to look for.

    Returns
    -------
    matches : list
        One single-element list ``[best_match]`` per box descriptor whose
        best match passed the 0.75 ratio test (the nesting is the shape
        ``cv2.drawMatchesKnn`` wants).
    match_img : image
        The side-by-side visualization produced by ``cv2.drawMatchesKnn``.
    """
    FLANN_INDEX_LSH = 6
    index_params_orb = dict(algorithm = FLANN_INDEX_LSH,
                            table_number = 6,      # 12
                            key_size = 12,         # 20
                            multi_probe_level = 1) # 2

    # parameters for search process
    search_params = dict(checks=50)   # or pass empty dictionary

    # find the keypoints and descriptors with ORB
    orb = cv2.ORB_create()
    key_points_box,   descriptors_box   = orb.detectAndCompute(box, None)
    key_points_scene, descriptors_scene = orb.detectAndCompute(scene,None)
    print(len(key_points_box))

    if descriptors_box is None or descriptors_scene is None:
        # an image without keypoints yields no descriptors: nothing to match
        knn_matches = []
    else:
        knn_matches = (cv2.FlannBasedMatcher(index_params_orb, search_params)
                          .knnMatch(descriptors_box, descriptors_scene, k=2))

    # Flann may return fewer than two neighbors for a descriptor.  Looping
    # until it doesn't could spin forever, so incomplete results are simply
    # skipped -- without a runner-up there is no ratio to test.
    matches = [[pair[0]] for pair in knn_matches
               if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance]

    draw_params = dict(matchColor = (0,255,0),
                       singlePointColor = (255,0,0),
                       # same value as the bare 2 used before, but named
                       flags = cv2.DRAW_MATCHES_FLAGS_NOT_DRAW_SINGLE_POINTS)
    match_img = cv2.drawMatchesKnn(box, key_points_box,
                                   scene, key_points_scene,
                                   matches, None, **draw_params)
    return matches, match_img

In [None]:
# DO IT!
outdoors = my_read_g(img_dir+'data/farm-drop.jpg')
biker    =  outdoors[81:800, 1001:1500]

matches, match_img = match_helper(outdoors, biker)

plt.figure(figsize=(15,10))
my_show(plt.gca(), match_img);

##### Evaluating Evaluation 

Above, we saw the following confusion matrix:

In [None]:
images = digits.images.astype(np.uint8)

data = extract_features(images)
predicted_classes = [predict_one(data, digits.target, idx) for idx in range(len(images))]

cm = metrics.confusion_matrix(digits.target, predicted_classes)
sns.heatmap(cm, annot=True, fmt='3d')
plt.gca().set_ylabel('Actual')
plt.gca().set_xlabel('Predicted');

What makes 1 and 7 relatively different from other digits?  Why do you think they get confused for each other?  What other rows (source population) and columns (predictions) show some distinct behavior off the diagonal?  What might explain these difficulties?  Would you have noticed these relationships between reality and predictions if you had *only* used an evaluation scheme like accuracy or AUC?

##### Features Shoot-out Showdown

Let's propose a grand tournament of the following HOG-based feature generating methods:

      * undirected, 9 bin
      * directed, 9 bins
      * undirected, 18 bins
      * directed, 18 bins

We'll evaluate semi-quantitatively by looking at the confusion matrices and quantitatively by looking at the accuracies.  It is reasonable to use accuracy here because the occurrences of the classes are fairly balanced.

In [None]:
# lots of placeholder arguments
hog_base_args = ((8,8), (8,8), (8,8), (8,8))
hog_add_args  = (1, -1.0, 0, 0.2, 1, 64)

preds = {}
for dness, bins in it.product([False, True], [9, 18]):
    # add in experimental arguments
    full_args = hog_base_args + (bins,) + hog_add_args + (dness,)
    hog_d = cv2.HOGDescriptor(*full_args)
    
    data = extract_features(images, hog_d=hog_d)
    predicted_classes = [predict_one(data, digits.target, idx) for idx in range(len(images))]
    
    preds[(dness, bins)] = predicted_classes

In [None]:
fig, axes = plt.subplots(2,2, figsize=(10,10))

for ax, cnds in zip(axes.flat, preds):
    acc = metrics.accuracy_score(digits.target, preds[cnds])
    
    cm = metrics.confusion_matrix(digits.target, preds[cnds])
    sns.heatmap(cm, annot=True, fmt='3d', ax=ax)
    
    ax.set_ylabel('Actual')
    ax.set_xlabel('Predicted');
    ax.set_title("Directed,Bins={} | Acc={:5.4f}".format(cnds, acc))

fig.tight_layout()

## Image stitching

For this tutorial we'll need the `imutils` package. You can install this by using:

```
conda install -c mlgill imutils 
```

In [None]:
import imutils

In [None]:
class Stitcher:
    """Stitch two overlapping images into one panorama.

    Pipeline: SIFT keypoints/descriptors -> brute-force k-NN matching with
    Lowe's ratio test -> RANSAC homography -> perspective warp.
    """

    def __init__(self):
        # True when the modern (non-2.4) OpenCV API is available.  Parsed
        # from the version string so 4.x and later take the modern branch
        # too; an exact "is version 3" check would send them down the
        # 2.4-only factory functions, which no longer exist.
        self.isv3 = int(cv2.__version__.split(".")[0]) >= 3

    def stitch(self, images, ratio=0.75, reprojThresh=4.0,
        showMatches=False):
        """Stitch a pair of images.

        images       -- two-element sequence; the first image is pasted
                        unchanged at the top-left of the output and the
                        second is warped into the first image's frame
        ratio        -- Lowe's ratio-test threshold
        reprojThresh -- RANSAC reprojection threshold for findHomography
        showMatches  -- also build a visualization of the inlier matches

        Returns None when no homography could be found; otherwise the
        stitched image, or the tuple (stitched, visualization) when
        `showMatches` is true.
        """
        # unpack the images, then detect keypoints and extract
        # local invariant descriptors from them
        (imageB, imageA) = images
        (kpsA, featuresA) = self.detectAndDescribe(imageA)
        (kpsB, featuresB) = self.detectAndDescribe(imageB)

        # match features between the two images
        M = self.matchKeypoints(kpsA, kpsB,
            featuresA, featuresB, ratio, reprojThresh)

        # if the match is None, then there aren't enough matched
        # keypoints to create a panorama
        if M is None:
            return None

        # otherwise, apply a perspective warp to stitch the images
        # together; the canvas is as tall as the taller image so that
        # pasting imageB below can never overflow it
        (matches, H, status) = M
        (hA, wA) = imageA.shape[:2]
        (hB, wB) = imageB.shape[:2]
        result = cv2.warpPerspective(imageA, H, (wA + wB, max(hA, hB)))
        result[0:hB, 0:wB] = imageB

        # check to see if the keypoint matches should be visualized
        if showMatches:
            vis = self.drawMatches(imageA, imageB, kpsA, kpsB, matches,
                status)

            # return a tuple of the stitched image and the
            # visualization
            return (result, vis)

        # return the stitched image
        return result

    def detectAndDescribe(self, image):
        """Return (keypoint coordinates as a float32 array, SIFT descriptors)."""
        # convert the image to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # check to see if we are using OpenCV 3.X or newer
        if self.isv3:
            # SIFT is exposed on the main module in some builds and under
            # xfeatures2d (contrib) in others; use whichever is present
            make_sift = (getattr(cv2, "SIFT_create", None)
                         or cv2.xfeatures2d.SIFT_create)
            descriptor = make_sift()
            (kps, features) = descriptor.detectAndCompute(gray, None)

        # otherwise, we are using OpenCV 2.4.X
        else:
            # detect keypoints in the image
            detector = cv2.FeatureDetector_create("SIFT")
            kps = detector.detect(gray)

            # extract features from the image
            extractor = cv2.DescriptorExtractor_create("SIFT")
            (kps, features) = extractor.compute(gray, kps)

        # convert the keypoints from KeyPoint objects to NumPy
        # arrays
        kps = np.float32([kp.pt for kp in kps])

        # return a tuple of keypoints and features
        return (kps, features)

    def matchKeypoints(self, kpsA, kpsB, featuresA, featuresB,
        ratio, reprojThresh):
        """Match descriptors and fit a homography mapping A's points onto B's.

        Returns (matches, H, status), where `matches` is a list of
        (trainIdx, queryIdx) pairs -- i.e. (index into kpsB, index into
        kpsA) -- or None when fewer than 4 matches survive the ratio test
        or no homography could be fitted.
        """
        # an image without keypoints has no descriptors to match
        if featuresA is None or featuresB is None:
            return None

        # compute the raw matches and initialize the list of actual
        # matches
        matcher = cv2.DescriptorMatcher_create("BruteForce")
        rawMatches = matcher.knnMatch(featuresA, featuresB, 2)
        matches = []

        # loop over the raw matches
        for m in rawMatches:
            # ensure the distance is within a certain ratio of each
            # other (i.e. Lowe's ratio test)
            if len(m) == 2 and m[0].distance < m[1].distance * ratio:
                matches.append((m[0].trainIdx, m[0].queryIdx))

        # computing a homography requires at least 4 matches
        if len(matches) >= 4:
            # construct the two sets of points
            ptsA = np.float32([kpsA[i] for (_, i) in matches])
            ptsB = np.float32([kpsB[i] for (i, _) in matches])

            # compute the homography between the two sets of points
            (H, status) = cv2.findHomography(ptsA, ptsB, cv2.RANSAC,
                reprojThresh)

            # the fit itself can fail; report that the same way as
            # "too few matches"
            if H is None:
                return None

            # return the matches along with the homography matrix
            # and status of each matched point
            return (matches, H, status)

        # otherwise, no homography could be computed
        return None

    def drawMatches(self, imageA, imageB, kpsA, kpsB, matches, status):
        """Draw imageA and imageB side by side with a line per inlier match."""
        # initialize the output visualization image
        (hA, wA) = imageA.shape[:2]
        (hB, wB) = imageB.shape[:2]
        vis = np.zeros((max(hA, hB), wA + wB, 3), dtype="uint8")
        vis[0:hA, 0:wA] = imageA
        vis[0:hB, wA:] = imageB

        # loop over the matches
        for ((trainIdx, queryIdx), s) in zip(matches, status):
            # only process the match if the keypoint was successfully
            # matched
            if s == 1:
                # draw the match (B's points are shifted right by A's width)
                ptA = (int(kpsA[queryIdx][0]), int(kpsA[queryIdx][1]))
                ptB = (int(kpsB[trainIdx][0]) + wA, int(kpsB[trainIdx][1]))
                cv2.line(vis, ptA, ptB, (0, 255, 0), 1)

        # return the visualization
        return vis

In [None]:
# load the two images and resize them to have a width of 400 pixels
# (for faster processing)
imageA = cv2.imread("./images/bryce_left_01.png")
imageB = cv2.imread("./images/bryce_right_01.png")
imageA = imutils.resize(imageA, width=400)
imageB = imutils.resize(imageB, width=400)

In [None]:
stitcher = Stitcher()
stitched = stitcher.stitch([imageA, imageB], showMatches=True)
# stitch() returns None when it cannot relate the two images; say so
# plainly instead of failing on the tuple unpacking below
if stitched is None:
    raise RuntimeError("could not stitch: not enough keypoint matches between the images")
(result, vis) = stitched

# show the images
my_show(plt.gca(), imageA, title="Image A")

In [None]:
my_show(plt.gca(), imageB, title="Image B")

In [None]:
my_show(plt.gca(), vis, title="Keypoint Matches")

In [None]:
my_show(plt.gca(), result, title="Result")