In [1]:
# To install OpenCV3: type "conda install -c menpo opencv3=3.1.0" on terminal
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import os
import glob
import random
from PIL import Image
import scipy.ndimage.filters as filters
from skimage.feature import canny

from skimage.feature import corner_harris, corner_peaks
from sklearn import linear_model
from skimage.transform import warp, AffineTransform
from skimage.measure import ransac
from skimage.util.shape import view_as_windows
from numpy.linalg import norm as normalize
import cv2

if not os.path.exists("Q2"):
    os.makedirs("Q2")
    
if not os.path.exists("Q2/Roadsign"):
    os.makedirs("Q2/Roadsign")
    
if not os.path.exists("Q2/Starbucks"):
    os.makedirs("Q2/Starbucks")
    
if not os.path.exists("Q2/Superman"):
    os.makedirs("Q2/Superman")

# Initial Code

In [8]:
# turns .gif files to .png, and returns the resultant .png filepath. 
# reference: http://stackoverflow.com/questions/6689380/how-to-change-gif-file-to-png-file-using-python-pil
def gifToPng(fn):
    files = glob.glob(fn) 

    for imageFile in files:
        filepath,filename = os.path.split(imageFile)
        filterame,exts = os.path.splitext(filename)
        print "Processing: " + imageFile,filterame
        im = Image.open(imageFile)
        tempPath = 'Images/Q2/Starbucks/'+filterame+'.png'
        im.save( tempPath,'PNG')
    
        return tempPath
    
def createPyramids(image, pyr_number):
    
    G = image.copy()
    pyrGauss = [G]
    
    #creating Gaussian Pyramid
    for i in range(1,pyr_number):
        G = cv2.pyrDown(G)
        pyrGauss.append(G)
        
    return pyrGauss

def detectObject(img, temp):
    
    MIN_MATCH_COUNT = 1
    imageOri = cv2.imread(img,0)
    
    template = cv2.imread(temp,0)
    image = cv2.imread(img,0)
    
    sift = cv2.xfeatures2d.SIFT_create()

    # find the keypoints and descriptors with SIFT
    kp1, des1 = sift.detectAndCompute(template,None)
    kp2, des2 = sift.detectAndCompute(image,None)

    FLANN_INDEX_KDTREE = 0
    index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
    search_params = dict(checks = 50)

    #flann = cv2.FlannBasedMatcher(index_params, search_params)
    #matches = flann.knnMatch(des1,des2,k=2)
    bf = cv2.BFMatcher()
    matches = bf.knnMatch(des1, des2, k=2)

    # store all the good matches as per Lowe's ratio test.
    good = []
    for m,n in matches:
        if m.distance < 0.7*n.distance:
            good.append(m)
            
    # If there are more than "MIN_MATCH_COUNT" matches, find their perspective transform
    if len(good)>MIN_MATCH_COUNT:
        src_pts = np.float32([ kp1[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
        dst_pts = np.float32([ kp2[m.trainIdx].pt for m in good ]).reshape(-1,1,2)

        M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,5.0)
        if mask != None:
            matchesMask = mask.ravel().tolist()

            h,w = template.shape
            pts = np.float32([ [0,0],[0,h-1],[w-1,h-1],[w-1,0] ]).reshape(-1,1,2)
            dst = cv2.perspectiveTransform(pts,M)
    
            imageOri = cv2.cvtColor(image,cv2.COLOR_GRAY2RGB)
            imageOri = cv2.polylines(imageOri,[np.int32(dst)],True,(0,255,0),2)
    
    else:
        print "Not enough matches are found - %d/%d" % (len(good),MIN_MATCH_COUNT)
        matchesMask = None
    
    ## Uncomment to see bounding box with matching points
    '''
    draw_params = dict(matchColor = (0,255,0), # draw matches in green color
                       singlePointColor = None,
                       matchesMask = matchesMask, # draw only inliers
                       flags = 2)
    img3 = cv2.drawMatches(template,kp1,image,kp2,good,None,**draw_params)
    '''
    plt.imshow(imageOri, cmap="gray") 
    fn = img.split('/')[-2] + '/' + img.split('/')[-1].split('.')[0] + '_detected.jpg'
    plt.savefig('Q2/' + fn, dpi=200)
    print fn
    plt.close()
    


In [9]:
if __name__ == '__main__':
    
    # Detect Roadsigns
    imgPath = './Images/Q2/Roadsign/image'
    tempPath = './Images/Q2/Roadsign/template.jpg'
    #tempPath = './Images/Q2/Roadsign/template.jpg'
    #tempPath = cv2.imread('./Images/Q2/Roadsign/template.jpg',0)
    
    for i in range(1, 13):
        #if i == 6 or i == 7 or i == 8 or i == 12:
        #    tempPath = createPyramids(tempPath,5)[1]
        detectObject(imgPath + str(i) + '.jpg', tempPath)
    detectObject('./Images/Q2/Roadsign/lollipop-man.jpg', tempPath)
    
    # Detect Starbucks
    imgPath = './Images/Q2/Starbucks/image'
    tempPath = gifToPng('./Images/Q2/Starbucks/template.gif')
    for i in range(1, 17):
        if i == 2 or i == 12:
            detectObject(imgPath + str(i) + '.jpg', tempPath)
        detectObject(imgPath + str(i) + '.jpg', tempPath)
        
    # Detect Superman
    imgPath = './Images/Q2/Superman/image'
    tempPath = './Images/Q2/Superman/template.jpg'
    for i in range(1, 10):
        detectObject(imgPath + str(i) + '.jpg', tempPath)



Roadsign/image1_detected.jpg
Roadsign/image2_detected.jpg
Roadsign/image3_detected.jpg
Roadsign/image4_detected.jpg
Roadsign/image5_detected.jpg
Roadsign/image6_detected.jpg
Roadsign/image7_detected.jpg
Roadsign/image8_detected.jpg
Roadsign/image9_detected.jpg
Roadsign/image10_detected.jpg
Roadsign/image11_detected.jpg
Roadsign/image12_detected.jpg
Roadsign/lollipop-man_detected.jpg
Processing: ./Images/Q2/Starbucks/template.gif template
Starbucks/image1_detected.jpg
Starbucks/image2_detected.jpg
Starbucks/image2_detected.jpg
Starbucks/image3_detected.jpg
Starbucks/image4_detected.jpg
Starbucks/image5_detected.jpg
Starbucks/image6_detected.jpg
Starbucks/image7_detected.jpg
Starbucks/image8_detected.jpg
Starbucks/image9_detected.jpg
Starbucks/image10_detected.jpg
Starbucks/image11_detected.jpg
Starbucks/image12_detected.jpg
Starbucks/image12_detected.jpg
Starbucks/image13_detected.jpg
Starbucks/image14_detected.jpg
Starbucks/image15_detected.jpg
Starbucks/image16_detected.jpg
Superman/i

# Improved Modification

In [12]:
# turns .gif files to .png, and returns the resultant .png filepath. 
# reference: http://stackoverflow.com/questions/6689380/how-to-change-gif-file-to-png-file-using-python-pil
def gifToPng(fn):
    files = glob.glob(fn) 

    for imageFile in files:
        filepath,filename = os.path.split(imageFile)
        filterame,exts = os.path.splitext(filename)
        print "Processing: " + imageFile,filterame
        im = Image.open(imageFile)
        tempPath = 'Images/Q2/Starbucks/'+filterame+'.png'
        im.save( tempPath,'PNG')
    
        return tempPath
    
def createPyramids(image, pyr_number):
    
    G = image.copy()
    pyrGauss = [G]
    
    #creating Gaussian Pyramid
    for i in range(1,pyr_number):
        G = cv2.pyrDown(G)
        pyrGauss.append(G)
        
    return pyrGauss

def getKP(image, template):
    sift = cv2.xfeatures2d.SIFT_create()

    # find the keypoints and descriptors with SIFT
    kp1, des1 = sift.detectAndCompute(template,None)
    kp2, des2 = sift.detectAndCompute(image,None)

    bf = cv2.BFMatcher()
    matches = bf.knnMatch(des1, des2, k=2)

    # store all the good matches as per Lowe's ratio test.
    good = []
    for m,n in matches:
        if m.distance < 0.7*n.distance:
            good.append(m)
            
    # If there are more than "MIN_MATCH_COUNT" matches, find their perspective transform
    src_pts = [ kp1[m.queryIdx].pt for m in good ]
    dst_pts = [ kp2[m.trainIdx].pt for m in good ]
    
    return src_pts, dst_pts

def detectObject(img, temp):
    
    MIN_MATCH_COUNT = 1
    imageOri = cv2.imread(img,0)
    
    template = cv2.imread(temp,0)
    #template = cv2.Canny(template, 30, 200)
    
    image = cv2.imread(img,0)

    imageDenoise = cv2.imread(img)
    imageDenoise = cv2.fastNlMeansDenoisingColored(imageDenoise,None,10,10,7,21)
    imageDenoise = cv2.cvtColor(imageDenoise, cv2.COLOR_RGB2GRAY)
    
    imageContrast = cv2.imread(img,0)
    clahe = cv2.createCLAHE(clipLimit=0.5, tileGridSize=(8,8))
    imageContrast = clahe.apply(imageContrast)
    
    imageDenoiseAndContrast = clahe.apply(imageDenoise)
        
    templateBlur = cv2.imread(temp,0)
    templateBlur = cv2.GaussianBlur(templateBlur,(5,5),0)
    
    templateDenoise = cv2.imread(temp)
    templateDenoise = cv2.fastNlMeansDenoisingColored(templateDenoise,None,10,10,7,21)
    templateDenoise = cv2.cvtColor(templateDenoise, cv2.COLOR_RGB2GRAY)
    
    templateDenoiseAndBlur = cv2.GaussianBlur(templateDenoise,(5,5),0)
    
    templateTranslate = cv2.imread(temp,0)
    rows,cols = templateTranslate.shape
    M = np.float32([[1,0,0],[0,1,-100]])
    templateTranslate = cv2.warpAffine(templateTranslate,M,(cols,rows))
    
    templateBlurAndTranslate = cv2.warpAffine(templateBlur,M,(cols,rows))
    
    src_pts,dst_pts = getKP(image, templateBlur)
    s,d = getKP(image, templateDenoise)
    src_pts.extend(s)
    dst_pts.extend(d)
    s,d = getKP(image, templateDenoiseAndBlur)
    src_pts.extend(s)
    dst_pts.extend(d)
    s,d = getKP(image, templateTranslate)
    src_pts.extend(s)
    dst_pts.extend(d)
    s,d = getKP(image, templateBlurAndTranslate)
    src_pts.extend(s)
    dst_pts.extend(d)
    s,d = getKP(imageDenoise, templateBlur)
    src_pts.extend(s)
    dst_pts.extend(d)
    s,d = getKP(imageDenoise, templateDenoiseAndBlur)
    src_pts.extend(s)
    dst_pts.extend(d)
    s,d = getKP(imageDenoise, templateDenoise)
    src_pts.extend(s)
    dst_pts.extend(d)
    s,d = getKP(imageDenoise, templateTranslate)
    src_pts.extend(s)
    dst_pts.extend(d)
    s,d = getKP(imageDenoise, templateBlurAndTranslate)
    src_pts.extend(s)
    dst_pts.extend(d)
    s,d = getKP(imageContrast, templateBlur)
    src_pts.extend(s)
    dst_pts.extend(d)
    s,d = getKP(imageContrast, templateDenoise)
    src_pts.extend(s)
    dst_pts.extend(d)
    s,d = getKP(imageContrast, templateDenoiseAndBlur)
    src_pts.extend(s)
    dst_pts.extend(d)
    s,d = getKP(imageContrast, templateTranslate)
    src_pts.extend(s)
    dst_pts.extend(d)
    s,d = getKP(imageContrast, templateBlurAndTranslate)
    src_pts.extend(s)
    dst_pts.extend(d)
    s,d = getKP(imageDenoiseAndContrast, templateBlur)
    src_pts.extend(s)
    dst_pts.extend(d)
    s,d = getKP(imageDenoiseAndContrast, templateDenoise)
    src_pts.extend(s)
    dst_pts.extend(d)
    s,d = getKP(imageDenoiseAndContrast, templateDenoiseAndBlur)
    src_pts.extend(s)
    dst_pts.extend(d)
    s,d = getKP(imageDenoiseAndContrast, templateTranslate)
    src_pts.extend(s)
    dst_pts.extend(d)
    s,d = getKP(imageDenoiseAndContrast, templateBlurAndTranslate)
    src_pts.extend(s)
    dst_pts.extend(d)
    
    src_pts = np.float32(src_pts).reshape(-1,1,2)
    dst_pts = np.float32(dst_pts).reshape(-1,1,2)
    
    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,5.0)
    if mask != None:
        matchesMask = mask.ravel().tolist()

        h,w = template.shape
        pts = np.float32([ [0,0],[0,h-1],[w-1,h-1],[w-1,0] ]).reshape(-1,1,2)
        dst = cv2.perspectiveTransform(pts,M)
    
        imageOri = cv2.cvtColor(image,cv2.COLOR_GRAY2RGB)
        imageOri = cv2.polylines(imageOri,[np.int32(dst)],True,(0,255,0),2)
    
    
    plt.imshow(imageOri, cmap="gray") 
    fn = img.split('/')[-2] + '/' + img.split('/')[-1].split('.')[0] + '_detected.jpg'
    plt.savefig('Q2/' + fn, dpi=200)
    print fn
    plt.close()
    


In [13]:
if __name__ == '__main__':
    
    # Detect Roadsigns
    imgPath = './Images/Q2/Roadsign/image'
    tempPath = './Images/Q2/Roadsign/template.jpg'
    #tempPath = './Images/Q2/Roadsign/template.jpg'
    #tempPath = cv2.imread('./Images/Q2/Roadsign/template.jpg',0)
    
    for i in range(1, 13):
        #if i == 6 or i == 7 or i == 8 or i == 12:
        #    tempPath = createPyramids(tempPath,5)[1]
        detectObject(imgPath + str(i) + '.jpg', tempPath)
    detectObject('./Images/Q2/Roadsign/lollipop-man.jpg', tempPath)
    
    # Detect Starbucks
    imgPath = './Images/Q2/Starbucks/image'
    tempPath = gifToPng('./Images/Q2/Starbucks/template.gif')
    for i in range(1, 17):
        if i == 2 or i == 12:
            detectObject(imgPath + str(i) + '.jpg', tempPath)
        detectObject(imgPath + str(i) + '.jpg', tempPath)
        
    # Detect Superman
    imgPath = './Images/Q2/Superman/image'
    tempPath = './Images/Q2/Superman/template.jpg'
    for i in range(1, 10):
        detectObject(imgPath + str(i) + '.jpg', tempPath)



Roadsign/image1_detected.jpg
Roadsign/image2_detected.jpg
Roadsign/image3_detected.jpg
Roadsign/image4_detected.jpg
Roadsign/image5_detected.jpg
Roadsign/image6_detected.jpg
Roadsign/image7_detected.jpg
Roadsign/image8_detected.jpg
Roadsign/image9_detected.jpg
Roadsign/image10_detected.jpg
Roadsign/image11_detected.jpg
Roadsign/image12_detected.jpg
Roadsign/lollipop-man_detected.jpg
Processing: ./Images/Q2/Starbucks/template.gif template
Starbucks/image1_detected.jpg
Starbucks/image2_detected.jpg
Starbucks/image2_detected.jpg
Starbucks/image3_detected.jpg
Starbucks/image4_detected.jpg
Starbucks/image5_detected.jpg
Starbucks/image6_detected.jpg
Starbucks/image7_detected.jpg
Starbucks/image8_detected.jpg
Starbucks/image9_detected.jpg
Starbucks/image10_detected.jpg
Starbucks/image11_detected.jpg
Starbucks/image12_detected.jpg
Starbucks/image12_detected.jpg
Starbucks/image13_detected.jpg
Starbucks/image14_detected.jpg
Starbucks/image15_detected.jpg
Starbucks/image16_detected.jpg
Superman/i

# Appendix (not used)

Transforming Image before matching (slightly improved but need individual modification; not robust)
Tried gaussian pyramid ver of template

In [None]:
# turns .gif files to .png, and returns the resultant .png filepath. 
# reference: http://stackoverflow.com/questions/6689380/how-to-change-gif-file-to-png-file-using-python-pil
def gifToPng(fn):
    files = glob.glob(fn) 

    for imageFile in files:
        filepath,filename = os.path.split(imageFile)
        filterame,exts = os.path.splitext(filename)
        print "Processing: " + imageFile,filterame
        im = Image.open(imageFile)
        tempPath = 'Images/Q2/Starbucks/'+filterame+'.png'
        im.save( tempPath,'PNG')
    
        return tempPath
    
def createPyramids(image, pyr_number):
    
    G = image.copy()
    pyrGauss = [G]
    
    #creating Gaussian Pyramid
    for i in range(1,pyr_number):
        G = cv2.pyrDown(G)
        pyrGauss.append(G)
        
    return pyrGauss

def detectObject(img, temp, denoise):
    
    MIN_MATCH_COUNT = 1
    imageOri = cv2.imread(img,0)
    
    template = cv2.imread(temp,0)
    #template = cv2.Canny(template, 30, 200)
    if (denoise == True):
        image = cv2.imread(img)
        image = cv2.fastNlMeansDenoisingColored(image,None,10,10,7,21)
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        clahe = cv2.createCLAHE(clipLimit=0.5, tileGridSize=(8,8))
        image = clahe.apply(image)
    else:    
        image = cv2.imread(img,0) 
        #image = cv2.Canny(image, 30, 200)
        #image = cv2.bilateralFilter(image,9,75,75)
        clahe = cv2.createCLAHE(clipLimit=0.5, tileGridSize=(8,8))
        image = clahe.apply(image)
    
    sift = cv2.xfeatures2d.SIFT_create()

    # find the keypoints and descriptors with SIFT
    kp1, des1 = sift.detectAndCompute(template,None)
    kp2, des2 = sift.detectAndCompute(image,None)

    FLANN_INDEX_KDTREE = 0
    index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
    search_params = dict(checks = 50)

    #flann = cv2.FlannBasedMatcher(index_params, search_params)
    #matches = flann.knnMatch(des1,des2,k=2)
    bf = cv2.BFMatcher()
    matches = bf.knnMatch(des1, des2, k=2)

    # store all the good matches as per Lowe's ratio test.
    good = []
    for m,n in matches:
        if m.distance < 0.7*n.distance:
            good.append(m)
            
    # If there are more than "MIN_MATCH_COUNT" matches, find their perspective transform
    if len(good)>MIN_MATCH_COUNT:
        src_pts = np.float32([ kp1[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
        dst_pts = np.float32([ kp2[m.trainIdx].pt for m in good ]).reshape(-1,1,2)

        M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,5.0)
        if mask != None:
            matchesMask = mask.ravel().tolist()

            h,w = template.shape
            pts = np.float32([ [0,0],[0,h-1],[w-1,h-1],[w-1,0] ]).reshape(-1,1,2)
            dst = cv2.perspectiveTransform(pts,M)
    
            imageOri = cv2.cvtColor(image,cv2.COLOR_GRAY2RGB)
            imageOri = cv2.polylines(imageOri,[np.int32(dst)],True,(0,255,0),2)
    
    else:
        print "Not enough matches are found - %d/%d" % (len(good),MIN_MATCH_COUNT)
        matchesMask = None
    
    ## Uncomment to see bounding box with matching points
    '''
    draw_params = dict(matchColor = (0,255,0), # draw matches in green color
                       singlePointColor = None,
                       matchesMask = matchesMask, # draw only inliers
                       flags = 2)
    img3 = cv2.drawMatches(template,kp1,image,kp2,good,None,**draw_params)
    '''
    plt.imshow(imageOri, cmap="gray") 
    fn = img.split('/')[-2] + '/' + img.split('/')[-1].split('.')[0] + '_detected.jpg'
    plt.savefig('Q2/' + fn, dpi=200)
    print fn
    plt.close()
    


Bounding box drawn have bad results

In [22]:
'''
    
def detectObject(img, temp):
    
    template = cv2.imread(temp,0)
    image = cv2.imread(img,0)
    
    # Find Matched Feature Points
    sift = cv2.xfeatures2d.SIFT_create()
    #sift = cv2.SIFT()
    
    # find keypoints and descriptors
    kp1, des1 = sift.detectAndCompute(template,None)
    kp2, des2 = sift.detectAndCompute(image,None)
    
    # FLANN parameters
    FLANN_INDEX_KDTREE = 0
    index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
    search_params = dict(checks=50)

    bf = cv2.BFMatcher()
    matches = bf.knnMatch(des1, des2, k=2)
    #flann = cv2.FlannBasedMatcher(index_params,search_params)
    #matches = flann.knnMatch(des1,des2,k=2)

    good = []

    # ratio test as per Lowe's paper. Store good matches and matched points
    for i,(m,n) in enumerate(matches):
        if m.distance < 0.7*n.distance:
            good.append(m)
            
    # If there are more than 10 matches, find their perspective transform
    src_pts = np.float32([ kp1[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
    dst_pts = np.float32([ kp2[m.trainIdx].pt for m in good ]).reshape(-1,1,2)

    M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,5.0)
    matchesMask = mask.ravel().tolist()

    h,w = image.shape
    pts = np.float32([ [0,0],[0,h-1],[w-1,h-1],[w-1,0] ]).reshape(-1,1,2)
    dst = cv2.perspectiveTransform(pts,M)
    
    image = cv2.cvtColor(image,cv2.COLOR_GRAY2RGB)
    image = cv2.polylines(image,[np.int32(dst)],True,(0,255,0),3)
    
    draw_params = dict(matchColor = (0,255,0), # draw matches in green color
                       singlePointColor = None,
                       matchesMask = matchesMask, # draw only inliers
                       flags = 2)

    img3 = cv2.drawMatches(template,kp1,image,kp2,good,None,**draw_params)

    plt.imshow(img3, cmap = 'gray') 
    fn = img.split('/')[-2] + '/' + img.split('/')[-1].split('.')[0] + '_detected.jpg'
    plt.savefig('Q2/' + fn, dpi=200)
    print fn
    plt.close()
    
    #else:
    #    print "Not enough matches are found in %s - %d/%d" % (img,len(good),MIN_MATCH_COUNT)
    #    matchesMask = None
      
    # Draw Matches    
    draw_params = dict(matchColor = (0,255,0), # draw matches in green color
                   singlePointColor = None,
                   matchesMask = matchesMask, # draw only inliers
                   flags = 2)

    matchedImage = cv2.drawMatches(image,kp1,template,kp2,good,None,**draw_params)
    plt.imshow(matchedImage, 'gray') 
    fn = img.split('/')[-2] + '/' + img.split('/')[-1].split('.')[0] + '_matched.jpg'
    plt.savefig('Q2/' + fn, dpi=200)
    plt.close()
    '''
    


Processing: ./Images/Q2/Starbucks/template.gif template


AttributeError: 'NoneType' object has no attribute 'ravel'