In [205]:
%matplotlib inline
import cv2
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os
import itertools
import time
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import KFold, train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
import math

In [2]:
def detect_match(algorithm,query_img,train_img,min_match_count,verbose=False):
    query_image = query_img
    train_image = train_img
    key_pts1,des1 = algorithm(query_image,None)
    key_pts2,des2 = algorithm(train_image,None)
    msed=np.inf
    if not (isinstance(des1,np.float32)&isinstance(des2,np.float32)):
        des1 = np.float32(des1)
        des2 = np.float32(des2)
    
    flann_idx = 1
    index_params = dict(algorithm = flann_idx, trees = 5)
    search_params = dict(checks = 50)

    flann = cv2.FlannBasedMatcher(index_params, search_params)

    matches = flann.knnMatch(des1,des2,k=2)

    # store all the good matches as per Lowe's ratio test.
    good = [m for m,n in matches if m.distance < 0.7*n.distance]
    
    if len(good)>min_match_count:
        src_pts = np.float32([ key_pts1[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
        dst_pts = np.float32([ key_pts2[m.trainIdx].pt for m in good ]).reshape(-1,1,2)
        M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,5.0)
        if M is None:
            return [0],[0],[0],[0],np.inf
        #print(M,mask)
        matchesMask = mask.ravel().tolist()
        h,w = query_image.shape
        pts = np.float32([ [0,0],[0,h-1],[w-1,h-1],[w-1,0] ]).reshape(-1,1,2)
        dst = cv2.perspectiveTransform(pts,M)
        msed = np.mean([np.sqrt(np.sum(diff)) for diff in (np.power(pts-dst,2))]/(np.sqrt(h**2+w**2)))
        #cv2.polylines(train_image,[np.int32(dst)],True,255,3, cv2.LINE_AA)
    else:
        if verbose==True:
            print( "Not enough matches are found - {}/{}".format(len(good), min_match_count) )
        matchesMask = [0]
    
    return key_pts1, key_pts2, good, matchesMask, msed


In [3]:
alg_dict=dict(sift=cv2.xfeatures2d.SIFT_create().detectAndCompute,
              surf=cv2.xfeatures2d.SURF_create(400).detectAndCompute)
min_matches=10

In [174]:
data_folder1 = os.path.join(os.curdir,'dayKvadr')
data_set1 = [os.path.join(data_folder1,file) for file in os.listdir(data_folder1) if os.path.isfile(os.path.join(data_folder1,file))]
data_folder2 = os.path.join(os.curdir,'dayPony')
data_set2 = [os.path.join(data_folder2,file) for file in os.listdir(data_folder2) if os.path.isfile(os.path.join(data_folder2,file))]
data_folder3 = os.path.join(os.curdir,'nightKvadr')
data_set3 = [os.path.join(data_folder3,file) for file in os.listdir(data_folder3) if os.path.isfile(os.path.join(data_folder3,file))]
data_folder4 = os.path.join(os.curdir,'nightPony')
data_set4 = [os.path.join(data_folder4,file) for file in os.listdir(data_folder4) if os.path.isfile(os.path.join(data_folder4,file))]
data_folder5 = os.path.join(os.curdir,'train')
data_set5 = [os.path.join(data_folder5,file) for file in os.listdir(data_folder5) if os.path.isfile(os.path.join(data_folder5,file))]
data_folder6 = os.path.join(os.curdir,'test')
data_set6 = [os.path.join(data_folder6,file) for file in os.listdir(data_folder6) if os.path.isfile(os.path.join(data_folder6,file))]


In [110]:
train = [cv2.imread(image,0) for image in data_set5]

In [113]:
inliers_matches_train = dict()

inliers_matches_train = [[] for i in range(len(train))]

In [114]:
for alg_name, alg in alg_dict.items():
    for im_i_idx, image_i in enumerate(train):
        _, __, match, inlier ,msd = detect_match(algorithm=alg,query_img=image_i,train_img=train[5],
                                                     min_match_count=10)
        if match==0:
            match=np.inf
        inliers_matches_train[im_i_idx].append(np.sum(inlier)/(len(match)))

  import sys


In [184]:
# Write result column for train dataset
Y_train = []
for i in range(len(data_set5)):
    if(int((data_set5[i].split("_")[1]).split(".")[0]) >= 3210):
        Y_train.append(1)
    else:
        Y_train.append(0)

In [123]:
# Change all NaN to 0.0
for i in range(len(inliers_matches_train)):
    for j in range(len(inliers_matches_train[i])):
        if(math.isnan(inliers_matches_train[i][j])): 
            inliers_matches_train[i][j] = 0.0

In [165]:
test = [cv2.imread(image,0) for image in data_set6]

In [166]:
inliers_matches_test = dict()
inliers_matches_test = [[] for i in range(len(test))]

In [167]:
for alg_name, alg in alg_dict.items():
    for im_i_idx, image_i in enumerate(test):
        _, __, match, inlier ,msd = detect_match(algorithm=alg,query_img=image_i,train_img=test[2],
                                                     min_match_count=10)
        if match==0:
            match=np.inf
        inliers_matches_test[im_i_idx].append(np.sum(inlier)/(len(match)))

  import sys


In [171]:
# Change all NaN to 0.0
for i in range(len(inliers_matches_test)):
    for j in range(len(inliers_matches_test[i])):
        if(math.isnan(inliers_matches_test[i][j])): 
            inliers_matches_test[i][j] = 0.0

In [182]:
# Write result column for test dataset
Y_test = []
for i in range(len(data_set6)):
    num = int((data_set5[i].split("_")[1]).split(".")[0])
    if(((num >= 3216) and (num <= 3336)) or (num >= 3617)):
        Y_test.append(1)
    else:
        Y_test.append(0)

In [206]:
xgb_model = xgb.XGBClassifier().fit(np.array(inliers_matches_train), Y_train)
predictions = xgb_model.predict(np.array(inliers_matches_test))
actuals = Y_test
print(confusion_matrix(actuals, predictions))

[[ 8  6]
 [ 2 12]]


In [208]:
predictions

array([1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0,
       1, 0, 1, 1, 1, 1])

In [211]:
np.array(Y_test)

array([1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0,
       1, 0, 0, 1, 1, 0])

In [216]:
count = 0
for i in range(len(predictions)):
    if(predictions[i] == Y_test[i]):
        count = count + 1
accuracy = count/len(predictions)
print(accuracy)
# It is better to use metrics from sklearn but I am tired


0.7142857142857143


In [217]:
# Here we have 71% acuracy, 
# actualy train and test data set pretty small
# Also it is only 2 features in DS affect to our model 
# not in best side. By the way it will be nice to use 
# KFold for better training and GridSearchCV to find best
# params for XGBoost

