In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import scipy
import pickle
import random
import os
import tqdm

from scipy.spatial import distance
from shutil import copyfile
from multiprocessing import Pool
from cv2 import imread
from tqdm.notebook import tqdm as tqdm_notebook

In [2]:
# Image ids listed in the train split's "delete" file, one per line.
with open("../split_delete/Mesh_overlay_train_error _delete.txt", "r") as f:
    train_delete = [line.strip() for line in f.readlines()]

# Same for the validation split.
with open("../split_delete/Mesh_overlay_val_error_delete.txt", "r") as f:
    val_delete = [line.strip() for line in f.readlines()]

In [3]:
# Directory holding the Apolloscape images; ids from the delete lists are
# joined onto this path with a ".jpg" suffix further down.
APOLLOSCAPE_PATH = "/disks/hdd/ECCV2018_apollo/train/images/"
# Directory whose entries are listed and fed to the feature extractor as the
# PKU image set.
PKU_PATH = "/disks/hdd/pku-autonomous-driving/train_images/"

In [4]:
# Feature extractor
def extract_features(image_path, vector_size=32):
    """Build a fixed-length KAZE feature vector for a single image.

    The strongest ``vector_size`` keypoints (by response) are described with
    KAZE, the descriptors are flattened into one vector, and the vector is
    zero-padded so that every image yields ``vector_size * 64`` values.

    Args:
        image_path: Path of the image file to read.
        vector_size: Maximum number of keypoints to keep.

    Returns:
        A 1-D numpy array of length ``vector_size * 64``, or ``None`` when
        the image cannot be read or OpenCV raises an error.
    """
    image = imread(image_path)
    if image is None:
        # imread reports an unreadable / missing file by returning None
        # instead of raising, so it has to be checked explicitly.
        print('Error: could not read image', image_path)
        return None
    # Reverse the channel axis (last axis) of the loaded image.
    image = image[:, :, ::-1]

    # Making descriptors of the same size: each descriptor has 64 values.
    needed_size = vector_size * 64
    try:
        # Using KAZE, because SIFT, ORB and others were moved to an additional
        # module, which adds additional pain during install.
        alg = cv2.KAZE_create()
        # Finding image keypoints.
        kps = alg.detect(image)
        # Keep only the first `vector_size` of them. The number of keypoints
        # varies with image size and colour palette, so sort by keypoint
        # response value (bigger is better) before truncating.
        kps = sorted(kps, key=lambda x: -x.response)[:vector_size]
        # Computing the descriptors for the selected keypoints.
        kps, dsc = alg.compute(image, kps)
    except cv2.error as e:
        print('Error: ', e)
        return None

    if dsc is None:
        # No keypoints survived, so there are no descriptors at all; treat it
        # like the "too few descriptors" case and let the padding fill it.
        dsc = np.zeros(0)
    else:
        # Flatten all descriptors into one big vector - our feature vector.
        dsc = dsc.flatten()

    if dsc.size < needed_size:
        # Fewer than `vector_size` descriptors: pad the end with zeros.
        dsc = np.concatenate([dsc, np.zeros(needed_size - dsc.size)])

    return dsc


def worker(filename):
    """Pool task: extract the feature vector for one image path.

    Kept as a module-level function so it can be handed to ``Pool.imap``.
    Returns whatever ``extract_features`` returns for ``filename``.
    """
    # The former unused `name = filename.split('/')...` local was dropped: it
    # had no effect on the result and only forced `filename` to be a str.
    return extract_features(filename)


def batch_extractor(files, pickled_db_path="features.pck", n_jobs=1):
    """Extract features for many images in parallel and pickle them.

    Args:
        files: Sequence of image paths; each one is passed to ``worker``.
        pickled_db_path: Destination file for the pickled
            ``{image path: feature vector}`` dictionary.
        n_jobs: Number of worker processes in the pool.

    Images for which extraction returned ``None`` are left out of the saved
    dictionary (and reported), so every stored value is a usable vector.
    """
    with Pool(n_jobs) as pool:
        # imap yields results in input order, so they can be zipped back
        # onto `files` below.
        res = list(tqdm_notebook(pool.imap(worker, files), total=len(files)))

    result = {}
    skipped = []
    for filename, features in zip(files, res):
        if features is None:
            skipped.append(filename)
        else:
            result[filename] = features

    if skipped:
        print('Skipped %d file(s) without features: %s' % (len(skipped), skipped))

    # Saving all our feature vectors in a pickled file.
    with open(pickled_db_path, 'wb') as fp:
        pickle.dump(result, fp)

In [5]:
# Turn every id from both delete lists into a .jpg path under APOLLOSCAPE_PATH
# and keep only the paths that are actually present on disk.
files = [
    candidate
    for candidate in (
        os.path.join(APOLLOSCAPE_PATH, image_id + ".jpg")
        for image_id in train_delete + val_delete
    )
    if os.path.exists(candidate)
]
batch_extractor(files, "apolloscape_features.pkl", 8)

HBox(children=(FloatProgress(value=0.0, max=44.0), HTML(value='')))




In [5]:
# Every entry of PKU_PATH, in sorted order, as a full path.
files = []
for entry in sorted(os.listdir(PKU_PATH)):
    files.append(os.path.join(PKU_PATH, entry))
batch_extractor(files, "pku_train_features.pkl", 8)

HBox(children=(FloatProgress(value=0.0, max=4262.0), HTML(value='')))




In [4]:
# Load the two pickled {image path: feature vector} dictionaries written by
# the batch_extractor calls.
with open("apolloscape_features.pkl", mode="rb") as apollo_fp:
    apolloscape_features = pickle.load(apollo_fp)

with open("pku_train_features.pkl", mode="rb") as pku_fp:
    pku_train_features = pickle.load(pku_fp)

In [5]:
# Number of entries in each feature dictionary.
tuple(len(db) for db in (apolloscape_features, pku_train_features))

(44, 4262)

In [6]:
class Matcher(object):
    """Nearest-neighbour lookup over a pickled {name: feature vector} database.

    Attributes are set in ``__init__``:
      * ``data``   - the dictionary exactly as loaded from the pickle file
      * ``names``  - numpy array of the keys that have a feature vector
      * ``matrix`` - numpy array with one feature vector per row, aligned
        with ``names``
    """

    def __init__(self, pickled_db_path="features.pck"):
        """Load the pickled database and stack its vectors into a matrix.

        Entries whose value is ``None`` (images for which extraction failed)
        are skipped; stacking them with real vectors would not give a numeric
        2-D matrix and the distance computation would fail.
        """
        # NOTE: pickle.load can execute arbitrary code - only open files
        # produced by a trusted source.
        with open(pickled_db_path, "rb") as fp:
            self.data = pickle.load(fp)
        names = []
        rows = []
        for k, v in self.data.items():
            if v is None:
                continue
            names.append(k)
            rows.append(v)
        self.matrix = np.array(rows)
        self.names = np.array(names)

    def cos_cdist(self, vector):
        """Return the cosine distance from ``vector`` to every database row.

        The result is a 1-D array with one distance per row of ``matrix``.
        """
        # cdist expects 2-D inputs, so present the query as a single row.
        v = vector.reshape(1, -1)
        return distance.cdist(self.matrix, v, 'cosine').reshape(-1)

    def match(self, features, topn=5):
        """Find the ``topn`` database entries closest to ``features``.

        Returns:
            A pair ``(names, distances)`` of plain lists, ordered from the
            smallest cosine distance to the largest.
        """
        img_distances = self.cos_cdist(features)
        # Indices of the `topn` smallest distances.
        nearest_ids = np.argsort(img_distances)[:topn].tolist()
        nearest_img_paths = self.names[nearest_ids].tolist()

        return nearest_img_paths, img_distances[nearest_ids].tolist()

In [7]:
# Matcher backed by the PKU feature database.
m = Matcher(pickled_db_path="pku_train_features.pkl")

In [8]:
# Show the dictionary keys, i.e. the image paths the features were stored under.
apolloscape_features.keys()

dict_keys(['/disks/hdd/ECCV2018_apollo/train/images/180114_024344459_Camera_5.jpg', '/disks/hdd/ECCV2018_apollo/train/images/180114_024346629_Camera_5.jpg', '/disks/hdd/ECCV2018_apollo/train/images/180114_024348519_Camera_5.jpg', '/disks/hdd/ECCV2018_apollo/train/images/180114_024407578_Camera_5.jpg', '/disks/hdd/ECCV2018_apollo/train/images/180114_024412238_Camera_5.jpg', '/disks/hdd/ECCV2018_apollo/train/images/180114_024951111_Camera_5.jpg', '/disks/hdd/ECCV2018_apollo/train/images/180114_025010537_Camera_5.jpg', '/disks/hdd/ECCV2018_apollo/train/images/180114_025021967_Camera_5.jpg', '/disks/hdd/ECCV2018_apollo/train/images/180114_025027681_Camera_5.jpg', '/disks/hdd/ECCV2018_apollo/train/images/180114_025144097_Camera_5.jpg', '/disks/hdd/ECCV2018_apollo/train/images/180114_025247600_Camera_5.jpg', '/disks/hdd/ECCV2018_apollo/train/images/180114_025314534_Camera_5.jpg', '/disks/hdd/ECCV2018_apollo/train/images/180114_025324232_Camera_5.jpg', '/disks/hdd/ECCV2018_apollo/train/images

In [9]:
# Look up the default number of nearest PKU images for one Apolloscape image.
query_path = '/disks/hdd/ECCV2018_apollo/train/images/180310_031213854_Camera_5.jpg'
m.match(apolloscape_features[query_path])

(['/disks/hdd/pku-autonomous-driving/train_images/ID_7f535a8b7.jpg',
  '/disks/hdd/pku-autonomous-driving/train_images/ID_4b29ca144.jpg',
  '/disks/hdd/pku-autonomous-driving/train_images/ID_4ed3caa54.jpg',
  '/disks/hdd/pku-autonomous-driving/train_images/ID_0e8575fa6.jpg',
  '/disks/hdd/pku-autonomous-driving/train_images/ID_c2e4e24da.jpg'],
 [0.1287426513886315,
  0.1738005998305403,
  0.17760406191770894,
  0.18615926373417235,
  0.18617297686150935])

In [11]:
BASE = "similar"
for apollo_path, apollo_features in apolloscape_features.items():
    # One folder per Apolloscape image, named after the part of the file
    # name before the first dot.
    apollo_name = os.path.basename(apollo_path)
    target_dir = os.path.join(BASE, apollo_name.split('.')[0])
    os.makedirs(target_dir, exist_ok=True)

    # The query image itself goes into the folder first ...
    copyfile(apollo_path, os.path.join(target_dir, apollo_name))

    # ... followed by its 80 nearest matches from the matcher's database.
    similar_paths, _similar_distances = m.match(apollo_features, 80)
    for similar_path in similar_paths:
        destination = os.path.join(target_dir, os.path.basename(similar_path))
        copyfile(similar_path, destination)

In [None]:
for orig