In [2]:
from tqdm import tqdm
import utils.io, utils.hpatches, utils.features
import matplotlib.pyplot as plt
import os

In [3]:
import time

import cv2
import kornia as K
import numpy as np
import torch
from sklearn.decomposition import PCA
from torch import nn
from torch.nn import functional as F
from torchvision import transforms

class NaiveSemantic():
    """Keypoint extractor that appends semantic features to SIFT descriptors.

    Keypoints and local descriptors come from OpenCV SIFT.  For every
    keypoint, a feature vector is sampled from the output map of a
    torchvision segmentation network (with the tail of its heads removed)
    and horizontally stacked after the SIFT descriptor.
    """

    def __init__(self, model='deeplabv3_mobilenet_v3_large', contrastThreshold=0.04, edgeThreshold=10):
        """Build the SIFT detector, the truncated network and the preprocessing.

        Args:
            model: entry-point name passed to ``torch.hub.load`` for the
                ``pytorch/vision:v0.10.0`` repository.
            contrastThreshold: forwarded to ``cv2.SIFT_create``.
            edgeThreshold: forwarded to ``cv2.SIFT_create``.
        """
        # Use the first CUDA device when available, otherwise the CPU.
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

        self.localExtractor = cv2.SIFT_create(
            nfeatures=0,
            contrastThreshold=contrastThreshold,
            edgeThreshold=edgeThreshold,
            nOctaveLayers=3,
        )

        self.semanticExtractor = torch.hub.load('pytorch/vision:v0.10.0', model, pretrained=True).eval().to(self.device)
        # Drop the last child of the main head and the last two children of the
        # auxiliary head (presumably so 'out' carries intermediate features
        # rather than per-class scores -- confirm against the model definition).
        self.semanticExtractor.classifier = nn.Sequential(*list(self.semanticExtractor.classifier.children())[:-1])
        self.semanticExtractor.aux_classifier = nn.Sequential(*list(self.semanticExtractor.aux_classifier.children())[:-2])

        # Input images are expected in OpenCV's BGR channel order.
        self.preprocess = transforms.Compose([
            transforms.ToTensor(),
            transforms.Lambda(lambda x: K.color.bgr_to_rgb(x)),
            transforms.Lambda(lambda x: x.to(self.device)),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            transforms.Resize(799, max_size=800)
        ])

    @staticmethod
    def _normalizeRows(features):
        """Divide each row by its maximum; rows whose maximum is 0 are left as-is.

        Guarding the zero case avoids the 0/0 division that would otherwise
        fill the descriptor with NaNs.
        """
        row_max = np.max(features, axis=1, keepdims=True)
        safe_max = np.where(row_max == 0, np.ones_like(row_max), row_max)
        return features / safe_max

    def semanticMap(self, img):
        """Run the network on ``img`` and return the first item of its 'out' map.

        Args:
            img: three-channel BGR image accepted by ``self.preprocess``.

        Returns:
            Tensor on ``self.device``; the batch dimension has been removed.
        """
        # The model expects a mini-batch, so add a leading batch dimension.
        input_batch = self.preprocess(img).unsqueeze(0)

        with torch.no_grad():
            torch.cuda.empty_cache()
            output = self.semanticExtractor(input_batch)['out'][0]

        return output

    def extractSemantic(self, img, np_kps):
        """Sample one semantic feature vector per keypoint.

        Args:
            img: three-channel BGR image; its height and width define the
                resolution the feature map is resized to.
            np_kps: integer array of ``(x, y)`` pixel coordinates, one row per
                keypoint.

        Returns:
            NumPy array with one row per keypoint, each row scaled by its
            maximum value.
        """
        feature_map = self.semanticMap(img)
        # Bring the map back to image resolution so it can be indexed by pixel.
        feature_map = F.interpolate(
            feature_map.unsqueeze(0),
            size=(img.shape[0], img.shape[1]),
            mode='bilinear',
            align_corners=True,
        ).squeeze(0)

        # Index as [channels, row (y), column (x)], then transpose to one row per keypoint.
        semantic_features = feature_map[:, np_kps[:, 1], np_kps[:, 0]].T.cpu().numpy()

        return self._normalizeRows(semantic_features)

    def detectAndCompute(self, img, opt=None):
        """Detect SIFT keypoints and return them with the combined descriptors.

        Args:
            img: BGR image (3-D array) or single-channel image (2-D array).
            opt: second positional argument forwarded to SIFT's
                ``detectAndCompute`` (the mask in OpenCV's API).

        Returns:
            ``(kps, features)`` where ``features`` is the SIFT descriptor
            followed by the semantic descriptor for each keypoint, or
            ``([], [])`` when no keypoint is found.
        """
        if len(img.shape) == 3:
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            color = img
        else:
            gray = img
            # The semantic branch swaps BGR->RGB and normalises three channels,
            # so a single-channel image has to be expanded before it is used.
            color = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        kps, local_features = self.localExtractor.detectAndCompute(gray, opt)
        if len(kps) == 0:
            return [], []

        # Truncate sub-pixel keypoint positions to integer pixel indices.
        np_kps = np.array([kp.pt for kp in kps]).astype(int)

        # Scale SIFT descriptors by their global maximum (skipped if it is 0).
        peak = local_features.max()
        if peak > 0:
            local_features = local_features / peak

        semantic_features = self.extractSemantic(color, np_kps)
        features = np.hstack([local_features, semantic_features])

        return kps, features


In [4]:
def adjustDesc(desc, proportion, visual_dim=128, new_total=128):
    """Re-balance the visual and semantic parts of combined descriptors.

    The first ``visual_dim`` columns of ``desc`` are treated as the visual
    part and the remaining columns as the semantic part.  Each part is
    resampled along the column axis with ``cv2.resize`` so that the result
    has ``new_total`` columns, ``int(new_total * proportion)`` of which are
    semantic.

    Args:
        desc: 2-D descriptor array (one row per keypoint), or an empty
            sequence.
        proportion: fraction of the output columns given to the semantic part.
        visual_dim: number of leading columns holding the visual descriptor.
        new_total: number of columns in the returned descriptor.

    Returns:
        The resampled descriptor array, or ``[]`` when ``desc`` is empty.

    Raises:
        ValueError: if ``new_total`` is not positive or ``proportion`` yields
            a negative size for either part.
    """
    if len(desc) == 0:
        return []

    if new_total <= 0:
        raise ValueError(f"new_total must be positive, got {new_total}")

    semantic_size = int(new_total * proportion)
    visual_size = new_total - semantic_size
    # Validate the derived sizes (not the raw proportion) so that values which
    # truncate to a valid split, e.g. slightly above 1.0, keep working.
    if semantic_size < 0 or visual_size < 0:
        raise ValueError(
            f"proportion={proportion} gives visual_size={visual_size}, "
            f"semantic_size={semantic_size}; both must be non-negative"
        )

    parts = []
    if visual_size > 0:
        visual = desc[:, :visual_dim]
        # cv2.resize takes dsize as (width, height): keep the rows, change the columns.
        parts.append(cv2.resize(visual, (visual_size, visual.shape[0])))
    if semantic_size > 0:
        semantic = desc[:, visual_dim:]
        parts.append(cv2.resize(semantic, (semantic_size, semantic.shape[0])))

    # Only one part is present when the other was given zero columns.
    if len(parts) == 1:
        return parts[0]

    return np.concatenate(parts, axis=1)

In [None]:
# Brute-force descriptor matcher built with OpenCV's default arguments;
# makeMatch uses this module-level instance for its k-nearest-neighbour queries.
bf = cv2.BFMatcher()

def makeMatch(kps1, desc1, kps2, desc2, idx=False, dmatch=False, ratio=0.9):
    """Match two descriptor sets with a 2-nearest-neighbour ratio test.

    Uses the module-level ``bf`` matcher.  A match is kept when the distance
    to the best neighbour is below ``ratio`` times the distance to the
    second-best neighbour.

    Args:
        kps1: keypoints of the first image, indexable by query index.
        desc1: descriptors of the first image (query set).
        kps2: keypoints of the second image, indexable by train index.
        desc2: descriptors of the second image (train set).
        idx: if true, return the matched indices instead of the keypoints.
        dmatch: if true, return the list of accepted match objects; takes
            precedence over ``idx``.
        ratio: threshold of the ratio test.

    Returns:
        ``dmatch=True``: list of accepted matches (``[]`` when either
        descriptor set is empty).
        ``idx=True``: two arrays of query and train indices.
        Otherwise: two arrays holding the matched entries of ``kps1`` and
        ``kps2``.  When either descriptor set is empty the last two forms
        return ``([], [])``.
    """
    if len(desc1) == 0 or len(desc2) == 0:
        if dmatch:
            return []
        return [], []

    matches = bf.knnMatch(desc1, desc2, k=2)

    # Ratio test.  knnMatch may return fewer than two neighbours for a query
    # (e.g. a single train descriptor); such queries cannot be tested and are skipped.
    dmatches = [
        pair[0]
        for pair in matches
        if len(pair) == 2 and pair[0].distance < ratio * pair[1].distance
    ]
    if dmatch:
        return dmatches

    if idx:
        idx1 = np.array([m.queryIdx for m in dmatches])
        idx2 = np.array([m.trainIdx for m in dmatches])
        return idx1, idx2

    mkpts1 = np.array([kps1[m.queryIdx] for m in dmatches])
    mkpts2 = np.array([kps2[m.trainIdx] for m in dmatches])
    return mkpts1, mkpts2

In [None]:
def evalMatch(mkpts2, gtkpts, total, K=10):
    """Count matches within each pixel threshold and normalise the counts.

    Coordinates are truncated to integers before the Euclidean distance
    between each matched point and its ground-truth point is computed.  For
    every threshold ``k`` in ``0 .. K-1`` the number of matches with distance
    ``<= k`` is divided by ``total`` (``ms``) and by the number of matches
    (``mma``) -- presumably matching score and mean matching accuracy.

    Args:
        mkpts2: matched point coordinates, one ``(x, y)`` row per match.
        gtkpts: ground-truth coordinates aligned row-by-row with ``mkpts2``.
        total: denominator used for ``ms``.
        K: number of thresholds.  Note that inside this function the name
            shadows the module-level ``kornia`` alias ``K``.

    Returns:
        ``(ms, mma)``: two arrays of length ``K``.  Both are all zeros when
        there are no matches; ``ms`` is all zeros when ``total`` is 0.
    """
    if len(mkpts2) == 0:
        return np.zeros(K), np.zeros(K)

    offsets = np.asarray(mkpts2).astype(int) - np.asarray(gtkpts).astype(int)
    errors = np.linalg.norm(offsets, axis=1)

    # Compare every error against every threshold at once: (matches, K) -> (K,).
    thresholds = np.arange(K)
    hits = (errors[:, np.newaxis] <= thresholds).sum(axis=0)

    mma = hits / len(mkpts2)
    # Avoid a division by zero (nan/inf entries) when nothing could be matched.
    ms = hits / total if total > 0 else np.zeros(K)

    return ms, mma