In [1]:
import gc
import os
#os.environ['CUDA_VISIBLE_DEVICES']="1"
import re
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch

from fastprogress import master_bar, progress_bar
%config InlineBackend.figure_format ='retina'

In [2]:
# Master switch. When True, the fastai stack is imported and the databunches and model
# are built so features can be computed. When False, those cells are skipped and the
# notebook relies on feature files that were saved to disk earlier.
EXTRACT_FEATURES = False

In [3]:
# The heavy model-side dependencies are only imported when features will be computed.
# NOTE(review): several names used later without an explicit import (e.g. `ImageList`,
# `cnn_learner`, `models`, `nn`, `get_image_files`, `open_image`, `show_all`) presumably
# come from these star imports — confirm, and prefer explicit imports.
if EXTRACT_FEATURES:
    from fastai import *
    from fastai.vision import *
    from fastai.callbacks import *
    # Project-local module; `L2Norm` and `GeM` are used by the `Extractor` head.
    from arch import RingGeMNet, GeMNet, L2Norm, GeM


In [None]:
# ./train -> points to the index image dir. If the cached image list (train_images.gz)
# and the feature files already exist, the images themselves are not needed.
COMP_DATA_DIR = Path('.')

ti_fname = 'train_images.gz'
try:
    df = pd.read_pickle(ti_fname)
except FileNotFoundError:
    # Cache miss only: any other failure (corrupt pickle, Ctrl-C, ...) is allowed to
    # surface instead of silently triggering a rescan of the image directory.
    df = pd.DataFrame({'Image' : sorted(get_image_files(COMP_DATA_DIR / 'train', recurse=True))})
    df.to_pickle(ti_fname)

In [None]:
# Preview the first rows of the index (train) image table.
df.head()

In [None]:
SIZE = 256            # side length passed to the SQUISH resize when DO_FULL_SIZE is False
DO_FULL_SIZE = False  # True -> no resizing (ResizeMethod.NO) and one image per batch

if EXTRACT_FEATURES:

    NUM_WORKERS=8

    class ImageListAbsPath(ImageList):
        """ImageList variant that strips './' from the file name before opening it."""
        def open(self, fn:PathOrStr)->Image:
            # str() first: on a Path object `.replace` is the rename-file method,
            # not a string substitution.
            return open_image(str(fn).replace('./',''))

    tfms = (None, None)

    # One pipeline for both modes; only batch size, resize method and target size
    # differ (same shape as the query databunch cell further down).
    BS = 1 if DO_FULL_SIZE else 64
    data = (ImageList.from_df(df,path='', cols=['Image'])
            .split_none()
            .label_const()
            .transform(tfms,
                       resize_method=ResizeMethod.NO if DO_FULL_SIZE else ResizeMethod.SQUISH,
                       size=None if DO_FULL_SIZE else SIZE)
            .databunch(bs=BS, num_workers=NUM_WORKERS)
            .normalize(imagenet_stats)
           )
    # Feature rows are written by batch position (see extract_vectors_batched), so the
    # loader must walk the dataset in order and must not drop the last partial batch.
    data.train_dl.dl.batch_sampler.sampler = torch.utils.data.SequentialSampler(data.train_ds)
    data.train_dl.dl.batch_sampler.drop_last = False

In [None]:
# The backbone is only resolved when features are going to be computed.
if EXTRACT_FEATURES:
    arch = models.resnet152
else:
    arch = None
model_fname =  'resnet152_i200_l1000-256'
basename_suffix = 'cut-extractor-2scales6patches-gem3'
size_fname = str(SIZE) if not DO_FULL_SIZE else 'full'

# Shared stem of every cached feature / result file; falls back to the architecture
# name when no checkpoint name is set.
basename = '_'.join([model_fname or arch.__name__, size_fname, basename_suffix]) + '.pth'
print(basename)

In [None]:
if not EXTRACT_FEATURES:
    # Nothing to build: downstream cells then depend on cached feature files.
    learn, InferenceNet = None, None
else:
    class Extractor(nn.Module):
        """Head that pools a 4-D feature map with the project's `GeM` module,
        applies `L2Norm`, and returns the result reshaped to (batch, 1, channels)."""
        def __init__(self):
            super().__init__()
            self.l2norm = L2Norm()
            self.pool   = GeM(3.) #nn.AdaptiveMaxPool2d(1)

        def forward(self, x):
            # Unpacking four values also enforces that the input is 4-D.
            b, d, _, _ = x.shape
            pooled = self.pool(x)
            return self.l2norm(pooled).view(b, 1, d)

    learn = cnn_learner(data, arch, pretrained=True, cut=-1,
                        custom_head=Extractor(),
                        loss_func=nn.CrossEntropyLoss(),
                        metrics=[accuracy])

    if not model_fname:
        model_fname = arch.__name__
    else:
        # strict=False: the checkpoint is loaded non-strictly into the new head layout.
        learn = learn.load(model_fname, strict=False)
    learn.summary()
    InferenceNet = learn.model

In [None]:
NUM_WORKERS=16

# Cached list of query (test) images; rebuilt from ./test only when the cache file is absent.
qi_fname = 'query_images.gz'
try:
    qdf = pd.read_pickle(qi_fname)
except FileNotFoundError:
    # Cache miss only: other errors are allowed to surface rather than being swallowed.
    qdf = pd.DataFrame({'Image' : sorted(get_image_files(COMP_DATA_DIR / 'test', recurse=True))})
    qdf.to_pickle(qi_fname)
qdf.head()

In [None]:
if EXTRACT_FEATURES:
    # Full-size mode runs one image per batch, otherwise 64 resized images.
    if DO_FULL_SIZE:
        BS = 1
    else:
        BS = 64
    qdata = (
        ImageList.from_df(qdf, path='', cols=['Image'])
        .split_none()
        .label_const()
        .transform(
            tfms,
            size=None if DO_FULL_SIZE else SIZE,
            resize_method=ResizeMethod.NO if DO_FULL_SIZE else ResizeMethod.SQUISH,
        )
        .databunch(num_workers=NUM_WORKERS, bs=BS)
        .normalize(imagenet_stats)
    )
    # Walk the dataset in order and keep the last partial batch, because feature rows
    # are written by batch position.
    qdata.train_dl.dl.batch_sampler.drop_last = False
    qdata.train_dl.dl.batch_sampler.sampler = torch.utils.data.SequentialSampler(qdata.train_ds)

In [None]:
def extract_vectors_batched(data,model,flip=False):
    """Run `model` over `data.train_dl` and collect its outputs in one CPU tensor.

    The model is moved to CUDA and put in eval mode. Rows are written at offsets
    derived from the batch number and `data.batch_size`, so the loader is expected to
    yield batches in dataset order. With `flip=True` each batch is extended with a copy
    flipped along dimension 3 and the output holds two consecutive rows per image:
    the original at the even row, the flipped copy at the odd row.

    Returns a tensor with `len(data.train_ds) * (2 if flip else 1)` rows, or None when
    the loader yields no batch.
    """
    model.cuda()
    model.eval()
    copies = 2 if flip else 1
    total_rows = len(data.train_ds) * copies
    rows_per_batch = data.batch_size * copies
    vectors = None

    with torch.no_grad():
        for batch_no, (img, _) in enumerate(progress_bar(data.train_dl)):
            st = batch_no * rows_per_batch
            fin = min(st + rows_per_batch, total_rows)
            if flip:
                img = torch.cat((img, img.flip([3])))
            out = model(img).cpu()
            if vectors is None:
                # Allocate lazily: the per-item output shape is only known after the first batch.
                vectors = torch.zeros(total_rows, *out.shape[1:])
            if not flip:
                vectors[st:fin, ...] = out
                continue
            # First half of `out` are the originals, second half the flipped copies.
            half = (fin - st) // 2
            vectors[st:fin:2, ...] = out[:half, ...]
            vectors[st+1:fin+1:2, ...] = out[half:, ...]
    return vectors

def extract_vectors_batched_multi(data,model):
    """Collect the model outputs for every item of `data.train_dl`, one row per item.

    The body used to be a copy of the non-flip path of `extract_vectors_batched`
    (plus commented-out hook-based multi-layer code), so it now delegates to it to
    keep a single implementation of the batching logic.

    Returns the same tensor as `extract_vectors_batched(data, model, flip=False)`.
    """
    return extract_vectors_batched(data, model, flip=False)

In [None]:
flip = True
p_flip = 'flip' if flip else ''

def _load_or_extract_features(label, fname, get_databunch):
    """Load a cached feature tensor from `fname`, computing and saving it on a cache miss.

    label:         text used in the progress messages ("QUERY" / "TRAIN").
    fname:         path of the cached tensor file.
    get_databunch: zero-argument callable returning the databunch to extract from. It is
                   only called on a cache miss, because the databunches do not exist
                   when EXTRACT_FEATURES is False.

    Raises RuntimeError when the cache file is missing and extraction is disabled.
    """
    print(f"Attempting to load {label} features from disk...", end="")
    try:
        feats = torch.load(fname)
    except FileNotFoundError:
        # Only a missing file triggers recomputation; other load errors surface.
        print("Failed. Computing features...")
        if not EXTRACT_FEATURES:
            raise RuntimeError(
                f"{fname} not found and EXTRACT_FEATURES is False; "
                "set EXTRACT_FEATURES = True to compute the features.")
        feats = extract_vectors_batched(get_databunch(), InferenceNet, flip)
        torch.save(feats, fname)
    else:
        print("OK")
    return feats

query_features = _load_or_extract_features("QUERY", f'query{p_flip}_{basename}', lambda: qdata)
index_features = _load_or_extract_features("TRAIN", f'train{p_flip}_{basename}', lambda: data)

In [None]:
# Display both feature tensors as a sanity check.
query_features, index_features

In [None]:
# Now let's do the nearest-neighbor search and create the submission
import faiss
def flatten(list2d):
    """Concatenate an iterable of iterables into a single flat list."""
    # Plain comprehension: avoids depending on `itertools`, which the import cell
    # does not bring into scope.
    return [item for sub in list2d for item in sub]

# File stems aligned with the feature rows: every image contributes two consecutive
# entries, matching the two rows per image (original + flipped) in the feature tensors.
query_fnames = [p.stem for p in qdf.Image.tolist() for _ in range(2)]
index_fnames = [p.stem for p in df.Image.tolist() for _ in range(2)]


In [None]:
# Drop the references to the learner and model (and to any leftover search objects of
# these names), then run the garbage collector and release cached CUDA memory.
learn, InferenceNet, co, res, flat_config, cpu_index, index = None, None, None, None, None, None, None
gc.collect()
torch.cuda.empty_cache()

In [None]:
def t_pcawhitenlearn(X):
    """Learn a PCA-whitening transform from descriptors stored one per row.

    Args:
        X: 2-D floating point tensor of shape (N, D).

    Returns:
        m: (1, D) mean of the rows of X.
        P: (D, D) matrix whose i-th row is the i-th principal direction (sorted by
           decreasing eigenvalue) divided by the square root of its eigenvalue, so
           that `(X - m) @ P.t()` has identity covariance.
    """
    N = X.shape[0]

    # Learning PCA w/o annotations
    m = X.mean(dim=0, keepdim=True)
    Xc = X - m
    Xcov = Xc.t() @ Xc
    # Symmetrise explicitly and divide by N.
    Xcov = (Xcov + Xcov.t()) / (2*N)

    # torch.symeig is gone from current PyTorch; use torch.linalg.eigh when it exists
    # and fall back to symeig on old installs. Both return ascending eigenvalues.
    if hasattr(torch, 'linalg') and hasattr(torch.linalg, 'eigh'):
        eigval, eigvec = torch.linalg.eigh(Xcov)
    else:
        eigval, eigvec = torch.symeig(Xcov, eigenvectors=True)
    order = eigval.argsort(descending=True)
    eigval = eigval[order]
    eigvec = eigvec[:, order]

    # Guard against zero / slightly negative eigenvalues produced by round-off, which
    # would otherwise give inf/NaN scales.
    eigval = eigval.clamp_min(torch.finfo(eigval.dtype).eps)
    # Row scaling is the same as inverse(sqrt(diag(eigval))) @ eigvec.t() without
    # building and inverting a dense D x D matrix.
    P = eigvec.t() / torch.sqrt(eigval).unsqueeze(1)

    return m, P

def t_whitenapply(X, m, P, dimensions=None):
    """Apply a learned PCA-whitening transform to row-wise descriptors and L2-normalise.

    Args:
        X: (N, D) tensor, one descriptor per row.
        m: (1, D) mean returned by `t_pcawhitenlearn`.
        P: (D, D) whitening matrix returned by `t_pcawhitenlearn`; its rows are the
           scaled principal directions.
        dimensions: number of leading components to keep; all of them when falsy.

    Returns:
        (N, dimensions) tensor with rows scaled to (approximately) unit L2 norm.
    """
    if not dimensions: dimensions = P.shape[0]

    # The components are the ROWS of P, so row-vectors are projected with P[:k].t().
    # (Slicing columns of P, as before, multiplied by P instead of its transpose.)
    X = (X - m) @ P[:dimensions, :].t()
    # The small constant avoids division by zero for all-zero rows.
    X = X / (torch.norm(X, dim=1, keepdim=True) + 1e-6)
    return X

def get_idxs_and_dists(_query_features, _index_features, index_type='', BS = 32, max_hits = 20, use_gpu = False):
    """Build a faiss index over the index features and search it with the query features.

    Args:
        _query_features: torch tensor or array of shape (N, D) or (N, P, D).
        _index_features: torch tensor or array of shape (M, D) or (M, P, D).
        index_type: factory string handed unchanged to `faiss.index_factory`, so any
            preprocessing prefixes in it are handled by faiss itself.
        BS: number of query rows searched per call.
        max_hits: number of neighbours returned per query row.
        use_gpu: when True, try to move the index to all GPUs and fall back to the
            CPU index if that fails.

    Returns:
        (out_idxs, out_dists): int32 / float32 arrays with one row per query feature row
        and `max_hits` columns. Index ids are divided by the number of index patches so
        they refer to index items rather than to individual patch rows.
    """
    def _as_2d_float32(feats):
        # `torch.Tensor` instead of the bare `Tensor` name, which only exists after
        # the fastai star imports.
        if isinstance(feats, torch.Tensor): feats = feats.detach().cpu().numpy()
        feats = np.asarray(feats, dtype=np.float32)
        if feats.ndim not in (2, 3):
            raise ValueError(f"expected 2-D or 3-D features, got shape {feats.shape}")
        patches = feats.shape[1] if feats.ndim == 3 else 1
        # faiss works on contiguous 2-D float32 arrays: one row per (item, patch).
        return np.ascontiguousarray(feats.reshape(-1, feats.shape[-1])), patches

    query_features, n_query_patches = _as_2d_float32(_query_features)
    index_features, n_patches = _as_2d_float32(_index_features)
    n_queries = len(query_features)

    print(query_features.shape, index_features.shape, n_queries, n_query_patches)

    # The index dimension is the descriptor width (last axis).
    index = faiss.index_factory(index_features.shape[1], index_type)#, faiss.METRIC_INNER_PRODUCT)
    if use_gpu:
        try:
            co = faiss.GpuMultipleClonerOptions()
            co.shard=True
            co.shard_type=1
            co.useFloat16=False
            index = faiss.index_cpu_to_all_gpus(index, co=co)
            print("Index in GPU")
        except Exception as err:
            # e.g. CPU-only faiss build or no usable device: keep the CPU index.
            print(f"Index in CPU ({err})")
    else:
        print("Index in CPU")

    print("Training index...", end="")
    index.train(index_features)
    print("done")
    print("Adding features to index...", end="")
    index.add(index_features)
    print("done")

    out_dists = np.zeros((n_queries, max_hits), dtype=np.float32)
    out_idxs  = np.zeros((n_queries, max_hits), dtype=np.int32)
    for ind in progress_bar(range(0, n_queries, BS)):
        fin = min(ind + BS, n_queries)
        D, I = index.search(query_features[ind:fin], max_hits)
        out_dists[ind:fin] = D
        # Map patch rows of the index back to the item they belong to.
        out_idxs[ind:fin] = I // n_patches
    return out_idxs, out_dists

In [None]:
# Show faiss's maximum OpenMP thread count.
faiss.omp_get_max_threads()

In [None]:
# Shape check; the last axis is used as the PCAW size in the next cell.
query_features.shape

In [None]:
#faiss.omp_set_num_threads(31)
# Index recipe passed to get_idxs_and_dists; the PCAW size is set to the feature width.
index_type=f"PCAW{query_features.shape[-1]},L2norm,Flat"
#index_type="PCAW512,L2norm,IVF4096,PQ16"
#index_type="Flat"

#out_idxs, out_dists = get_idxs_and_dists(
#    torch.cat((query_features[0],query_features[1]),dim=-1).squeeze(1), 
#    torch.cat((index_features[0],index_features[1]),dim=-1).squeeze(1), BS = 32*4, index_type=index_type)

# squeeze(1) drops the size-1 middle axis, if present, before the features are searched.
out_idxs, out_dists = get_idxs_and_dists(
    query_features.squeeze(1), 
    index_features.squeeze(1), BS = 32*4, index_type=index_type)

In [None]:
# Inspect the returned distances, sorted ascending within each row
# (the reshape keeps one row per query feature row).
np.sort(out_dists.reshape((-1,int(out_idxs.shape[1]*1))), axis=1)

In [None]:
# Persist the raw search results next to the features; `basename` already ends in
# '.pth', so the files end in '.pth.npy'.
np.save(f'idx_{basename}.npy',  out_idxs)
np.save(f'dist_{basename}.npy', out_dists)

In [None]:
sub_fname = 'test_submission.csv'
# test.csv supplies the ids; every row starts with an empty `images` string that is
# later used as the fallback for ids without computed results.
sample_df = pd.read_csv('test.csv')
sample_df['images'] = ''

In [None]:
# Hits of the first two query rows (rows 0 and 1 belong to the same query image),
# halved to go from index feature rows to index image positions.
out_idxs[0]//2, out_idxs[1]//2

In [None]:
# Scratch check of the hit-merging logic on one query (rows i and i+1).
# Variant 1: de-duplicate on index feature ROWS, then map to images only when printing.
i = 2
idx = np.concatenate([out_idxs[i], out_idxs[i+1]], axis=0)
dst = np.concatenate([out_dists[i],out_dists[i+1]], axis=0) 
# return_index gives the position of the FIRST occurrence of every unique value.
u_idx = np.unique(idx,return_index=True)[1]
i_dst = dst[u_idx]
o_dst =np.argsort(i_dst)
print(idx, o_dst, i_dst)
print(idx[u_idx[o_dst]]//2)

# Variant 2: map rows to images first (//2), then de-duplicate on images.
# NOTE(review): `dst[u_idx]` is the distance at the first occurrence of an image, which
# is not necessarily its smallest distance across the two rows — confirm this is intended.
i = 2
idx = np.concatenate([out_idxs[i], out_idxs[i+1]], axis=0)//2
dst = np.concatenate([out_dists[i],out_dists[i+1]], axis=0) 
u_idx = np.unique(idx,return_index=True)[1]
i_dst = dst[u_idx]
o_dst =np.argsort(i_dst)
print(idx, o_dst, i_dst)
print(idx[u_idx[o_dst]])

In [None]:
sub = {}
# Feature rows come in (original, flipped) pairs per query image, so step two rows at a time.
for i in progress_bar(range(0, len(query_fnames), 2)):
    # Hits of both rows, mapped from index feature rows to index images (two rows per image).
    idx = np.concatenate([out_idxs[i], out_idxs[i+1]], axis=0)//2
    dst = np.concatenate([out_dists[i],out_dists[i+1]], axis=0)

    # Sort all hits by distance first, then keep the first occurrence of every image:
    # each image is ranked by its BEST distance over the two rows. (np.unique on the
    # unsorted hits kept whichever occurrence came first, not the closest one.)
    by_dist = np.argsort(dst, kind='stable')
    idx = idx[by_dist]
    first_seen = np.sort(np.unique(idx, return_index=True)[1])
    _out_idxs = idx[first_seen]

    # index_fnames holds two entries per image, hence the *2; at most 100 ids per query.
    ids = [index_fnames[x*2] for x in _out_idxs[:100]]
    sub[query_fnames[i]] = ' '.join(ids)

In [None]:
# Computed rows are listed first so that drop_duplicates keeps them; ids that only
# appear in sample_df keep their empty `images` string.
sub_df = (
    pd.concat([pd.DataFrame({'id': list(sub), 'images': list(sub.values())}), sample_df])
    .drop_duplicates(subset=['id'])
)
sub_df.to_csv(sub_fname, index=False)

In [None]:
# Preview the first eight submission rows.
sub_df.iloc[:8]

In [None]:
def fix_path(p):
    """Return `p` moved into three nested directories named after the first three
    characters of its file name: parent/a/b/c/abc....jpg."""
    fname = str(p.name)
    first, second, third = fname[0], fname[1], fname[2]
    return p.parent.joinpath(first, second, third, fname)
def image_results(row, n= 12):
    """Open the query image of a submission row followed by its first `n` retrieved images.

    `row.id` is looked up under 'test' and every id in the space-separated
    `row.images` under 'index', both through `fix_path`.
    """
    query_img = open_image(fix_path(Path('test') / (row.id + '.jpg')))
    hit_ids = row.images.split(' ')[:n]
    hit_imgs = [open_image(fix_path(Path('index') / (hit_id + '.jpg'))) for hit_id in hit_ids]
    return [query_img] + hit_imgs
show_all(image_results(sub_df.iloc[0]),r=4,figsize=(20, 20))

In [None]:
# Echo the run identifier that is used in the submission message below.
basename

In [None]:
!kaggle competitions submit -c landmark-retrieval-2019 -f {sub_fname} -m '{basename} {index_type}'

In [None]:
import time
# Pause before the submissions list is queried in the next cell
# (presumably to give the upload time to register — confirm).
time.sleep(10)

In [None]:
!kaggle competitions submissions -c landmark-retrieval-2019 -v > submissions.csv

In [None]:
# Read the listing written by the previous cell and show the public score of its first row.
submissions = pd.read_csv('submissions.csv')
submissions.iloc[0].publicScore