In [None]:
import argparse
import glob
import os
import pickle
import sys
import warnings

import cv2

from delf import feature_io

import matplotlib.image as mpimg
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd

from scipy.spatial import cKDTree
from skimage.feature import plot_matches
from skimage.measure import ransac
from skimage.transform import AffineTransform

import tensorflow as tf
import tensorflow_hub as hub

from tensorflow.python.platform import app

In [None]:
# --- Dataset locations (relative to the notebook's working directory) -------
train_path = "./train-highres/"
dev_path = "./dev/"
test_path = "./test-highres/"

# Distractor (non-landmark) images. The training pattern contains a wildcard
# and is only ever passed to glob.glob().
non_landmark_train_path = "./distractors/*/"
non_landmark_dev_path = "./distractors-dev/"

# --- Model / data parameters -------------------------------------------------
# Expected number of distinct landmark ids in the training data.
n_cat = 14942

# Size every image is resized to before being fed to the DELF module.
input_shape = (384, 384)

# NOTE(review): not referenced in the cells visible here; presumably a
# descriptor-matching distance cut-off used further down -- confirm.
_DISTANCE_THRESHOLD = 0.8

In [None]:
# Collect the training images found on disk. The image id is the file name
# without directory and extension; os.path is used instead of str.replace so
# that the result does not depend on the path separator glob returns and so
# that a '.jpg' occurring elsewhere in the path is left alone.
train_image_files = glob.glob(train_path + '*.jpg')
train_image_ids = [os.path.splitext(os.path.basename(image_file))[0]
                   for image_file in train_image_files]

train_info_full = pd.read_csv('train.csv', index_col='id')

# Keep only the rows whose image exists locally and attach the file path.
# .assign() builds a new frame instead of writing a column into the frame
# returned by the .loc selection.
train_info = train_info_full.loc[train_image_ids].assign(
    filename=pd.Series(train_image_files, index=train_image_ids))

# Restrict the training set to the landmark ids listed in
# train_info_correct.csv.
train_info_correct = pd.read_csv('train_info_correct.csv', index_col='id')
train_info = train_info[
    train_info['landmark_id'].isin(train_info_correct['landmark_id'])]

train_info.head()

In [None]:
# Distractor (non-landmark) training images: one row per file, all labelled
# with the pseudo landmark id -1.
non_landmark_image_files = glob.glob(non_landmark_train_path + '*.jp*g')
nlm_df = pd.DataFrame(
    {'filename': non_landmark_image_files}
).assign(landmark_id=-1)
print(len(nlm_df))
nlm_df.head()

In [None]:
# Number of distinct landmark ids present in the (filtered) training frame.
n_cat_train = train_info['landmark_id'].nunique()
print(n_cat_train)
if n_cat_train != n_cat:
    # `warnings` is provided by the import cell at the top of the notebook;
    # without that import this branch would fail with a NameError instead of
    # reporting the mismatch.
    warnings.warn('Warning: The training data is not compatible. '
                  'Expected {} landmark ids, found {}.'.format(
                      n_cat, n_cat_train))
    

In [None]:
# Collect the dev images found on disk. The image id is the file name without
# directory and extension (os.path instead of str.replace, so the result does
# not depend on the path separator glob returns).
dev_image_files = glob.glob(dev_path + '*.jpg')
dev_image_ids = [os.path.splitext(os.path.basename(image_file))[0]
                 for image_file in dev_image_files]

# Dev labels are looked up in the full training table. .assign() builds a new
# frame instead of writing a column into the frame returned by .loc.
dev_info = train_info_full.loc[dev_image_ids].assign(
    filename=pd.Series(dev_image_files, index=dev_image_ids))

# Optional filter (currently disabled): keep only dev images whose landmark
# also occurs in the training set.
#dev_info=dev_info[dev_info['landmark_id'].isin(train_info['landmark_id'])]
dev_info.head()

In [None]:
# Distractor (non-landmark) dev images, labelled with the pseudo landmark
# id -1.
non_landmark_dev_image_files = glob.glob(non_landmark_dev_path + '*.jpg')
nlm_dev_df = pd.DataFrame(
    {'filename': non_landmark_dev_image_files}
).assign(landmark_id=-1)
print(len(nlm_dev_df))

# Use string index labels: get_delf_features builds its output file names
# from the index label of each row.
nlm_dev_df.index = list(map(str, nlm_dev_df.index))
nlm_dev_df.head()

In [None]:
test_info_full = pd.read_csv('test.csv', index_col='id')

# Collect the test images found on disk. The image id is the file name without
# directory and extension (os.path instead of str.replace, so the result does
# not depend on the path separator glob returns).
test_image_files = glob.glob(test_path + '*.jpg')
test_image_ids = [os.path.splitext(os.path.basename(image_file))[0]
                  for image_file in test_image_files]

# Keep only the rows whose image exists locally and attach the file path.
# .assign() builds a new frame instead of writing a column into the frame
# returned by the .loc selection.
test_info = test_info_full.loc[test_image_ids].assign(
    filename=pd.Series(test_image_files, index=test_image_ids))

test_info.head()

In [None]:
# Sanity check: print the label of the first training image found on disk.
# The id is looked up in train_info_full because train_info has been filtered
# by landmark id and may no longer contain this particular image, which would
# raise a KeyError.
first_id = train_image_ids[0]
print("Landmark_id of image", train_image_files[0], ":",
      train_info_full.loc[first_id, 'landmark_id'])
print(train_info["landmark_id"].max())

# Display one randomly chosen training image. cv2.imread signals an unreadable
# file by returning None, so fail with a clear message in that case.
sample_file = np.random.choice(train_image_files)
sample_bgr = cv2.imread(sample_file)
if sample_bgr is None:
    raise IOError('Could not read image: ' + str(sample_file))
testimg = cv2.cvtColor(sample_bgr, cv2.COLOR_BGR2RGB)
plt.imshow(testimg)
testimg.shape

In [None]:
def _read_rgb_image(fname, shape):
    """Read an image file as an RGB float32 array scaled to [0, 1].

    Args:
        fname: Path of the image file.
        shape: ``(height, width)`` the image is resized to.

    Returns:
        Array of shape ``(height, width, 3)``, or ``None`` if OpenCV could not
        read or convert the file.
    """
    try:
        bgr = cv2.imread(fname)
        if bgr is None:  # cv2.imread reports unreadable files with None
            return None
        # cv2.resize expects dsize as (width, height), the reverse of the
        # (height, width) order used for the placeholder shape.
        resized = cv2.resize(bgr, (shape[1], shape[0]))
        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    except cv2.error:
        return None
    return (rgb / 255.).astype(np.float32)


def get_delf_features(info, odir, start_i=0):
    """Extract DELF locations and descriptors for every image listed in `info`.

    Each image is resized to the module-level `input_shape`, passed through
    the TF-Hub DELF module and the results are written to
    ``odir + <index label> + '_loc.npy'`` and
    ``odir + <index label> + '_desc.npy'``. Every 100 images the current
    position is printed and stored in ``odir + 'last_i.txt'`` so that an
    interrupted run can be resumed through `start_i`.

    Images that cannot be read are replaced by an all-black image (a warning
    is issued) so that every row still gets output files.

    Args:
        info: DataFrame with a 'filename' column; the index labels are used to
            name the output files.
        odir: Output path prefix, normally a directory ending in '/'.
        start_i: Positional index of the first row to process.

    Returns:
        None. Results are written to disk.
    """
    # Make sure the target directory exists before any expensive work is done.
    out_dir = os.path.dirname(odir)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    tf.reset_default_graph()
    tf.logging.set_verbosity(tf.logging.FATAL)

    m = hub.Module('https://tfhub.dev/google/delf/1')

    # The module operates on a single image at a time, so define a placeholder to
    # feed an arbitrary image in.
    image_placeholder = tf.placeholder(
        tf.float32, shape=(input_shape[0], input_shape[1], 3), name='input_image')

    module_inputs = {
        'image': image_placeholder,
        'score_threshold': 100.0,
        'image_scales': [0.25, 0.3536, 0.5, 0.7071, 1.0, 1.4142, 2.0],
        'max_feature_num': 1000,
    }

    module_outputs = m(module_inputs, as_dict=True)
    fetches = [module_outputs['locations'], module_outputs['descriptors']]

    # Pull the column out once instead of materialising a row Series per image.
    filenames = info['filename'].values
    n_images = len(info)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(start_i, n_images):
            fname = filenames[i]
            # str() so that non-string index labels (e.g. a default integer
            # index) can be used in the output file name as well.
            img_id = str(info.index[i])

            img = _read_rgb_image(fname, input_shape)
            if img is None:
                warnings.warn('Warning: could not read image: '
                              + str(fname) +
                              '. Use black img instead.')
                img = np.zeros((input_shape[0], input_shape[1], 3), dtype=np.float32)

            locations, descriptions = sess.run(
                fetches, feed_dict={image_placeholder: img})

            # Progress report and resume checkpoint. It is written before the
            # outputs of image i are saved, so resuming at the stored index
            # recomputes that image.
            if i % 100 == 0:
                print(i, '/', n_images)
                np.savetxt(odir + 'last_i.txt', np.array([i]))

            np.save(odir + img_id + '_loc.npy', locations)
            np.save(odir + img_id + '_desc.npy', descriptions)
            

In [None]:
# Maximum number of reference images kept per landmark when building the
# reduced training set.
n_ref_imgs = 48
# One-off sampling step, kept commented out for reference: it draws up to
# n_ref_imgs rows per landmark_id from train_info and writes the result to
# 'train_info_red_sample_1.csv'.
# NOTE(review): df.sample() is called without a seed, so re-running it would
# produce a different sample -- presumably why the CSV is cached; confirm.
#def sample(df):
#    return df.sample(min(n_ref_imgs,len(df)))
#train_info_red=train_info.groupby('landmark_id', group_keys=False).apply(sample)
#print(len(train_info_red))
#train_info_red.to_csv('train_info_red_sample_1.csv')

In [None]:
# Load the previously sampled, reduced training set from its cached CSV,
# using the 'id' column as index.
train_info_red = pd.read_csv('train_info_red_sample_1.csv', index_col='id')

In [None]:
# Preview the first rows of the reduced training set.
train_info_red.head()

In [None]:
# Extract DELF features for the reduced training set into 'delf-train/'.
# To resume an interrupted run, set start_i to the index stored in
# 'delf-train/last_i.txt' (get_delf_features writes it every 100 images).
get_delf_features(train_info_red, 'delf-train/', start_i=0)

In [None]:
# Extract DELF features for the dev images into 'delf-dev/'.
get_delf_features(dev_info, 'delf-dev/')

In [None]:
# Extract DELF features for the test images into 'delf-test/'.
# To resume an interrupted run, set start_i to the index stored in
# 'delf-test/last_i.txt' (get_delf_features writes it every 100 images).
get_delf_features(test_info, 'delf-test/', start_i=0)

In [None]:
# Extract DELF features for the non-landmark (distractor) dev images into
# 'delf-nlm-dev/'.
get_delf_features(nlm_dev_df, 'delf-nlm-dev/')