Initial commit

cvjena · Aug 17, 2017 · 7f46b70 · 7f46b70
1 parent 8b2d249
commit 7f46b70
Show file tree

Hide file tree

Showing 38 changed files with 26,412 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -1,2 +1,81 @@
-# aid
 Automatic Query Image Disambiguation (AID)
+==========================================
+
+This repository contains the reference implementation of AID and code that can be used
+to reproduce the results from the corresponding paper:
+
+> Björn Barz and Joachim Denzler.
+> "Automatic Query Image Disambiguation for Content-based Image Retrieval."
+
+If you use AID, please cite that paper.
+
+
+Dependencies
+------------
+
+##### Mandatory
+
+- Python >= 3.3
+- numpy
+- scipy
+- scikit-learn
+
+##### Optional
+
+- caffe & pycaffe (required if you want to extract the image features yourself)
+- tqdm (for progress bars during feature extraction)
+- matplotlib (if you would like to generate graphs for Precision@k)
+
+
+Reproducing the results from the paper
+--------------------------------------
+
+### Getting the features
+
+Before you can actually run the benchmark of the different query image disambiguation methods,
+you need to compute some features for the images in the dataset. You can either just download
+a [.npy file with pre-computed features][1] (49 MB) for the MIRFLICKR dataset or you can extract
+the features yourself as follows:
+
+1. Download the MIRFLICKR-25K dataset:
+   http://press.liacs.nl/mirflickr/mirflickr25k/mirflickr25k.zip (2.9 GB)
+2. Extract the downloaded file inside of the `mirflickr` directory of this repository, so that you
+   end up with another `mirflickr` directory inside of the top-level `mirflickr` directory.
+3. Download the pre-trained weights of the VGG 16 model and store them in the `model` directory:
+   http://www.robots.ox.ac.uk/%7Evgg/software/very_deep/caffe/VGG_ILSVRC_16_layers.caffemodel (528 MB)
+4. From the root directory of the repository, run: `python extract_features.py`
+
+### Running the benchmark
+
+Once you have downloaded or extracted the features of the dataset images, you can run the benchmark
+as follows:
+
+    python evaluate_query_disambiguation.py --show_sd --plot_precision
+
+See `python evaluate_query_disambiguation.py --help` for the full list of options.
+
+The result should be similar to the following:
+
+                |   AP   |  P@1   |  P@10  |  P@50  | P@100  |  NDCG  | NDCG@100
+    ----------------------------------------------------------------------------
+    Baseline    | 0.3753 | 0.7286 | 0.6800 | 0.6100 | 0.5664 | 0.8223 |   0.5880
+    CLUE        | 0.3810 | 0.9100 | 0.8133 | 0.6462 | 0.5816 | 0.8290 |   0.6232
+    Hard-Select | 0.3849 | 0.8457 | 0.8469 | 0.6846 | 0.6011 | 0.8314 |   0.6426
+    AID         | 0.4625 | 0.8757 | 0.8206 | 0.7211 | 0.6711 | 0.8531 |   0.6991
+
+
+    Standard Deviation:
+
+                |   AP   |  P@1   |  P@10  |  P@50  | P@100  |  NDCG  | NDCG@100
+    ----------------------------------------------------------------------------
+    Baseline    | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 | 0.0000 |   0.0000
+    CLUE        | 0.0005 | 0.0239 | 0.0074 | 0.0045 | 0.0033 | 0.0003 |   0.0037
+    Hard-Select | 0.0006 | 0.0270 | 0.0068 | 0.0072 | 0.0031 | 0.0005 |   0.0039
+    AID         | 0.0053 | 0.0203 | 0.0087 | 0.0085 | 0.0088 | 0.0017 |   0.0075
+
+The baseline results should match exactly, while deviations may occur in the other rows due to
+randomization.
+
+
+
+[1]: http://www.inf-cv.uni-jena.de/dbvmedia/de/Barz/AID/features.npy
diff --git a/aid.py b/aid.py
@@ -0,0 +1,159 @@
+import numpy as np
+import scipy.linalg
+from sklearn.decomposition import PCA
+from sklearn.cluster import KMeans
+from sklearn.metrics.pairwise import rbf_kernel
+from sklearn.utils.graph import graph_laplacian
+
+from common import baseline_retrieval
+
+
+
+# Approximate decimal resolution of float32. Used below as a lower bound for vector norms
+# that serve as divisors during normalization, so that zero-length vectors do not cause a division by zero.
+EPS = np.finfo('float32').resolution
+
+
+
+## AID ##
+
+
+def automatic_image_disambiguation(features, queries, select_clusters, gamma = 1.0, k = 200, n_clusters = None, max_clusters = 10):
+    """ Automatic Image Disambiguation (our method) based on clustering of directions and directed boni.
+    
+    features - n-by-d matrix containing d-dimensional features of n samples.
+    
+    queries - Dictionary mapping query IDs to dictionaries with keys 'relevant' and 'img_id'. 'img_id' gives the ID of the query
+              image and 'relevant' points to a list of IDs of images relevant for this query.
+    
+    select_clusters - Callback function taking a query dictionary with keys 'relevant' and 'img_id' and a list of lists of images
+                      for each cluster as arguments and returning a list of indices of selected clusters.
+    
+    gamma - Controls the effect of the cluster selection. For gamma < 1.0, the direction of samples must match the selected direction
+            more exactly for those samples being adjusted, while for very large gamma, even samples in the orthogonal direction will
+            be assigned a highly adjusted distance.
+    
+    k - The number of baseline retrieval results to be used for the initial clustering step.
+    
+    n_clusters - The number of clusters (image senses) to be shown to the user for selection of the relevant clusters. If set to None,
+                 the number of clusters will be determined heuristically.
+    
+    max_clusters - Maximum number of clusters. Has only an effect if n_clusters is None.
+    
+    Returns: re-ranked retrieval results as dictionary mapping query IDs to tuples consisting of an ordered list of retrieved image IDs
+             and a corresponding list of adjusted distances to the query.
+    """
+
+    # Baseline retrieval
+    retrievals = baseline_retrieval(features, queries, select_clusters)
+
+    for qid, (ret, distances) in retrievals.items():
+
+        query = queries[qid]
+        query_feat = features[query['img_id']]
+
+        # Compute directions from query to results
+        directions = features[ret] - query_feat[None,:]
+        directions /= np.maximum(np.linalg.norm(directions, axis = -1, keepdims = True), EPS)
+
+        # Cluster directions of top results
+        nc = n_clusters if (n_clusters is not None) and (n_clusters >= 1) else determine_num_clusters_spectral(directions[:k, :], max_clusters = max_clusters)
+        if nc > 1:
+            km = KMeans(nc, n_init = 100, max_iter = 1000, n_jobs = -1)
+            # The KMeans implementation of sklearn <= 0.18.X suffers from numerical precision errors when using float32,
+            # so we convert the data to float64 for clustering. See: https://github.com/scikit-learn/scikit-learn/issues/7705
+            cluster_ind = km.fit_predict(directions[:k, :].astype(np.float64))
+
+            # Ask user to select relevant clusters
+            cluster_preview = [[id for id, l in zip(ret, cluster_ind) if l == i] for i in range(nc)]
+            selected_clusters = select_clusters(query, cluster_preview)
+
+            # Re-rank results by taking their direction in relation to the selected clusters into account
+            if (len(selected_clusters) > 0) and (len(selected_clusters) < nc):
+                distances = adjust_distances(distances, directions, km.cluster_centers_[selected_clusters, :], gamma)
+                ind = np.argsort(distances)
+                retrievals[qid] = (ret[ind], distances[ind])
+
+    return retrievals
+
+
+def determine_num_clusters_spectral(X, max_clusters = 10, gamma = None):
+    """ Determine number of clusters based on Eigengaps of Graph Laplacian. """
+
+    if gamma is None:
+        gamma = np.sqrt(X.shape[1])
+
+    adjacency = rbf_kernel(X, gamma = gamma)
+    laplacian = graph_laplacian(adjacency, normed = True, return_diag = False)
+    eig = scipy.linalg.eigh(laplacian, eigvals = (0, min(max_clusters, laplacian.shape[0] - 1)), eigvals_only = True)
+
+    eigengap = eig[1:] - eig[:-1]
+    return np.argmax(eigengap) + 1
+
+
+def adjust_distances(distances, directions, selected_directions, gamma = 1.0):
+    """ Reduce distances of samples in the selected directions and increase distances of samples in the opposite directions.
+    
+    distances - Vector of length n with distances of samples in the database to the query.
+    
+    directions - n-by-d matrix with directions from the query to samples in the database, normalized to unit length.
+    
+    selected_directions - m-by-d matrix of relevant directions.
+    
+    gamma - Controls the effect of the cluster selection. For gamma < 1.0, the direction of samples must match the selected direction
+            more exactly for those samples being adjusted, while for very large gamma, even samples in the orthogonal direction will
+            be assigned a highly adjusted distance.
+    
+    Returns: adjusted distances of the samples in the database to the query.
+    """
+
+    # Broadcast single direction to matrix
+    if selected_directions.ndim == 1:
+        selected_directions = selected_directions[None,:]
+
+    # Normalize directions
+    directions = directions / np.maximum(np.linalg.norm(directions, axis = -1, keepdims = True), EPS)
+    selected_directions = selected_directions / np.maximum(np.linalg.norm(selected_directions, axis = -1, keepdims = True), EPS)
+
+    # Compute cosine similarity to most similar direction as dot product (thanks to normalization)
+    sim = np.dot(directions, selected_directions.T).max(axis = -1)
+
+    # Fuse distance to query and similarity to directions and re-sort results
+    max_dist = np.max(distances)
+    return distances - np.sign(sim) * (np.abs(sim) ** gamma) * max_dist
+
+
+
+## Hard Cluster Selection on the same clusters as AID ##
+
+
+def hard_cluster_selection(features, queries, select_clusters, k = 200, n_clusters = None, max_clusters = 10):
+    """ Hard Cluster Selection as used by CLUE, but on the clusters determined by AID (our method). """
+
+    # Baseline retrieval
+    retrievals = baseline_retrieval(features, queries, select_clusters)
+
+    for qid, (ret, distances) in retrievals.items():
+
+        query = queries[qid]
+        query_feat = features[query['img_id']]
+
+        # Compute directions from query to results
+        directions = features[ret] - query_feat[None,:]
+        directions /= np.maximum(np.linalg.norm(directions, axis = -1, keepdims = True), EPS)
+
+        # Cluster directions of top results
+        nc = n_clusters if (n_clusters is not None) and (n_clusters >= 1) else determine_num_clusters_spectral(directions[:k, :], max_clusters = max_clusters)
+        if nc > 1:
+            km = KMeans(nc, n_init = 100, max_iter = 1000, n_jobs = -1)
+            cluster_ind = km.fit_predict(directions[:k, :].astype(np.float64))
+
+            # Ask user to select relevant clusters
+            cluster_preview = [[id for id, l in zip(ret, cluster_ind) if l == i] for i in range(nc)]
+            selected_clusters = select_clusters(query, cluster_preview)
+
+            # Put images from the selected clusters first
+            retrievals[qid] = (
+                np.concatenate(([id for i, id in enumerate(ret[:k]) if cluster_ind[i] in selected_clusters], [id for i, id in enumerate(ret[:k]) if cluster_ind[i] not in selected_clusters], ret[k:])),
+                np.concatenate(([dist for i, dist in enumerate(distances[:k]) if cluster_ind[i] in selected_clusters], [dist for i, dist in enumerate(distances[:k]) if cluster_ind[i] not in selected_clusters], distances[k:]))
+            )
+
+    return retrievals