# Make quilter: nearest-neighbor retrieval quilts (each validation image next to its closest training images)

In [35]:
import lib
from lib.inference import retrieval

from sklearn.metrics.pairwise import euclidean_distances
import time

%load_ext autoreload
%autoreload 2

%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [53]:
import numpy as np
import os
import matplotlib.pyplot as plt

# Directory that addFullImagePath() prepends to image basenames.
IMAGES_DIR = '/ssd/esteva/skindata4/images'
# Output path template: save_quilt() writes one file per batch, inserting
# '-<number>' before the extension.
SAVE_NAME='/tmp/retrieval/quilt.jpg'

In [24]:

def readtxt(fn):
    """Reads a text file into a 1-D numpy array, one stripped line per entry.

    Args:
        fn: path of the text file to read.

    Returns:
        np.ndarray of strings; leading/trailing whitespace (including the
        newline) is removed from every line.
    """
    # The context manager closes the handle deterministically instead of
    # leaving it to the garbage collector.
    with open(fn) as f:
        return np.array([line.strip() for line in f])

def readmat(fn):
    """Loads the file at `fn` with np.load and returns the result."""
    mat = np.load(fn)
    return mat

def addFullImagePath(basename):
    """Returns `basename` joined onto the module-level IMAGES_DIR."""
    full_path = os.path.join(IMAGES_DIR, basename)
    return full_path

def keep_unique_filenames(filenames, mat):
    """Reduces filenames and mat based on filename uniqueness.

    Filenames come in the form:
        '-0GnmpIR2i8V5M.jpg'
        '-0GnmpIR2i8V5M.jpg_1'
        '-0GnmpIR2i8V5M.jpg_2'
        ...
    This function strips the _1, _2 from them, then takes the unique set.

    Args:
        filenames: sequence of filename strings, one per row of `mat`.
        mat: array indexable by an integer index array along its first axis;
            row i belongs to filenames[i].

    Returns:
        (unique_filenames, unique_mat): the sorted unique stripped filenames
        (as returned by np.unique) and, for each of them, the row of `mat`
        at the first position where that filename occurred.

    Raises:
        AssertionError: if the inputs differ in length or a filename does not
            contain '.jpg'.
    """
    assert len(filenames) == len(mat), (
        'filenames and mat must have the same length: %d vs %d'
        % (len(filenames), len(mat)))

    stripped = []
    for f in filenames:
        assert '.jpg' in f, 'Only .jpg is supported: %s' % f
        # Cut at the LAST '.jpg' so only the trailing '_k' suffix is dropped;
        # cutting at the first one would truncate a name that happens to
        # contain '.jpg' earlier in the string.
        stripped.append(f.rpartition('.jpg')[0] + '.jpg')

    # return_index yields the position of the first occurrence of each unique
    # name, which selects the matching row of mat.
    unique_filenames, indices = np.unique(stripped, return_index=True)
    unique_mat = mat[indices]
    return unique_filenames, unique_mat


def NN_sort(dist_mat, filenames):
    """Reorders the rows of dist_mat, and filenames with them, by ascending
    distance to each row's nearest neighbor (the row-wise minimum).

    Returns the reordered (dist_mat, filenames) pair.
    """
    n_rows = dist_mat.shape[0]
    assert n_rows == len(filenames)
    # Smallest distance per row, then the permutation that sorts those minima.
    order = np.argsort(np.min(dist_mat, axis=1))
    return dist_mat[order], filenames[order]

In [3]:
# Load features and filenames
dirname = '/archive/esteva/experiments/skindata4/inflammatory2/retrieve'

# Train data: read the filename list and the feature matrix, then keep a
# single row per stripped filename.
train_filenames, train_features = keep_unique_filenames(
    readtxt(os.path.join(dirname, 'train-filenames.txt')),
    readmat(os.path.join(dirname, 'train-features.npy')))

# Validation data: same procedure on the validation files.
val_filenames, val_features = keep_unique_filenames(
    readtxt(os.path.join(dirname, 'validation-filenames.txt')),
    readmat(os.path.join(dirname, 'validation-features.npy')))


In [25]:
# Find nearest neighbors and sort the distance matrix, val_filenames AND
# val_features by first nearest neighbor distance.
#
# val_features must be permuted together with val_filenames: dist_mat is
# recomputed from val_features every time this cell runs, so if only the
# filenames were reordered, re-running the cell would pair distance rows with
# the wrong filenames.
t = time.time()
dist_mat = euclidean_distances(val_features, train_features)
# Passing row indices in place of filenames makes NN_sort hand back the
# permutation it applied, so the same order can be reused below.
dist_mat, nn_order = NN_sort(dist_mat, np.arange(len(val_filenames)))
val_filenames = val_filenames[nn_order]
val_features = val_features[nn_order]
print(time.time() - t)

2.03960800171


In [30]:
# For every row of dist_mat, list the column indices ordered by increasing
# distance (index_mat[i, 0] is the column with the smallest distance).
t = time.time()
index_mat = dist_mat.argsort(axis=1)
print(time.time() - t)

10.8288450241


In [31]:
# Sanity check: the distance matrix and its argsort should have equal shapes.
print(dist_mat.shape)
print(index_mat.shape)

(4704, 33521)
(4704, 33521)


In [None]:
# Sort 

In [32]:
# Put filenames from val and train into quilt format: one row per validation
# image, holding that image followed by its N nearest training images, all
# expanded to full paths.
N = 10
# NOTE(review): image_dir is not read below; paths are built by
# addFullImagePath, which uses IMAGES_DIR.
image_dir = '/ssd/esteva/skindata4/images'
quilt_paths = []
for fn, indices in zip(val_filenames, index_mat):
    row = [fn] + train_filenames[indices[:N]].tolist()
    quilt_paths.append([addFullImagePath(name) for name in row])
    

In [72]:
def save_quilt(quilt_paths, M=50):
    """Saves quilted images, M rows of quilt_paths (vertical thumbnails) at a time.

    quilt_paths is consumed in consecutive slices of at most M entries. Each
    slice is passed to retrieval.quiltTheImages and the result is saved to
    '<root>-<k><ext>', where <root> and <ext> are SAVE_NAME split at its
    extension and k is the number of entries processed once the slice is done.

    Args:
        quilt_paths: sliceable sequence of quilt rows, as accepted by
            retrieval.quiltTheImages.
        M: maximum number of entries per saved image.
    """
    N = len(quilt_paths)
    # splitext only splits off the final extension, so dots elsewhere in the
    # path (e.g. in a directory name) are preserved.
    root, ext = os.path.splitext(SAVE_NAME)

    for i in range(0, N, M):
        # Clamp so the last, possibly shorter, batch does not report (or get
        # named after) more rows than exist.
        done = min(i + M, N)
        fn = '%s-%d%s' % (root, done, ext)
        print('\rQuilting %d/%d %s' % (done, N, fn))
        quilt = retrieval.quiltTheImages(quilt_paths[i:i + M])
        quilt.save(fn)

In [73]:
# Write every quilt row to disk in batches, using save_quilt's default M=50.
save_quilt(quilt_paths)

Quilting 50/4704 /tmp/retrieval/quilt-50.jpg
Quilting 100/4704 /tmp/retrieval/quilt-100.jpg
Quilting 150/4704 /tmp/retrieval/quilt-150.jpg
Quilting 200/4704 /tmp/retrieval/quilt-200.jpg
Quilting 250/4704 /tmp/retrieval/quilt-250.jpg
Quilting 300/4704 /tmp/retrieval/quilt-300.jpg
Quilting 350/4704 /tmp/retrieval/quilt-350.jpg
Quilting 400/4704 /tmp/retrieval/quilt-400.jpg
Quilting 450/4704 /tmp/retrieval/quilt-450.jpg
Quilting 500/4704 /tmp/retrieval/quilt-500.jpg
Quilting 550/4704 /tmp/retrieval/quilt-550.jpg
Quilting 600/4704 /tmp/retrieval/quilt-600.jpg
Quilting 650/4704 /tmp/retrieval/quilt-650.jpg
Quilting 700/4704 /tmp/retrieval/quilt-700.jpg
Quilting 750/4704 /tmp/retrieval/quilt-750.jpg
Quilting 800/4704 /tmp/retrieval/quilt-800.jpg
Quilting 850/4704 /tmp/retrieval/quilt-850.jpg
Quilting 900/4704 /tmp/retrieval/quilt-900.jpg
Quilting 950/4704 /tmp/retrieval/quilt-950.jpg
Quilting 1000/4704 /tmp/retrieval/quilt-1000.jpg
Quilting 1050/4704 /tmp/retrieval/quilt-1050.jpg
Quilting 11

In [49]:
# Preview a small slice of the quilt inline (entries 1000-1009).
preview_rows = quilt_paths[1000:1010]
quilt = retrieval.quiltTheImages(preview_rows)
a = np.asarray(quilt)

plt.imshow(a)