In [1]:
import numpy as np
import pandas as pd
import pickle
from lshash.lshash import LSHash

import time

In [None]:
# Load the pickled feature matrix for the dress subset (later cells use it as a
# 2-D array: one row per image, indexed as feat_dresses[i,:]).
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('Data/feature_matrix/fc6_full_dress.p','rb') as f:
    feat_dresses = pickle.load(f)

In [None]:
# Create an LSHash index sized for the feature vectors: second argument is the
# number of columns of feat_dresses; the first (8) is presumably the hash size
# in bits — TODO confirm against the lshash documentation.
lsh = LSHash(8,feat_dresses.shape[1])

In [None]:
# Add every row of feat_dresses to the index, attaching the row number as
# extra_data so query hits can be mapped back to rows, then print the elapsed
# wall-clock seconds.
start = time.time()
for i in range(feat_dresses.shape[0]):
    lsh.index(feat_dresses[i,:],extra_data = i)
print('{:.3f}'.format(time.time()-start))

In [None]:
# Time a single query: up to 10 results for row 101 with
# distance_func="euclidean". The result list is kept in `r`.
start = time.time()
r = lsh.query(feat_dresses[101,:], num_results=10, distance_func="euclidean")
print('{:.3f}'.format(time.time()-start))

In [None]:
# Rebind `lsh` to a fresh index and build a binarized copy of the features:
# bin_dresses is True wherever the original value is > 0.
lsh = LSHash(8,feat_dresses.shape[1])
bin_dresses = (feat_dresses>0)

# Index each boolean row (converted to a Python list) with its row number as
# extra_data, and print the elapsed wall-clock seconds.
start = time.time()
for i in range(bin_dresses.shape[0]):
    lsh.index(list(bin_dresses[i,:]),extra_data = i)
print('{:.3f}'.format(time.time()-start))

In [None]:
# Time a single query with distance_func="hamming" for row 101.
# NOTE(review): when cells run top to bottom, `lsh` now holds the binarized
# rows (bin_dresses) while the query vector is the raw feat_dresses row —
# confirm that mixing the two representations is intended.
start = time.time()
r = lsh.query(feat_dresses[101,:], num_results=10, distance_func="hamming")
print('{:.3f}'.format(time.time()-start))

In [None]:
# Query row 100 twice against the same index — once with
# distance_func="hamming", once with the library default — and print the
# distance stored in the entry at position 1 of each result list (r[1][1]).
# The second line is labelled 'E' (lshash's default is its "euclidean"
# distance) so the two printed values can be told apart; previously both
# lines were labelled 'H'.
r = lsh.query(feat_dresses[100,:], num_results=10, distance_func="hamming")
print('H {}'.format(r[1][1]))
r = lsh.query(feat_dresses[100,:], num_results=10)
print('E {}'.format(r[1][1]))


In [None]:
# Number of entries in the most recent query result.
len(r)

In [None]:
# Show the binarized feature row 101 as a plain Python list.
# NOTE: this prints every element of the row; slice it if the output is too long.
list(bin_dresses[101,:])

In [None]:
from IPython.display import display
import PIL
from PIL import ImageDraw



def draw_bbox(bbox):
    """Return the image referenced by ``bbox`` with its bounding box outlined in red.

    Parameters
    ----------
    bbox : mapping (for example a DataFrame row)
        Must provide ``image_name`` (a path relative to ``Data/DeepFashion/``)
        and the two box corners as ``x_1``, ``y_1`` and ``x_2``, ``y_2``.

    Returns
    -------
    PIL.Image.Image
        A new RGBA image with the rectangle drawn on it.
    """
    # Open inside a context manager so the file handle is released
    # deterministically; convert() hands back an independent in-memory image.
    with PIL.Image.open('Data/DeepFashion/'+bbox['image_name']) as src:
        img = src.convert('RGBA')

    first_corner = (bbox['x_1'], bbox['y_1'])
    second_corner = (bbox['x_2'], bbox['y_2'])

    # Draw directly on the converted copy. The earlier version also allocated a
    # fully transparent overlay and alpha-composited it on top, which changed
    # no pixels and only cost an extra full-size image.
    ImageDraw.Draw(img).rectangle((first_corner, second_corner), outline="red")
    return img

def gen_pairs(N,image_df,lsh,feature_matrix):
    """Display N random images, each followed by its closest non-identical LSH hit.

    Parameters
    ----------
    N : int
        Number of query rows to sample (with replacement). Also used to size
        the query (``num_results=N+5``), as in the original code.
    image_df : pandas.DataFrame
        One row per image, positionally aligned with ``feature_matrix``; rows
        are passed to ``draw_bbox``.
    lsh : object
        Index exposing ``query(vector, num_results=...)`` and returning a list
        of ``((vector, extra_data), distance)`` entries, where ``extra_data``
        is the row position stored at indexing time.
    feature_matrix : 2-D array
        Feature vectors; row ``i`` is the query for image ``i``.

    Returns
    -------
    None
        Images are rendered with ``display`` as a side effect.
    """
    img_index = np.random.randint(image_df.shape[0],size=N)

    for i in img_index:
        match = lsh.query(feature_matrix[i,:], num_results=N+5)
        if not match:
            # The query returned no candidates, so there is no pair to show.
            continue

        # Skip leading hits at distance <= 0 (the query image itself or exact
        # duplicates). The scan is bounded: if every hit is at distance <= 0
        # the last one is used instead of running off the end of the list.
        j = 0
        while match[j][1] <= 0 and j < len(match) - 1:
            j += 1
        n = match[j][0][1]

        display(draw_bbox(image_df.iloc[i]))
        display(draw_bbox(image_df.iloc[n]))

In [2]:
# Load the DeepFashion bounding-box and category tables (whitespace-separated).
# The separator is a raw string: '\s' in a normal string literal is an invalid
# escape sequence and triggers a warning on current Python versions.
bbox = pd.read_table('Data/DeepFashion/list_bbox.txt',sep=r'\s+')
cat = pd.read_table('Data/DeepFashion/list_category_img.txt',sep=r'\s+')
# Keep the bounding boxes whose category label is 41 (presumably the dress
# category, given the variable name — TODO confirm against the category list).
# The mask comes from `cat`, so both tables must share the same row index.
dresses = bbox[cat['category_label'] == 41]

In [None]:
# Show 3 randomly chosen dress images, each followed by the hit that gen_pairs
# selects for it from the index currently bound to `lsh`.
gen_pairs(3,dresses,lsh,feat_dresses)

In [None]:
# Same construction as before but with 16 as the first LSHash argument
# (presumably the hash size in bits — TODO confirm).
lsh16 = LSHash(16,feat_dresses.shape[1])

# Index every row with its row number as extra_data and print the elapsed
# wall-clock seconds.
start = time.time()
for i in range(feat_dresses.shape[0]):
    lsh16.index(feat_dresses[i,:],extra_data = i)
print('{:.3f}'.format(time.time()-start))

In [None]:
# Repeat the visual check with the lsh16 index: 3 random dress images, each
# followed by the hit gen_pairs selects for it.
gen_pairs(3,dresses,lsh16,feat_dresses)

In [3]:
def test_pairs(N,image_df,lsh,feature_matrix):
    img_index = np.random.randint(image_df.shape[0],size=N)
    cat_match = []
    for i in img_index:
        j = 0
        match = lsh.query(feature_matrix[i,:], num_results=N+5)
        dist = match[j][1]
        n = match[j][0][1]
        while (dist<=0 & j<len(match)-1):
            j += 1
            dist = match[j][1]
            n = match[j][0][1]

        cat_match.append(image_df.iloc[i]['category_label'] == image_df.iloc[n]['category_label'])
        
    return cat_match

# FULL SET


In [4]:
class MacOSFile(object):
    """File-object wrapper that splits very large reads into smaller chunks.

    Everything except ``read`` is delegated to the wrapped file object.
    Presumably this works around platforms (the class name suggests macOS)
    where a single read of 2 GiB or more fails — TODO confirm.

    Parameters
    ----------
    f : file-like object opened in binary mode
        The underlying stream.
    max_chunk : int, optional
        Largest number of bytes requested from ``f`` in one call. Defaults to
        ``2**31 - 1``.
    """

    def __init__(self, f, max_chunk=(1 << 31) - 1):
        if max_chunk < 1:
            raise ValueError("max_chunk must be a positive integer")
        self.f = f
        self.max_chunk = max_chunk

    def __getattr__(self, item):
        # __getattr__ only runs when normal lookup fails. If our own attributes
        # are missing (instance created without __init__), fail cleanly
        # instead of recursing through self.f forever.
        if item in ('f', 'max_chunk'):
            raise AttributeError(item)
        return getattr(self.f, item)

    def read(self, n=-1):
        """Read up to ``n`` bytes, issuing several smaller reads when ``n`` is large.

        Returns whatever the wrapped ``read`` returns for requests of at most
        ``max_chunk`` bytes (or for ``n`` of ``None`` or negative), and a
        ``bytearray`` holding the bytes actually read otherwise.
        """
        if n is None or n < 0 or n <= self.max_chunk:
            return self.f.read(n)

        buffer = bytearray(n)
        pos = 0
        while pos < n:
            # Parenthesised default: `1 << 31 - 1` would evaluate to `1 << 30`.
            chunk = self.f.read(min(n - pos, self.max_chunk))
            if not chunk:
                # End of stream before n bytes were available.
                break
            # Advance by what was actually returned; a short read must not
            # shift later data or resize the buffer.
            buffer[pos:pos + len(chunk)] = chunk
            pos += len(chunk)
        # Drop the unused tail so the caller never sees zero padding.
        del buffer[pos:]
        return buffer

In [5]:
# Combine the bounding-box and category tables row by row (join on index).
# Column names present in both tables get a trailing '_' on the bbox side and
# stay unchanged on the cat side.
df = bbox.join(cat,lsuffix='_',rsuffix='')

In [6]:
# Load the pickled feature matrix for the full image set. The file is wrapped
# in MacOSFile so that very large reads issued by pickle are split into chunks.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open('Data/feature_matrix/fc6_full_set.p','rb') as f:
    feat = pickle.load(MacOSFile(f))

In [7]:
# Euclidean (L2) norm of every row of feat, reshaped to a column so it
# broadcasts against feat in a row-wise division.
n = np.reshape(np.sqrt(np.sum(feat**2,axis=1)),(-1,1))

In [8]:
# Scale each row of feat by its norm. NOTE: a row whose norm is 0 would divide
# by zero here and yield NaN/inf values.
feat_norm = feat/n

## LSH 8

In [10]:
# LSHash index for the full feature set, created with 8 as the first argument
# and the feature dimensionality as the second.
lsh8 = LSHash(8,feat.shape[1])

In [11]:
# Index every row of feat with its row number as extra_data and print the
# elapsed wall-clock seconds.
start = time.time()
for i in range(feat.shape[0]):
    lsh8.index(feat[i,:],extra_data = i)
print('{:.3f}'.format(time.time()-start))

107.985


In [13]:
# Time a single query (up to 10 results) for row 100 with the default distance.
start = time.time()
r = lsh8.query(feat[100,:], num_results=10)
print('{:.3f}'.format(time.time()-start))

3.168


In [14]:
# Run test_pairs on 100 random rows; print the wall-clock time and the mean of
# the returned flags (fraction of pairs whose category labels agree).
start = time.time()
matches = test_pairs(100,df,lsh8,feat)
print('{:.3f} s'.format(time.time()-start))
print('{:.3f} match'.format(np.mean(matches)))

287.934 s
0.530 match


## LSH 8 Normalized Vector

In [9]:
# New index with the same parameters; this rebinds lsh8, so the name no longer
# refers to any previously built index.
lsh8 = LSHash(8,feat.shape[1])

In [10]:
# Index the normalized rows (feat_norm) with their row numbers as extra_data
# and print the elapsed wall-clock seconds.
start = time.time()
for i in range(feat.shape[0]):
    lsh8.index(feat_norm[i,:],extra_data = i)
print('{:.3f}'.format(time.time()-start))

121.935


In [11]:
# Run test_pairs on 1000 random rows of the normalized features; print the
# wall-clock time and the fraction of pairs whose category labels agree.
start = time.time()
matches = test_pairs(1000,df,lsh8,feat_norm)
print('{:.3f} s'.format(time.time()-start))
print('{:.3f} match'.format(np.mean(matches)))

2562.426 s
0.584 match


In [26]:
# Time a single query (up to 10 results) for normalized row 101 with
# distance_func='euclidean'.
start = time.time()
r = lsh8.query(feat_norm[101,:], num_results=10,distance_func='euclidean')
print('{:.3f}'.format(time.time()-start))

1.731


## LSH 12

In [7]:
# Build an index with 12 as the first LSHash argument, add every row of feat
# with its row number as extra_data, and print the elapsed wall-clock seconds.
lsh12 = LSHash(12,feat.shape[1])
start = time.time()
for i in range(feat.shape[0]):
    lsh12.index(feat[i,:],extra_data = i)
print('{:.3f}'.format(time.time()-start))

113.795


In [8]:
# Time a single query (up to 10 results) for row 100 with the default distance.
start = time.time()
r = lsh12.query(feat[100,:], num_results=10)
print('{:.3f}'.format(time.time()-start))

0.623


In [9]:
# Run test_pairs on 1000 random rows; print the wall-clock time and the
# fraction of pairs whose category labels agree.
start = time.time()
matches = test_pairs(1000,df,lsh12,feat)
print('{:.3f} s'.format(time.time()-start))
print('{:.3f} match'.format(np.mean(matches)))

393.631 s
0.527 match


## LSH 12 Normalized vector

In [9]:
# Rebind lsh12 to a fresh index and fill it with the normalized rows
# (feat_norm), each tagged with its row number; print the elapsed seconds.
lsh12 = LSHash(12,feat_norm.shape[1])
start = time.time()
for i in range(feat_norm.shape[0]):
    lsh12.index(feat_norm[i,:],extra_data = i)
print('{:.3f}'.format(time.time()-start))

113.865


In [10]:
# Run test_pairs on 1000 random rows of the normalized features; print the
# wall-clock time and the fraction of pairs whose category labels agree.
start = time.time()
matches = test_pairs(1000,df,lsh12,feat_norm)
print('{:.3f} s'.format(time.time()-start))
print('{:.3f} match'.format(np.mean(matches)))

346.085 s
0.553 match


## LSH 16

In [7]:
# Build an index with 16 as the first LSHash argument, add every row of feat
# with its row number as extra_data, and print the elapsed wall-clock seconds.
lsh16 = LSHash(16,feat.shape[1])
start = time.time()
for i in range(feat.shape[0]):
    lsh16.index(feat[i,:],extra_data = i)
print('{:.3f}'.format(time.time()-start))

113.558


In [13]:
# Time a single query (up to 10 results) for row 100, then print the distance
# stored in the entry at position 1 of the result list.
start = time.time()
r = lsh16.query(feat[100,:], num_results=10)
print('{:.3f}'.format(time.time()-start))
print(r[1][1])

0.024
111.052812539


In [19]:
# Run test_pairs on 1000 random rows; print the wall-clock time and the
# fraction of pairs whose category labels agree.
start = time.time()
matches = test_pairs(1000,df,lsh16,feat)
print('{:.3f} s'.format(time.time()-start))
print('{:.3f} match'.format(np.mean(matches)))

58.639 s
0.512 match


In [25]:
# extra_data (the row number stored at indexing time) of the first three
# entries of the most recent query result.
list(m[0][1] for m in r[0:3])

[100, 28121, 22351]

In [31]:
def write_pairs(N,k,image_df,lsh,feature_matrix):
    """Build tab-separated lines pairing random images with their nearest LSH hits.

    Parameters
    ----------
    N : int
        Number of query rows to sample (with replacement). Also used to size
        the query (``num_results=N+5``), as in the original code.
    k : int
        Maximum number of neighbours listed per query image.
    image_df : pandas.DataFrame
        One row per image, positionally aligned with ``feature_matrix``; must
        contain an ``image_name`` column of strings.
    lsh : object
        Index exposing ``query(vector, num_results=...)`` and returning a list
        of ``((vector, extra_data), distance)`` entries, where ``extra_data``
        is the row position stored at indexing time.
    feature_matrix : 2-D array
        Feature vectors; row ``i`` is the query for image ``i``.

    Returns
    -------
    list of str
        One line per sampled row that produced at least one hit: the query
        image name followed by up to ``k`` neighbour image names, separated by
        tabs. Rows whose query returns nothing are skipped.
    """
    img_index = np.random.randint(image_df.shape[0],size=N)

    pairs = []

    for i in img_index:
        match = lsh.query(feature_matrix[i,:], num_results=N+5)
        if not match:
            # No candidates came back for this query; nothing to write.
            continue

        # Skip leading hits at distance <= 0 (the query itself or exact
        # duplicates). `and` is required here: the former
        # `dist<=0 & j<len(match)-1` parsed as `dist <= (0 & j) < len(match)-1`,
        # so the bound on j was never checked and the scan could run past the
        # end of the list. If every hit is at distance <= 0 the last one is used.
        j = 0
        while match[j][1] <= 0 and j < len(match) - 1:
            j += 1
        neighbours = [m[0][1] for m in match[j:(j+k)]]

        names = [image_df.iloc[i]['image_name']]
        names.extend(image_df.iloc[m]['image_name'] for m in neighbours)
        pairs.append("\t".join(names))

    return pairs

In [32]:
# Sample 300 query images, list up to 3 lsh16 neighbours for each, and write
# the resulting tab-separated lines to fc6_full_LSH.txt (one line per query).
with open('fc6_full_LSH.txt', 'w') as f:
    f.write('\n'.join(write_pairs(300,3,df,lsh16,feat)))

In [27]:
# Number of rows (feature vectors) in the full feature matrix.
feat.shape[0]

289222