###### A few imports

In [2]:
import numpy as np
import tensorflow as tf
import os
from lshutils import *
import matplotlib.pyplot as plt
%matplotlib inline
from bokeh.plotting import figure,output_file,show

In [3]:
# Data prep: build the 'mnist' dataset object that every later cell reads from.
data = Dataset('mnist')

Extracting ./MNIST/train-images-idx3-ubyte.gz
Extracting ./MNIST/train-labels-idx1-ubyte.gz
Extracting ./MNIST/t10k-images-idx3-ubyte.gz
Extracting ./MNIST/t10k-labels-idx1-ubyte.gz


###### Define hyperparameters

In [11]:
# Experiment-wide settings; the cells below read these names directly.
input_dim = 784  # d
max_index = 1000  # upper bound of the training-image slice used as hash inputs
sampling_ratio = 0.10  # forwarded to the fly-hash constructors
nnn = 20  # number of nearest neighbours to compare, 2% of max_index as in paper
hash_lengths = [2, 4, 8, 12, 16, 20, 24, 28, 32]  # values of k swept by each comparison
map_points = 500  # second argument of every findmAP call
get_weights = 'dec'  # presumably selects the decoder weights in AutoEncoder.train -- TODO confirm

In [5]:
# Hash inputs: the first `max_index` training images.
inputs_ = data.data.train.images[:max_index]

###### Use this cell to compare normal LSH and fly algorithm

In [None]:
# Compare normal LSH with the fly algorithm: for each hash length k, build both
# models `n_trials` times and record the mean average precision of every run.
# Result layout: all_MAPs[k] == {'Fly': [mAP, ...], 'LSH': [mAP, ...]}.
n_trials = 20  # repetitions per hash length
all_MAPs = {}
for hash_length in hash_lengths:  # k
    embedding_size = int(20 * hash_length)  # 20k; the alternative is int(10*input_dim), i.e. 10d
    # Register the lists up front so an interrupted run still leaves partial results.
    all_MAPs[hash_length] = {'Fly': [], 'LSH': []}
    for _ in range(n_trials):
        fly_model = flylsh(inputs_, hash_length, sampling_ratio, embedding_size)
        fly_mAP = fly_model.findmAP(nnn, map_points)
        all_MAPs[hash_length]['Fly'].append(fly_mAP)

        dense_model = LSH(inputs_, hash_length)
        dense_mAP = dense_model.findmAP(nnn, map_points)
        all_MAPs[hash_length]['LSH'].append(dense_mAP)
    print(f'{hash_length} done')

plot_mAP(all_MAPs, hash_lengths, keys=['Fly', 'LSH'])

###### These cells allow experimenting with hyperparameters of autoencoder

In [8]:
def make_grid(originals, recons, img_shape=(28, 28), figsize=(20, 4)):
    """Draw original images in the top row and their reconstructions below.

    Parameters
    ----------
    originals, recons : sequences of arrays
        One image per column. Each item must support ``.reshape(img_shape)``.
        The number of columns is ``len(originals)``; pairing with the axes
        uses ``zip``, so extra items in ``recons`` are ignored.
    img_shape : tuple, default (28, 28)
        Shape every image is reshaped to before being shown.
    figsize : tuple, default (20, 4)
        Figure size forwarded to ``plt.subplots``.

    Returns
    -------
    None
        The figure is created through pyplot and is not returned.
    """
    # squeeze=False keeps `axes` two-dimensional even for a single column;
    # otherwise a one-image call yields a 1-D array and the inner zip would
    # receive a bare Axes object instead of a row of axes.
    fig, axes = plt.subplots(nrows=2, ncols=len(originals), sharex=True, sharey=True,
                             figsize=figsize, squeeze=False)
    for images, row in zip([originals, recons], axes):
        for img, ax in zip(images, row):
            ax.imshow(img.reshape(img_shape), cmap='Greys_r')
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
    fig.tight_layout(pad=0.1)

In [12]:
# Build an autoencoder with sizes [784, 400] and is_sparse=True, then train it;
# whatever `train` returns for get_weights is kept as `decoder`.
# NOTE(review): the variable is called `denseae` although is_sparse=True -- confirm intent.
denseae = AutoEncoder([784, 400], is_sparse=True, beta=100.0)
decoder = denseae.train(data, max_index, get_weights)

In [None]:
# Distribution of the values in `decoder`: min-max scale them to [0, 1],
# count them in 100 bins, and plot count against each bin's left edge.
decoder_min = np.min(decoder)
decoder_max = np.max(decoder)
decoder_scaled = (decoder - decoder_min) / (decoder_max - decoder_min)
bins, edges = np.histogram(decoder_scaled, bins=100)  # `bins` holds the counts
plt.plot(edges[:-1], bins, color='red')
plt.show()

In [None]:
# Second autoencoder, same configuration, but given the earlier `decoder`
# result through dec_weights; train it without showing reconstructions.
net = AutoEncoder([784, 400], is_sparse=True, beta=100.0, dec_weights=decoder)
all_weights = net.train(data, max_index, show_recon=0)
# Please uncomment these two lines to see the reconstructions:
#all_weights,(originals,recons)=net.train(data,maxsize=10_000,show_recon=True)
#make_grid(originals,recons)

In [None]:
# Distribution of weights: min-max scale `all_weights` to [0, 1], count the
# values in 100 bins, and plot count against each bin's left edge.
weights_min = np.min(all_weights)
weights_max = np.max(all_weights)
weights_scaled = (all_weights - weights_min) / (weights_max - weights_min)
bins, edges = np.histogram(weights_scaled, bins=100)  # `bins` holds the counts
plt.plot(edges[:-1], bins, color='red')
plt.show()

###### Compare Fly LSH with random weights vs. weights learnt from an autoencoder

In [None]:
# For each hash length k: train one non-sparse autoencoder, then run five
# trials scoring a hash built from its weights ('DenseAEFly') and a plain
# fly hash ('Fly').
# Result layout: all_MAPs[k] == {'Fly': [...], 'DenseAEFly': [...]}.
all_MAPs = {}
for hash_length in hash_lengths:  # k
    embedding_size = int(20 * hash_length)  # int(10*input_dim) #20k or 10d
    all_MAPs[hash_length] = {'Fly': [], 'DenseAEFly': []}

    # The autoencoder is trained once per hash length and shared by all trials.
    dense = AutoEncoder([784, embedding_size], is_sparse=False, beta=100.0)
    d_weights = dense.train(data, maxsize=10_000, show_recon=0)

    for _ in range(5):
        daefly_model = AEflylsh(inputs_, hash_length, sampling_ratio, d_weights)
        daefly_mAP = daefly_model.findmAP(nnn, map_points)
        all_MAPs[hash_length]['DenseAEFly'].append(daefly_mAP)

        fly_model = flylsh(inputs_, hash_length, sampling_ratio, embedding_size)
        fly_mAP = fly_model.findmAP(nnn, map_points)
        all_MAPs[hash_length]['Fly'].append(fly_mAP)

    print(f'{hash_length} done')
        
#print(all_MAPs)

In [None]:
# Plot the scores collected in all_MAPs for every hash length, one series per key.
plot_mAP(
    all_MAPs,
    hash_lengths,
    keys=['Fly', 'DenseAEFly'],
)

###### Compare an autoencoder trained with sparse weights against a normal (dense) autoencoder

In [None]:
# For each hash length k: train one non-sparse and one sparse autoencoder,
# then run five trials scoring a hash built from each set of weights.
# Result layout: all_MAPs[k] == {'SparseAEFly': [...], 'DenseAEFly': [...]}.
all_MAPs = {}
for hash_length in hash_lengths:  # k
    embedding_size = int(20 * hash_length)  # int(10*input_dim) #20k or 10d
    all_MAPs[hash_length] = {'SparseAEFly': [], 'DenseAEFly': []}

    # Both autoencoders are trained once per hash length and shared by all trials.
    dense = AutoEncoder([784, embedding_size], is_sparse=False, beta=100.0)
    d_weights = dense.train(data, maxsize=10_000, show_recon=0)
    sparse = AutoEncoder([784, embedding_size], is_sparse=True, beta=100.0)
    s_weights = sparse.train(data, maxsize=10_000, show_recon=0)

    for _ in range(5):
        daefly_model = AEflylsh(inputs_, hash_length, sampling_ratio, d_weights)
        daefly_mAP = daefly_model.findmAP(nnn, map_points)
        all_MAPs[hash_length]['DenseAEFly'].append(daefly_mAP)

        saefly_model = AEflylsh(inputs_, hash_length, sampling_ratio, s_weights)
        saefly_mAP = saefly_model.findmAP(nnn, map_points)
        # Store the sparse model's own score. The previous code appended the
        # undefined name `aefly_mAP`, which fails on a fresh kernel and could
        # record a leftover value otherwise.
        all_MAPs[hash_length]['SparseAEFly'].append(saefly_mAP)

    print(f'{hash_length} done')
        
#print(all_MAPs)

In [None]:
# Plot sparse- vs dense-autoencoder scores.
# NOTE(review): only hash lengths 2-16 are plotted here, although the loop that
# fills all_MAPs iterates over every entry of hash_lengths -- confirm this is intended.
plot_mAP(
    all_MAPs,
    [2, 4, 8, 12, 16],
    keys=['SparseAEFly', 'DenseAEFly'],
)