In [14]:
import numpy as np
import pandas as pd
import sys
import pickle
import matplotlib as mpl
import matplotlib.pyplot as plt
plt.rcParams["figure.dpi"]= 300
mpl.rc('axes.spines',top=False,bottom=False,left=False,right=False);
mpl.rc('axes',facecolor=(0,0,0,0),edgecolor=(0,0,0,0));
mpl.rc(('xtick','ytick'),color=(0,0,0,0));
import time
import PIL
import os
import json
from annoy import AnnoyIndex
import subprocess

import tensorflow
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.applications as ka
from keras.applications.resnet import decode_predictions

class searchEngine():
    '''
    Nearest-neighbour image search over precomputed encodings.

    An instance loads a JSON file that maps image keys (paths relative to the
    image directory, written as "<class>/<file>") to encoding vectors.  It can
    rank the database against a query by brute force (NumPy) or with an Annoy
    forest, display the results, and score retrieval quality per class.

    Attributes set by the methods below:
        encoding_type  -- name of the encoding selected by the user.
        class_key      -- DataFrame read from the ImageNet key CSV.
        _src           -- path of the JSON encoding file.
        _image_dir     -- directory prefix prepended to every key.
        _s3_path       -- S3 location of the encodings ('resnet_1000' only).
        _keys/_values  -- database keys (list) and encodings (2-D array).
        _annoy_forest  -- AnnoyIndex built by build_annoy_forest.
        _tree_count    -- number of trees used for the current forest.
    '''

    # Root folders for every hardcoded path; the full paths are built from these.
    PROJECT_DIR = '/Users/ChrisPenny/Documents/MPCS53112/project_dir'
    MODEL_DIR = '/Users/ChrisPenny/Downloads'

    # Encoding file (relative to PROJECT_DIR) for each supported encoding type.
    _ENCODING_FILES = {
        'resnet_1000': '/encodings/validation_pretrained_resnet/resnet50_validation_1000.txt',
        'resnet_2048': '/encodings/validation_pretrained_resnet/resnet50_validation_2048.txt',
        'ae_simple_2048': '/encodings/ae_simple_validation_2048.txt',
        'ae_dense_2048': '/encodings/ae_dense_validation_2048.txt',
        'ae_bottleneck_2048': '/encodings/ae_bottleneck_validation_2048.txt',
        'debug': '/encodings/DEBUG_ae_bottleneck_validation_2048.txt',
        'combined': '/encodings/validation_pretrained_resnet/resnet50_validation_1000.txt',
    }

    # Only this encoding type has an S3 copy for the distributed search.
    _S3_PATHS = {
        'resnet_1000': 's3://compressedencodings/resnet50_validation_1000_pure_text.txt',
    }

    def __init__(self, encoding_user_set = 'resnet_1000'):
        '''
        Reads the class key CSV, resolves the paths for the requested encoding
        type and tries to load the encoding database.

        A failed load is reported rather than raised, so that an instance can
        still be used to generate the missing encodings.
        '''

        self.class_key = pd.read_csv(self.PROJECT_DIR + '/ImageNet/imagenet_resnet_key.csv')
        self.encoding_type = encoding_user_set
        self.set_src()
        try:
            self.load_database()
        except Exception as err:
            # Exception (not a bare except) so Ctrl-C still interrupts the load.
            print('Must generate encodings for this setting.')
            print('  (load_database failed: ' + repr(err) + ')')

    def brute_force_search_index(self, query_index, num_results, show_flag, normalized=False):
        '''
        Ranks the whole database by Euclidean distance to one of its own rows.

        query_index -- row of the database used as the query.
        num_results -- number of results passed on to show_results.
        show_flag   -- results are displayed only when this equals 1.
        normalized  -- when True, every vector is scaled to unit length first.

        Returns nothing; the ranking is only handed to show_results.
        '''

        database = np.array(self._values)
        query_vec = database[query_index]

        if normalized == True:
            # Compare unit vectors of this instance's own database.
            unit_db = database/np.linalg.norm(database, axis = 1).reshape(-1, 1)
            dist_to_query = np.linalg.norm(query_vec/np.linalg.norm(query_vec) - unit_db, axis = 1)
        else:
            dist_to_query = np.linalg.norm(query_vec - database, axis = 1)

        # Rank by distance:
        dist_ranking = np.argsort(dist_to_query)

        if show_flag == 1:
            self.show_results(query_index, dist_ranking, num_results)

    def _load_rgb_panel(self, key, height = 600):
        '''
        Opens the image stored under `key`, scales it to `height` pixels tall
        (keeping the aspect ratio) and returns it as a 3-channel array.
        '''

        with PIL.Image.open(self._image_dir + key) as img:
            width = int((height/img.height)*img.width)
            # convert('RGB') gives every panel three channels whatever the
            # source mode, so the panels can always be concatenated.
            return np.array(img.resize((width, height)).convert('RGB'))

    def show_results(self, query_idx, dist_rank, num_results):
        '''
        Prints the query key and the result keys and shows the query image
        followed by the results in one horizontal strip.

        Position 0 of dist_rank is skipped because, for an index-based search,
        it is expected to be the query itself.  If fewer than num_results
        entries follow it, only the available ones are shown.
        '''

        print(self._keys[query_idx])

        concat_img = self._load_rgb_panel(self._keys[query_idx])

        for result_idx in dist_rank[1:num_results + 1]:
            result_img = self._load_rgb_panel(self._keys[result_idx])

            # A 60 pixel wide black gap separates neighbouring panels.
            concat_img = np.concatenate([concat_img, np.zeros((600, 60, 3)), result_img], axis = 1)

            print(self._keys[result_idx])

        plt.clf()
        plt.imshow(concat_img/255)
        plt.show()

    def annoy_search_index(self, query_index, num_results, show_flag):
        '''
        Looks up the nearest neighbours of a database row in the Annoy forest.

        Requires build_annoy_forest to have been called.  One extra neighbour
        is requested because show_results skips the first entry.
        Results are displayed only when show_flag equals 1.
        '''

        query_vec = np.array(self._values[query_index])

        # Rank by distance:
        dist_ranking = self._annoy_forest.get_nns_by_vector(query_vec, num_results+1)

        if show_flag == 1:
            self.show_results(query_index, dist_ranking, num_results)

    def distributed_search(self, query_index, num_results, cluster_id):
        '''
        Writes the query vector to vec.txt and runs distributed_prototype.py
        on the given EMR cluster, saving its standard output in output.txt.

        num_results is accepted but not used by this method.
        Raises AttributeError when the encoding type has no S3 path.
        '''

        s3_path = getattr(self, '_s3_path', None)
        if s3_path is None:
            raise AttributeError('No S3 path is configured for encoding type ' +
                                 repr(self.encoding_type))

        # Space separated vector, one value per element:
        query_vec_str = ' '.join(str(elem) for elem in self._values[query_index])

        # The file must be closed (flushed) before the child process reads it.
        with open("vec.txt", "w") as vec_file:
            vec_file.write(query_vec_str)

        # Argument list instead of a shell string: cluster_id is passed
        # verbatim and cannot be interpreted by a shell.
        command = ["python3", "distributed_prototype.py",
                   str(s3_path),
                   "-r", "emr",
                   "--cluster-id=" + str(cluster_id),
                   "--no-read-logs",
                   "--region=us-east-2",
                   "--query=vec.txt"]

        with open("output.txt", "w") as out_file:
            subprocess.run(command, stdout = out_file)

    def build_annoy_forest(self, tree_count, seed = 13):
        '''
        Builds an angular-distance Annoy index over every database row.

        tree_count -- number of trees; remembered in _tree_count.
        seed       -- integer seed handed to AnnoyIndex.set_seed.
        '''

        self._tree_count = tree_count
        forest = AnnoyIndex(len(self._values[0]), 'angular')
        forest.set_seed(seed)

        for idx, item in enumerate(self._values):
            forest.add_item(idx, np.array(item))

        forest.build(tree_count)
        self._annoy_forest = forest

    def load_database(self):
        '''
        Loads keys and encodings from the JSON file at _src.

        For the 'combined' type the loaded encodings and the
        'ae_bottleneck_2048' encodings are each scaled to unit length per row
        and concatenated.  The shapes of the two parts and of the result are
        printed.
        '''

        with open(self._src) as f:
            db = json.load(f)

        self._values = np.array([x for x in db.values()])
        self._keys = list(db.keys())

        if self.encoding_type == 'combined':

            resnet_enc_norm = self._values/np.linalg.norm(self._values, axis = 1).reshape(-1, 1)

            print(resnet_enc_norm.shape)

            # Resolve the second file directly so that encoding_type and _src
            # are never left pointing at the bottleneck encodings.
            ae_src = self.PROJECT_DIR + self._ENCODING_FILES['ae_bottleneck_2048']

            with open(ae_src) as f:
                ae_db = json.load(f)

            # Look rows up by key so both halves describe the same image even
            # when the two files list their keys in a different order.
            ae_values = np.array([ae_db[key] for key in self._keys])
            ae_enc_norm = ae_values/np.linalg.norm(ae_values, axis = 1).reshape(-1, 1)

            print(ae_enc_norm.shape)

            self._values = np.concatenate([resnet_enc_norm, ae_enc_norm], axis = 1)

            print(self._values.shape)

    def load_encoder(self, take_max='Yes'):
        '''
        Loads the original model and uses its parameters to compile the encoding layers.

        Only available for relevant encoding types; for any other type a
        message is printed and no model is set.

        take_max -- 'Yes' or 'No'; only used for 'ae_simple_2048', where 'Yes'
                    adds a global max pooling layer on top of the encoder output.
        '''

        if self.encoding_type == 'resnet_1000':
            self._encoderModel = ka.ResNet50(weights='imagenet',
                                             input_shape = (224, 224, 3))

        elif self.encoding_type == 'resnet_2048':
            underlyingModel = ka.ResNet50(weights='imagenet',
                                          input_shape = (224, 224, 3))

            self._encoderModel = keras.Model(inputs = underlyingModel.input,
                                             outputs = underlyingModel.layers[-2].output)

        elif self.encoding_type == 'ae_simple_2048' and take_max in ('Yes', 'No'):
            underlyingModel = keras.models.load_model(
                                    self.MODEL_DIR + '/AE_Simple_F4_0001_Final.h5')

            encoder_output = underlyingModel.layers[-12].output
            if take_max == 'Yes':
                encoder_output = keras.layers.GlobalMaxPool2D()(encoder_output)

            self._encoderModel = keras.Model(inputs = underlyingModel.input,
                                             outputs = encoder_output)

        elif self.encoding_type == 'ae_bottleneck_2048':
            underlyingModel = keras.models.load_model(
                                    self.MODEL_DIR + '/AE_Bottleneck_F4_0001_Final.h5')

            self._encoderModel = keras.Model(inputs = underlyingModel.input,
                                             outputs = underlyingModel.layers[-15].output)
        else:
            print('No model matching that encoding type.')
            return

        self._encoderModel.compile()

    def prep_image(self, path):
        '''
        Loads an image and returns it as a (1, 224, 224, 3) array.

        The image is resized to 224x224 and converted to RGB, so grayscale,
        RGBA and other modes all end up with three channels.  Pixel values are
        returned as read; no further scaling is applied here.
        '''

        with PIL.Image.open(path) as im:
            im_rgb = im.resize((224, 224)).convert('RGB')

        return np.array(im_rgb).reshape((1, 224, 224, 3))

    def encoder_search(self, query_url, num_results, show = 1):
        '''
        Encodes the image at query_url with the loaded encoder and shows its
        nearest neighbours from the database.

        Requires load_encoder to have been called.  A 10-tree Annoy forest is
        built only when no forest exists yet.  The query image is not a
        database row, so every returned neighbour is shown, including the
        closest one.  Nothing is displayed unless show equals 1.
        '''

        np_image = self.prep_image(query_url)

        query_encoding = self._encoderModel.predict(np_image)

        if not hasattr(self, '_annoy_forest'):
            self.build_annoy_forest(10)

        # Rank by distance:
        dist_ranking = self._annoy_forest.get_nns_by_vector(query_encoding[0], num_results)

        if show == 1:

            with PIL.Image.open(query_url) as im:
                plt.clf()
                plt.imshow(im)
                plt.show()

            plt.rcParams['figure.figsize'] = [12, 8]

            for result_idx in dist_ranking:
                with PIL.Image.open(self._image_dir + self._keys[result_idx]) as result_img:
                    plt.clf()
                    plt.imshow(result_img)
                    plt.show()

                print(self._keys[result_idx])

    def load_decoder(self):
        '''
        Loads the original model and uses its parameters to compile the decoding layers.

        Only available for 'ae_bottleneck_2048'; other types are left untouched.
        '''

        if self.encoding_type == 'ae_bottleneck_2048':
            underlyingModel = keras.models.load_model(
                                    self.MODEL_DIR + '/AE_Bottleneck_F4_0001_Final.h5')
            self._decoderModel = keras.Model(inputs = underlyingModel.layers[-14].input,
                                             outputs = underlyingModel.layers[-1].output)

    def reconstruct_image(self):
        '''
        Placeholder; not implemented.
        '''
        pass

    def save_scores(self, search_mode = 'annoy', normalized = False, ranks = [5, 10, 20, 50], num_classes=1000):
        '''
        Scores encodings by ImageNet class as precision at each rank cutoff
        and writes the scores to a JSON file in the working directory.

        Only valid for validation directories - otherwise leaky.

        search_mode -- 'brute_force' or 'annoy' (annoy needs a built forest).
        normalized  -- brute force only: compare unit-length vectors.
        ranks       -- rank cutoffs; sorted and de-duplicated before use.
        num_classes -- the first num_classes*50 database rows are used as
                       queries (capped at the database size).

        The file maps each class to {rank: [precision per query]}.  It also
        holds a 'time' entry with the search time in seconds of the last
        query only.
        '''

        if search_mode not in ('brute_force', 'annoy'):
            print("Not a valid search mode")
            return

        # Plain ints keep the JSON keys serialisable even for NumPy input.
        rank_cutoffs = sorted({int(r) for r in ranks})
        if not rank_cutoffs:
            raise ValueError('ranks must contain at least one cutoff')
        max_rank = rank_cutoffs[-1]

        # Precompute normalization of each encoding:
        # Saves a lot of runtime.
        if normalized == True:
            self._values_normed = self._values/np.linalg.norm(self._values, axis = 1).reshape(-1, 1)

        # Dictionary keeps track of scores by class:
        score_dict = {}
        last_query_seconds = None

        for idx in range(min(num_classes*50, len(self._keys))):

            if idx % 1000 == 0:
                print(idx)

            # Query info:
            query_key = self._keys[idx]
            query_class = query_key[:query_key.find('/')]

            # Begin timer:
            start_time = time.time()

            if search_mode == 'brute_force':
                if normalized == True:
                    dist_to_query = np.linalg.norm(self._values_normed[idx] - self._values_normed,
                                                   axis = 1)
                else:
                    dist_to_query = np.linalg.norm(self._values[idx] - self._values,
                                                   axis = 1)
                # Rank by distance:
                dist_ranking = np.argsort(dist_to_query)
            else:
                dist_ranking = self._annoy_forest.get_nns_by_vector(self._values[idx], max_rank + 1)

            # End timer:
            last_query_seconds = time.time() - start_time

            # Drop the query itself wherever it sits in the ranking; with
            # duplicate vectors or an approximate index it is not always first.
            neighbours = [r for r in dist_ranking[:max_rank + 1] if r != idx][:max_rank]

            # Compare classes:
            hits = []
            for result_idx in neighbours:
                result_key = self._keys[result_idx]
                hits.append(result_key[:result_key.find('/')] == query_class)

            for cutoff in rank_cutoffs:
                if cutoff > len(hits):
                    # Not enough neighbours were returned for this cutoff.
                    break
                class_scores = score_dict.setdefault(query_class, {})
                class_scores.setdefault(cutoff, []).append(sum(hits[:cutoff])/cutoff)

        if last_query_seconds is not None:
            score_dict['time'] = last_query_seconds

        if search_mode == 'annoy':
            score_path = search_mode + "_" + str(self._tree_count) + "_" + self.encoding_type + "_scores.txt"
        elif normalized == True:
            score_path = search_mode + "_normalized_" + self.encoding_type + "_scores.txt"
        else:
            score_path = search_mode + "_" + self.encoding_type + "_scores.txt"

        with open(score_path, 'w') as outfile:
            json.dump(score_dict, outfile)

    def generate_encodings(self):
        '''
        Encodes every image under <cwd>/ImageNet/organized_validation_resnet/
        (sub-directories 1 to 1000) with the loaded encoder and writes the
        key -> encoding mapping as JSON to _src.

        Requires load_encoder to have been called.  Hidden files (names that
        start with a dot) are skipped.
        '''

        cwd = os.getcwd()
        cwd += '/ImageNet/organized_validation_resnet/'

        files_all = []

        for i in range(1,1001):
            # Sorted so that the key order of the output file is reproducible.
            files = sorted(x for x in os.listdir(cwd + str(i)) if not x.startswith('.'))
            files_all += [str(i) + '/' + x for x in files]

        encodings_dict = {}

        for idx, rel_path in enumerate(files_all):

            if idx % 1000 == 0:
                print(idx)

            out = self._encoderModel.predict(self.prep_image(cwd + rel_path))

            encodings_dict[rel_path] = [float(x) for x in out[0]]

        with open(self._src, 'w') as outfile:
            json.dump(encodings_dict, outfile)

    def set_src(self):
        '''
        Sets a hardcoded path according to user encoding type setting.

        Sets _src and _image_dir for every known type and _s3_path for the
        types that have one.  An unknown type is reported and leaves the
        attributes untouched.
        '''

        relative_src = self._ENCODING_FILES.get(self.encoding_type)
        if relative_src is None:
            print('No encoding file is configured for encoding type ' + repr(self.encoding_type))
            return

        self._src = self.PROJECT_DIR + relative_src
        self._image_dir = self.PROJECT_DIR + '/ImageNet/organized_validation_resnet/'

        if self.encoding_type in self._S3_PATHS:
            self._s3_path = self._S3_PATHS[self.encoding_type]

    def get_src(self):
        '''
        Returns the path of the JSON encoding file currently in use.
        '''
        return self._src
        

# if __name__ == '__main__':




In [2]:
# ae_dense_searcher = searchEngine('ae_dense_2048')
# ae_dense_searcher.load_encoder()
# ae_dense_searcher.generate_encodings()

In [15]:
# One search engine per encoding type, constructed in the order listed.
(resnet_searcher,
 combined_searcher,
 ae_bottleneck_searcher,
 ae_dense_searcher,
 ae_simple_searcher) = [searchEngine(encoding_name)
                        for encoding_name in ('resnet_1000',
                                              'combined',
                                              'ae_bottleneck_2048',
                                              'ae_dense_2048',
                                              'ae_simple_2048')]

(50000, 1000)
(50000, 2048)
(50000, 3048)


In [4]:
annoy_searchers = (resnet_searcher,
                   combined_searcher,
                   ae_bottleneck_searcher,
                   ae_simple_searcher)

# For each forest size: rebuild the index of every searcher first,
# then score all of them (scores are written to one file per searcher and size).
for tree_count in (1000, 100, 10):

    for engine in annoy_searchers:
        engine.build_annoy_forest(tree_count)

    for engine in annoy_searchers:
        engine.save_scores(search_mode = 'annoy', ranks = [5, 10, 20, 50], num_classes=1000)


In [17]:
# Brute force scoring of the ResNet encodings on unit-length vectors.
resnet_searcher.save_scores(
    search_mode = 'brute_force',
    normalized = True,
    ranks = [5, 10, 20, 50],
    num_classes = 1000,
)
# combined_searcher.save_scores(search_mode = 'brute_force', normalized = False, ranks = [5, 10, 20, 50], num_classes=1000)
# ae_bottleneck_searcher.save_scores(search_mode = 'brute_force', normalized = False, ranks = [5, 10, 20, 50], num_classes=1000)
# ae_dense_searcher.save_scores(search_mode = 'brute_force', normalized = False, ranks = [5, 10, 20, 50], num_classes=1000)
# ae_simple_searcher.save_scores(search_mode = 'brute_force', normalized = False, ranks = [5, 10, 20, 50], num_classes=1000)

# resnet_searcher.save_scores(search_mode = 'brute_force', normalized = False, ranks = [5, 10, 20, 50], num_classes=1000)
# combined_searcher.save_scores(search_mode = 'brute_force', normalized = False, ranks = [5, 10, 20, 50], num_classes=1000)
# ae_bottleneck_searcher.save_scores(search_mode = 'brute_force', normalized = False, ranks = [5, 10, 20, 50], num_classes=1000)
# ae_simple_searcher.save_scores(search_mode = 'brute_force', normalized = False, ranks = [5, 10, 20, 50], num_classes=1000)

0


KeyboardInterrupt: 

In [None]:
# Annoy scoring of the combined encodings (uses the forest built last).
combined_searcher.save_scores(
    search_mode = 'annoy',
    ranks = [5, 10, 20, 50],
    num_classes = 1000,
)

In [None]:
def process_scores(self, search_mode, ranks = [5, 10, 20, 50]):
    '''
    Loads a score file written by searchEngine.save_scores and returns the
    precision at ranks 5, 10 and 20, each averaged first per class and then
    over all classes.

    self        -- object providing `encoding_type` (and `_tree_count` for
                   annoy scores), e.g. a searchEngine instance.
    search_mode -- prefix of the score file name, e.g. 'annoy'.
    ranks       -- accepted for compatibility; not used.

    The file "<search_mode>_<encoding_type>_scores.txt" is used when it
    exists.  Otherwise, for annoy scores, the name that save_scores writes
    ("annoy_<tree_count>_<encoding_type>_scores.txt") is used.
    '''

    score_src = search_mode + "_" + self.encoding_type + "_scores.txt"

    if search_mode == 'annoy' and not os.path.exists(score_src):
        tree_count = getattr(self, '_tree_count', None)
        if tree_count is not None:
            score_src = (search_mode + "_" + str(tree_count) + "_" +
                         self.encoding_type + "_scores.txt")

    with open(score_src) as f:
        loaded_scores = json.load(f)

    def map_at_rank(rank):

        class_averages = []

        for (k, v) in loaded_scores.items():
            # Entries such as 'time' hold a single number, not per-rank scores.
            if not isinstance(v, dict):
                continue
            class_averages.append(np.mean(v[rank]))

        return np.mean(class_averages)

    return map_at_rank('5'), map_at_rank('10'), map_at_rank('20')

# Precision at ranks 5, 10 and 20 for the annoy scores of both searchers.
for scored_engine in (resnet_searcher, combined_searcher):
    print(process_scores(scored_engine, 'annoy'))

In [None]:
# Pick one random database row and show its brute force results
# for the ResNet encodings and for the combined encodings.
search_idx = np.random.randint(0,50000)

print(search_idx)

for position, compared_engine in enumerate((resnet_searcher, combined_searcher)):
    if position > 0:
        print('-----')
    compared_engine.brute_force_search_index(query_index=search_idx, num_results=5, show_flag=1)

# 18759
# 40728

In [None]:
# resnet_searcher.load_encoder()
# resnet_searcher.encoder_search("/Users/ChrisPenny/Desktop/IMG_3657.jpg", 10)

In [None]:
# searcher1.brute_force_search_index(query_index=1600, num_results=5, show_flag=1)
# searcher.brute_force_search_index(query_index=1593, num_results=5, show_flag=1)
# searcher1.brute_force_search_index(query_index=1605, num_results=5, show_flag=1)
# searcher.brute_force_search_index(query_index=1605, num_results=5, show_flag=1)

# searcher1.brute_force_search_index(query_index=45608, num_results=5, show_flag=1)
# print('-----')
# searcher.brute_force_search_index(query_index=45608, num_results=5, show_flag=1)

# searcher1.brute_force_search_index(query_index=9100, num_results=5, show_flag=1)
# print('-----')
# searcher.brute_force_search_index(query_index=9100, num_results=5, show_flag=1, normalized = True)

# Shows the brute force results for database row 981 twice: once with plain
# distances and once with unit-length (normalized) vectors.
# NOTE(review): `searcher1` and `searcher` are not assigned in any cell shown
# above, so this cell raises NameError on a fresh kernel - confirm which of
# the *_searcher objects were meant.
searcher1.brute_force_search_index(query_index=981, num_results=5, show_flag=1)
print('-----')
searcher.brute_force_search_index(query_index=981, num_results=5, show_flag=1, normalized = True)

In [None]:
# build_annoy_forest(tree_count, seed=13): the second argument is the integer
# seed, not a file name, so the former 'test.ann' argument is dropped and the
# default seed is used.
# NOTE(review): `searcher` is not assigned in any cell shown above - confirm
# which searcher object was meant.
searcher.build_annoy_forest(10)

In [None]:
# searcher.annoy_search_index(query_index=601, num_results=5, show_flag=1)

# Annoy lookup for database row 6016, showing the five nearest results.
searcher.annoy_search_index(
    query_index=6016,
    num_results=5,
    show_flag=1,
)

In [None]:
# Annoy lookup for database row 600, showing the five nearest results.
searcher.annoy_search_index(
    query_index=600,
    num_results=5,
    show_flag=1,
)

In [None]:
# Distributed search for database row 9901 on the given EMR cluster;
# the job output is written to output.txt.
searcher.distributed_search(
    9901,
    20,
    'j-3HHZFA4CQZND8',
)

In [None]:
# Tokens of the last line of output.txt after stripping brackets, commas,
# quotes, the '1<TAB>' prefix and the newline.
# Starts empty so that an empty file prints [] instead of raising NameError.
out = []

with open('output.txt') as f:
    for line in f:
        out = (line.replace('[','').
                    replace(']','').
                    replace(',', '').
                    replace('"','').
                    replace('1\t','').
                    replace('\n', '').split(" "))

# Every second token, starting with the second one.
print(out[1::2])

In [None]:
# Read the query vector back from vec.txt; the values of the last line are kept.
with open('./vec.txt') as vec_file:
    for vec_line in vec_file:
        out = vec_line.split(" ")

out = list(map(float, out))
print(out)

In [None]:
import tensorflow
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.applications as ka

In [None]:
# Load the trained bottleneck autoencoder and list its layers.
bottleneck_model_path = '/Users/ChrisPenny/Downloads/AE_Bottleneck_F4_0001_Final.h5'
testModel = keras.models.load_model(bottleneck_model_path)
testModel.summary()

In [None]:


# encoderModel = keras.Model(inputs = testModel.input, 
#                            outputs = keras.layers.GlobalMaxPool2D()
#                            (testModel.layers[-12].output))

# Encoder: from the autoencoder input up to the output of its 15th layer from the end.
encoder_output = testModel.layers[-15].output
encoderModel = keras.Model(inputs = testModel.input, outputs = encoder_output)
encoderModel.compile()
encoderModel.summary()

# for layer in testModel.layers:
#     print(layer.name)
    
# testModel.layers[-15].name

# out

In [None]:
# query_url =  "/Users/ChrisPenny/Documents/MPCS53112/project_dir/ImageNet/organized_validation_resnet/9/ILSVRC2012_val_00027762.JPEG"

# query_url = "/Users/ChrisPenny/Desktop/IMG_3657.jpg"


# out = testModel.predict(np_im)



# print(len(encoding[0]))

# plt.clf()
# plt.imshow(np_im[0]/255)
# plt.show()


# plt.clf()
# plt.imshow(out[0]/255)
# plt.show()

In [None]:
# images = '/Users/ChrisPenny/Documents/MPCS53112/project_dir/ImageNet/organized_validation_resnet/'

# im = PIL.Image.open(query_url)
# im_resize = im.resize((224, 224))
# np_im = np.array(im_resize).reshape((1, 224, 224, 3))




In [None]:
# Encode the validation images from position 49000 onwards with the
# notebook-level `encoderModel` and save them as the DEBUG encoding file.
cwd = os.getcwd()
cwd += '/ImageNet/organized_validation_resnet/'

files_all = []

for i in range(1,1001):
    files = os.listdir(cwd + str(i))  # Get all the files in that directory
    files_all += [str(i) + '/' + x for x in files]


encodings_dict = {}

for idx in range(49000, len(files_all)):
    
    if idx % 1000 == 0:
        print(idx)
    
    path = cwd + files_all[idx]
    im = PIL.Image.open(path)
    im_resize = im.resize((224, 224))
    if np.array(im_resize).shape == (224, 224):
        # Grayscale: repeat the single channel three times.
        im_resize = np.stack([im_resize, im_resize, im_resize], axis = -1)
    elif np.array(im_resize).shape == (224, 224, 4):
        # Four channels: keep the first three.
        im_resize = np.array(im_resize)
        im_resize = im_resize[:, :, 0:3]
    np_im = np.array(im_resize).reshape((1, 224, 224, 3))
    
    # There is no `self` at notebook level; use the encoder built in the cell above.
    out = encoderModel.predict(np_im)
    
    encodings_dict[files_all[idx]] = list([float(x) for x in out[0]])
    
with open('/Users/ChrisPenny/Documents/MPCS53112/project_dir/encodings/DEBUG_ae_bottleneck_validation_2048.txt', 'w') as outfile:
    json.dump(encodings_dict, outfile)
        

In [None]:
# Write the encodings collected above to the DEBUG encoding file (overwrites it).
debug_encodings_path = '/Users/ChrisPenny/Documents/MPCS53112/project_dir/encodings/DEBUG_ae_bottleneck_validation_2048.txt'
with open(debug_encodings_path, 'w') as debug_outfile:
    json.dump(encodings_dict, debug_outfile)