In [1]:
import tensorflow
import tensorflow.keras as keras
import tensorflow.keras.applications as ka
import numpy as np
import sys
from keras.applications.resnet import decode_predictions
from sklearn.model_selection import train_test_split
import pickle

import matplotlib
import matplotlib.pyplot as plt
# Default size for every matplotlib figure in this notebook, as (width, height):
plt.rcParams["figure.figsize"] = (4, 4)

In [2]:
# Pretrained ImageNet ResNet50, keeping its classification head:
base_model = ka.ResNet50(include_top=True, weights='imagenet', input_shape=(224, 224, 3))

# Second model on the same graph: one forward pass returns both the output of
# the last layer and the output of the layer right before it (per the original
# note this is the 2048-wide layer -- not verifiable from the truncated summary).
last_layer = base_model.layers[-1]
penultimate_layer = base_model.layers[-2]
model = keras.Model(
    inputs=base_model.input,
    outputs=[last_layer.output, penultimate_layer.output],
)

# One Adam instance is handed to both compile calls (base model first).
# NOTE(review): only predict() is called on `model` in the cells shown here, and
# categorical cross-entropy is also attached to the penultimate-layer output --
# confirm the compile settings are actually needed before training with them.
opt = keras.optimizers.Adam(learning_rate=0.0001)
for net in (base_model, model):
    net.compile(optimizer=opt, loss='categorical_crossentropy', metrics='accuracy')

# Print the base architecture for debugging (two-output variant left disabled):
# model.summary()
base_model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
___________________________________________________________________________________________

In [3]:
# Validation images live here, laid out in the resnet class structure:
directory = './ImageNet/organized_validation_resnet/'
# Class sub-directory names '1' .. '1000', passed explicitly so the class
# ordering is fixed by this list:
class_order = list(map(str, range(1, 1001)))

# ResNet-specific input preprocessing, applied to every loaded image:
resnet_preprocess = keras.applications.resnet.preprocess_input

# Image data generator that runs the preprocessing function on each image:
val_id_gen = keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=resnet_preprocess
)

# Directory iterator over the validation set: one 224x224 RGB image per step,
# read without shuffling so the order is the same on every pass:
val_gen = val_id_gen.flow_from_directory(
    directory,
    classes=class_order,
    target_size=(224, 224),
    color_mode='rgb',
    batch_size=1,
    shuffle=False,
    seed=13,
)

Found 50000 images belonging to 1000 classes.


In [4]:
# Preallocate the per-chunk buffers once: 10000 images of 224x224x3 and their
# 1000-way label rows.
# float32 is requested explicitly: np.zeros defaults to float64, which would
# make x_val about 12 GB instead of about 6 GB. The Keras image generator
# yields float32 batches by default and the network computes in float32, so
# nothing is lost by storing the data at that precision.
x_val = np.zeros((10000, 224, 224, 3), dtype=np.float32)
y_val = np.zeros((10000, 1000), dtype=np.float32)

In [5]:
# Encode the whole validation set chunk by chunk and pickle each chunk's
# predictions to its own file.
# The chunk size follows the preallocated buffer and the chunk count follows the
# generator, so the loop cannot index past x_val (50000 images / 10000 per chunk
# gives 5 chunks for the current setup). Any remainder that does not fill a
# whole chunk is not processed.
chunk_size = x_val.shape[0]
num_chunks = val_gen.samples // chunk_size

# Rewind the generator so re-running this cell starts from the first image
# again instead of continuing from wherever a previous run stopped:
val_gen.reset()

for z in range(1, num_chunks + 1):
    # Fill the preallocated numpy arrays one image at a time:
    for i in range(chunk_size):
        if i % 1000 == 0:
            print(i)

        # Assumes val_gen yields a single image/label pair per call
        # (batch_size=1 in the generator cell); the leading batch axis of
        # length 1 is broadcast away by the assignments below.
        # next(...) is used instead of the iterator's .next() method because
        # the builtin works on any Python iterator.
        x, y = next(val_gen)
        x_val[i, :, :, :] = x
        y_val[i, :] = y

    # Predict on the model with two outputs; `out` holds one array per output
    # (final-layer predictions first, penultimate-layer features second):
    out = model.predict(x_val)

    # Dump only the model outputs for this chunk (y_val is NOT written to the
    # file). The context manager guarantees the file is flushed and closed even
    # if pickling fails part-way through.
    encoding_path = "./encodings/pretrained_resnet/imagenet_val_2048_encoding_" + str(z) + ".p"
    with open(encoding_path, "wb") as encoding_file:
        pickle.dump(out, encoding_file, -1)


0
1000
2000
3000
4000
5000
6000
7000
8000
9000
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
0
1000
2000
3000
4000
5000
6000
7000
8000
9000


In [None]:
# print(out.shape)

# for pred in out:
# #     print(pred.shape)
#     pred = pred.reshape(1, 1000)
#     decoded = keras.applications.resnet.decode_predictions(
#         pred, top=5
#     )
# #     print(decoded)

In [None]:
# print(out_intermediate[2].shape)

In [None]:
# # out = out_intermediate

# dist_from_first = []
# image_dict = {}
# index = 1444
# first_features = out[index]
# first_image = x_val[index][:,:,:]
# print(first_image.shape)

# # hash images:
# # for idx in range(x_val.shape[0]):
# #     image_dict[str(out[idx])] = x_val[idx] 

# plt.clf()
# plt.imshow(first_image/255)
# plt.show()

# # # for i in range(400):
# # #     if i % 25 == 0:
# # #         print(i)
# for idx, item in enumerate(out):
# #     print(first_features.shape)
# #     print(item.shape)
#     dist_from_first.append((np.linalg.norm(first_features-item), (str(item))))

# # dist_from_first.sort()
# print(dist_from_first[0])

# # for i in range(20):
# #     plt.clf()
# #     img = image_dict[dist_from_first[i][1]]
# #     plt.imshow(img/255)
# #     plt.show()