Generate encoded vectors for both query and artist aggregrate images

In [1]:
import read_rijksdata
import pandas as pd
import numpy as np
import tensorflow as tf
MIN_NUM_ARTWORK = 300


In [3]:
# LOAD IMAGE AND LABELS HERE
# replace for your path here!
img_folder = '/Users/erebor/Downloads/out_img'

images, labels_onehot, labels, names, = read_rijksdata.load_data(MIN_NUM_ARTWORK=MIN_NUM_ARTWORK,
                                                 img_folder = img_folder,
                                                 labels_file ='labels.txt',
                                                 names_file = 'names.txt')

total_bc =np.bincount(labels)

classes = len(list(set(labels)))
print('\n# of classes:',classes)

counts = pd.DataFrame(labels).value_counts()
print('Min # of artworks for all artists:',min(counts))
print('Min # of artworks specified:',MIN_NUM_ARTWORK)

 |███████████████████████████████████████-| 112038/112039 

Dataset loaded!
images shape: (29703, 56, 56, 3)
labels shape: (29703,)
labels (one-hot): (29703, 50)
names shape: (29703, 1)

# of classes: 50
Min # of artworks for all artists: 303
Min # of artworks specified: 300


In [4]:
# LOAD PRE-TRAINED ENCODER
# get base pre-trained model first
# more models are available here: https://www.tensorflow.org/api_docs/python/tf/keras/applications

# define hyperparameters
# define image size 
input_shape = (56,56,3)

# define number of classes
# ****THIS sets the number of dimensions of the encoded vector ("D") in Mark's email***
# we'll probably want adjust this to be smaller or larger (depending on training results)
# for now, classes are just the number of unique artist
classes = len(list(set(labels)))
enet_kwargs = {'include_top':False,
               'weights':'imagenet',
               'input_tensor':None,
               'input_shape':input_shape,
               'pooling':None,
               'classes':classes,
               'classifier_activation':'softmax'}
enet_base = tf.keras.applications.efficientnet.EfficientNetB7(**enet_kwargs)

# set that the encoder DOES NOT train on the images
enet_base.trainable = False

# set pre-trained model as base
enet = tf.keras.models.Sequential()
enet.add(enet_base)

# add two final top layers
enet.add(tf.keras.layers.GlobalMaxPooling2D())
enet.add(tf.keras.layers.Dense(classes, activation="softmax")) # last (top) layer of network

In [5]:
enet.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb7 (Functional)  (None, 2, 2, 2560)        64097687  
_________________________________________________________________
global_max_pooling2d (Global (None, 2560)              0         
_________________________________________________________________
dense (Dense)                (None, 50)                128050    
Total params: 64,225,737
Trainable params: 128,050
Non-trainable params: 64,097,687
_________________________________________________________________


In [7]:
images.shape

(29703, 56, 56, 3)

In [8]:
# Create encoded tensors for all 
encoded_arts = []
for image in images:
    print(image.shape)
    vector = enet.predict(image)
    encoded_arts.append(vector)

# Count how many pieces each artist has
artcounts = total_bc[np.unique(labels)]

(56, 56, 3)


ValueError: in user code:

    /opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1478 predict_function  *
        return step_function(self, iterator)
    /opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1468 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1461 run_step  **
        outputs = model.predict_step(data)
    /opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:1434 predict_step
        return self(x, training=False)
    /opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:998 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    /opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/input_spec.py:271 assert_input_compatibility
        raise ValueError('Input ' + str(input_index) +

    ValueError: Input 0 is incompatible with layer sequential: expected shape=(None, 56, 56, 3), found shape=(None, 56, 3)


In [None]:
# Create aggregate vectors
aggregate_vectors = []
for i in range(len(artcounts)):
    artistnum = np.unique(labels)[i] #Gets the number that represents this artist from labels
    artcount = artcounts[i] #Gets number of art pieces by this artist
    artistaddr = np.where(labels == artistnum) #Collects indices of this artist's art

  # Collect and average vectors
  artist_vec = []
    for addr in artistaddr:
        artist_vec.append(encoded_arts[addr,])
    aggregate_vectors.append(np.mean(artist_vec,axis=0))


In [None]:
# Query Image Removal Function
def query_image_remover(qi_vec, avg_vec, artnum):
    new_vec = (tf.math.subtract(avg_vec,  qi_vec * (1/artnum)) * (artnum/(artnum-1)))
    return new_vec
