Generate encoded vectors for both query images and artist aggregate images

In [None]:
import read_rijksdata
import pandas as pd
import numpy as np
import tensorflow as tf
# minimum number of artworks per artist; forwarded to read_rijksdata.load_data below
MIN_NUM_ARTWORK = 300


In [None]:
# LOAD IMAGE AND LABELS HERE
# NOTE: point this at your own local image folder before running!
img_folder = '/Users/erebor/Downloads/out_img'

images, labels_onehot, labels, names, total_bc = read_rijksdata.load_data(
    MIN_NUM_ARTWORK=MIN_NUM_ARTWORK,
    img_folder=img_folder,
    labels_file='labels.txt',
    names_file='names.txt',
)

# number of distinct label values among the loaded artworks
classes = len(set(labels))
print('\n# of classes:', classes)

# occurrences of each label value; the smallest is printed next to the requested minimum
counts = pd.DataFrame(labels).value_counts()
print('Min # of artworks for all artists:', counts.min())
print('Min # of artworks specified:', MIN_NUM_ARTWORK)

 |███████████████████████████████████████-| 112038/112039 

Dataset loaded!
images shape: (29703, 56, 56, 3)
labels shape: (29703,)
labels (one-hot): (29703, 50)
names shape: (29703, 1)

# of classes: 50
Min # of artworks for all artists: 303
Min # of artworks specified: 300


In [None]:
# LOAD PRE-TRAINED ENCODER
# Start from a pre-trained base model; other available models are listed at
# https://www.tensorflow.org/api_docs/python/tf/keras/applications

# --- hyperparameters ---
# image size fed to the network
input_shape = (56, 56, 3)

# Number of classes: for now simply the number of unique artists.
# ****The final Dense layer below has `classes` units, so this is the number of
# dimensions of the encoded vector ("D") in Mark's email***
# We'll probably want to adjust this to be smaller or larger (depending on training results).
classes = len(set(labels))

# NOTE(review): per the Keras docs `classes` / `classifier_activation` only apply when
# include_top=True, so they look like no-ops here -- confirm before relying on them.
enet_kwargs = dict(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=input_shape,
    pooling=None,
    classes=classes,
    classifier_activation='softmax',
)
enet_base = tf.keras.applications.efficientnet.EfficientNetB7(**enet_kwargs)

# freeze the base so the encoder DOES NOT train on the images
enet_base.trainable = False

# full model: pre-trained base, then the two final top layers
enet = tf.keras.models.Sequential([
    enet_base,
    tf.keras.layers.GlobalMaxPooling2D(),
    tf.keras.layers.Dense(classes, activation="softmax"),  # last (top) layer of network
])

In [None]:
# print the model's layers with their output shapes and parameter counts
enet.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
efficientnetb7 (Functional)  (None, 2, 2, 2560)        64097687  
_________________________________________________________________
global_max_pooling2d (Global (None, 2560)              0         
_________________________________________________________________
dense (Dense)                (None, 50)                128050    
Total params: 64,225,737
Trainable params: 128,050
Non-trainable params: 64,097,687
_________________________________________________________________


In [None]:
# to generate vectors, just ask the currently loaded pre-trained model to predict
# predict() works on batches, so a single (length, width, channels) image needs a
# leading batch axis; the trailing [0] drops that axis from the result again
image = images[0]  # example: the first loaded artwork
vector = enet.predict(image[np.newaxis, ...])[0]  # 1-D vector of length "D"


# K artworks belonging to one artist as a 4D array (K,length,width,channels)
# example: every work sharing the first artwork's label; stored under its own name
# so the full `images` array is not overwritten
artist_images = images[np.asarray(labels) == labels[0]]


# encode all of the artist's works in one batched call -> array of shape (K, D)
vector_arr = enet.predict(artist_images)

# average over the K works to go back to a 1-D vector with length "D", e.g. "classes"
vector_aggregate = np.mean(vector_arr, axis=0)

In [None]:
# Create encoded tensors for all
# predict() works on batches: pass the whole (N, length, width, channels) array at once
# instead of one un-batched image at a time; the result is an array of shape (N, D)
encoded_arts = enet.predict(images)

# Count how many pieces each artist has
# presumably total_bc holds per-label artwork counts indexed by label id -- TODO confirm
artcounts = total_bc[np.unique(labels)]

In [None]:
# Create aggregate vectors
# Stack the per-artwork encodings into one (num_artworks, D) array. The reshape also
# absorbs a singleton batch axis if the encodings were produced one image at a time.
labels_arr = np.asarray(labels)
encoded_matrix = np.asarray(encoded_arts)
encoded_matrix = encoded_matrix.reshape(len(encoded_matrix), -1)

# One mean vector per artist, in the sorted order returned by np.unique(labels)
aggregate_vectors = []
for artistnum in np.unique(labels_arr):
    # boolean mask selecting this artist's artworks
    artist_mask = labels_arr == artistnum
    # average over the artworks axis so the result is a 1-D vector of length D
    aggregate_vectors.append(encoded_matrix[artist_mask].mean(axis=0))


In [3]:
# Query Image Removal Function
def query_image_remover(qi_vec, avg_vec, artnum):
    """Return the mean vector with the query image's contribution removed.

    If ``avg_vec`` is the mean of ``artnum`` vectors, one of which is
    ``qi_vec``, the result is the mean of the remaining ``artnum - 1``
    vectors: ``(avg_vec - qi_vec / artnum) * artnum / (artnum - 1)``.

    Args:
        qi_vec: Encoded vector of the query image.
        avg_vec: Mean vector that currently includes ``qi_vec``.
        artnum: Number of vectors that were averaged into ``avg_vec``.

    Returns:
        The result of the TensorFlow subtraction, rescaled to the
        leave-one-out mean.

    Raises:
        ValueError: If ``artnum`` is 1 or less, since no vectors would
            remain and the rescaling would divide by zero.
    """
    # Guard first: artnum == 1 divides by zero (and NumPy scalars would silently give inf/nan)
    if np.any(np.less_equal(artnum, 1)):
        raise ValueError(
            "artnum must be greater than 1 to remove the query image from the average"
        )
    # take out the query image's share of the mean ...
    without_query = tf.math.subtract(avg_vec, qi_vec * (1 / artnum))
    # ... then rescale from a divisor of artnum to artnum - 1
    new_vec = without_query * (artnum / (artnum - 1))
    return new_vec
