# Save recommender


In [76]:
import pandas as pd
import os, sys
import json

import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs
from tensorflow.keras.applications.xception import preprocess_input
from tensorflow.keras.preprocessing import image
from typing import Union, Optional, Tuple, Dict, Text

from PIL import Image
import matplotlib.pyplot as plt 
from time import time
import numpy as np

sys.path.append('../src')

import settings
import utils
from utils import make_fpath_from_id
from preprocess_images import resize_image
from utils_tf import ImageNormalizer, parse_image_func
from create_recommender import test_an_id, create_image_query_from_fpath


In [2]:
tfrs.__version__

'v0.6.0'

In [3]:
tf.__version__

'2.6.0'

# Load metadata

In [4]:
# Load the per-artwork metadata table. The `id` column becomes the index so
# that rows can later be fetched by database id with `df_meta.loc[...]`.
metadata_path = "../data/processed/ethz/metadata/metadata.csv"
df_meta = pd.read_csv(metadata_path, index_col='id')
df_meta

Unnamed: 0_level_0,record_id,image_url,inventory_number,person,date,title,classification,material_technique,institution_isil,record_url,image_licence,year_min,year_max,relationship_type_id,classification_id,material_technique_id,institution_isil_id,fpath
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1000,22890,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 001368.34,"Dürer, Albrecht (1471 - 1528), Künstler",Enstehung des Druckträgers: Um 1510,"Ungläubiger Thomas, Blatt 34 der Folge ""Kleine...",Druckgraphik,Holzschnitt,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1510,1510,[16],2,[38],1,
1001,294531,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 027604,"Webre [?], François, Künstler",Enstehung des Druckträgers: Um 1700 - 1800,Blatt 7 und 8 einer Folge von Darstellungen kl...,Druckgraphik,"Kupferstich, Radierung, Papier vergé",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1700,1800,[7],2,"[50, 73, 64]",1,../data/processed/ethz/images/294/294531.png
1002,59785,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,B 001012,"Ammann, Hans Conrad (1634 - 1707), Künstler",Entstehung: 1645,Reissbüchlein,Buch,"Feder, handkoloriert, Papier vergé",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1645,1645,[15],1,"[26, 35, 64]",1,../data/processed/ethz/images/59/59785.png
1003,8041,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 010011.3,"Carracci, Agostino (1557 - 1602); Castello, Be...",Enstehung des Druckträgers: 1590,Plünderungszug bringt die Waffen und die blutg...,Druckgraphik,Kupferstich,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1590,1590,[5],2,[50],1,../data/processed/ethz/images/8/8041.png
1004,101503,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,2013.0279,"Ramberg, Johann Heinrich (1763 - 1840), Künstler",Enstehung des Druckträgers: 1825,Ohne Titel [Paar vor Herberge],Handzeichnung,"Aquarell, dubliert, Feder",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1825,1825,[13],3,"[4, 19, 26]",1,../data/processed/ethz/images/101/101503.png
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25284,9979,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 013066,"Speckter, Otto (1807 - 1871), Radierer; Buddeu...",Enstehung des Druckträgers: Um 1839,"Rückkehr von der Kindertaufe, Blatt 24 aus ""Al...",Druckgraphik,"Radierung, Velin",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1839,1839,[5],2,"[73, 92]",1,../data/processed/ethz/images/9/9979.png
25285,9980,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 006141.1-7,"Saenredam, Jan Pietersz. (1565 um - 1607), Kün...",Enstehung des Druckträgers: 1596,Sieben Planetengötter,Druckgraphik,Kupferstich,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1596,1596,"[16, 1]",2,[50],1,../data/processed/ethz/images/9/9980.png
25286,9986,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 012454,"Duchesne, J. M. (1751 um - um 1800 tätig); Wat...",Enstehung des Druckträgers: Um 1770,Elefant,Druckgraphik,"Kupferstich, Radierung",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1770,1770,,2,"[50, 73]",1,../data/processed/ethz/images/9/9986.png
25287,9989,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 000829,"Beauvarlet, Jacques-Firmin (1731 - 1797); Vien...",Enstehung des Druckträgers: 1746 - 1797,Opfer an Venus,Druckgraphik,"Radierung, Kupferstich",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1746,1797,,2,"[73, 50]",1,../data/processed/ethz/images/9/9989.png


# Test Retrieval using image and metadata features together

In [5]:
# Load the combined feature table, indexed by the first CSV column. As the
# preview below shows, it holds the metadata indicator columns followed by
# the numbered image-embedding columns.
fpath_feat = "../data/processed/ethz/features/features_meta.csv"
df_feat = pd.read_csv(fpath_feat, header=0, index_col=0)
# Drop a leftover `index` column if one is present in the file.
if 'index' in df_feat.columns:
    df_feat = df_feat.drop(columns='index')

In [6]:
# Row labels of the feature table as a plain list, and the feature values
# as a NumPy array (same row order).
identifiers = df_feat.index.tolist()
features = df_feat.values

In [7]:
df_feat.head()

Unnamed: 0_level_0,classification_id_1,classification_id_2,classification_id_3,classification_id_4,classification_id_5,material_technique_id_1,material_technique_id_2,material_technique_id_3,material_technique_id_4,material_technique_id_5,...,503,504,505,506,507,508,509,510,511,512
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,0,1,0,0,0,0,0,0,0,0,...,2.84641,0.476454,0.393871,-0.172278,-0.741765,0.044209,0.489517,-0.741765,0.296849,-0.741765
1002,1,0,0,0,0,0,0,0,0,0,...,2.82545,-0.035121,0.156008,-0.014497,-0.140456,0.799459,0.425874,-0.610783,0.800544,-0.610783
1003,0,1,0,0,0,0,0,0,0,0,...,1.732787,0.8334,1.733942,0.294127,-0.479517,0.290155,0.499136,-0.628738,0.564708,-0.628738
1004,0,0,1,0,0,0,0,0,1,0,...,1.486095,0.436128,1.224127,0.514769,0.458842,-0.188758,0.706383,-0.716077,0.447603,-0.716077
1005,0,1,0,0,0,0,0,0,0,0,...,3.29646,1.094629,1.992935,0.360293,-0.185952,1.26177,0.192709,-0.584842,0.739166,-0.584842


In [8]:
df_feat.query("classification_id_1 == 1")

Unnamed: 0_level_0,classification_id_1,classification_id_2,classification_id_3,classification_id_4,classification_id_5,material_technique_id_1,material_technique_id_2,material_technique_id_3,material_technique_id_4,material_technique_id_5,...,503,504,505,506,507,508,509,510,511,512
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1002,1,0,0,0,0,0,0,0,0,0,...,2.825450,-0.035121,0.156008,-0.014497,-0.140456,0.799459,0.425874,-0.610783,0.800544,-0.610783
1288,1,0,0,0,0,0,0,0,0,0,...,3.024016,1.260875,1.005579,0.625809,-0.559870,0.817578,0.242772,-0.600122,0.509730,-0.600122
1289,1,0,0,0,0,0,0,0,0,0,...,3.188366,1.551483,1.182095,0.332583,-0.544295,0.850390,0.278737,-0.582936,0.482528,-0.582936
1290,1,0,0,0,0,0,0,0,0,0,...,2.926200,0.828146,1.424744,0.281409,-0.599576,1.100517,0.972707,-0.664815,0.308363,-0.664815
1291,1,0,0,0,0,0,0,0,0,0,...,2.202350,0.682070,0.517846,-0.090590,-0.576317,0.697704,0.360201,-0.595340,0.259514,-0.595340
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23348,1,0,0,0,0,0,0,0,0,0,...,2.464424,0.541477,0.167529,0.385249,0.080427,0.714941,0.342130,-0.711171,1.101728,-0.711171
23349,1,0,0,0,0,0,0,0,0,0,...,3.207061,1.082466,0.988320,0.095147,-0.489977,0.914892,0.104667,-0.584364,0.477090,-0.584364
23353,1,0,0,0,0,0,0,0,0,0,...,3.244109,0.655828,0.243551,-0.296171,-0.694262,0.436959,0.194114,-0.694262,0.668450,-0.694262
23393,1,0,0,0,0,0,0,0,0,0,...,2.953818,1.112624,0.565422,0.140386,-0.591475,0.430165,0.286503,-0.591475,0.870202,-0.591475


In [9]:
def load_filter_lookup(fixture_dir="../data/processed/ethz/fixtures"):
    """Build ``name -> primary key`` lookups for every filter type.

    One JSON fixture file per filter type (``<fixture_dir>/<Type>.json``) is
    read. Each file must contain a list of records that expose
    ``record['fields']['name']`` and ``record['pk']``.

    Parameters
    ----------
    fixture_dir : str, optional
        Directory holding the fixture files. Defaults to the ETHZ fixture
        folder relative to the notebook.

    Returns
    -------
    dict
        ``{filter_type: {name: pk}}`` for the types "MaterialTechnique",
        "Classification", "Relationship" and "Institution".

    Raises
    ------
    OSError
        If a fixture file cannot be opened.
    """

    #replace this later with a query from the database
    types = ["MaterialTechnique", "Classification","Relationship", "Institution"]
    filter_dict = {}

    for t in types:
        fpath = f"{fixture_dir}/{t}.json"

        # JSON is UTF-8 by specification; be explicit so names with umlauts
        # load the same way regardless of the platform's default encoding.
        with open(fpath, 'r', encoding='utf-8') as f:
            fixture = json.load(f)

        filter_dict[t] = {d['fields']['name']: d['pk'] for d in fixture}

    return filter_dict


def make_filter_vec(filter_dict, fltr_type, qry_names):
    """Return a 0/1 vector marking the requested names of one filter type.

    Parameters
    ----------
    filter_dict : dict
        ``{filter_type: {name: pk}}`` as produced by ``load_filter_lookup``.
    fltr_type : str
        Key of ``filter_dict`` selecting which lookup to use.
    qry_names : iterable of str
        Names to switch on. May be empty, in which case the result is all zeros.

    Returns
    -------
    numpy.ndarray
        Integer vector with one slot per entry of ``filter_dict[fltr_type]``;
        slot ``pk - 1`` is 1 for every requested name.

    Raises
    ------
    KeyError
        If any requested name is missing from the lookup. (Previously this
        surfaced as an opaque ``TypeError`` from ``None - 1``.)
    """
    lookup = filter_dict[fltr_type]
    qry_vec = np.zeros(len(lookup), dtype=int)

    # Fail early with a readable message instead of crashing on `None - 1`.
    unknown = [qry_name for qry_name in qry_names if qry_name not in lookup]
    if unknown:
        raise KeyError(f"unknown {fltr_type} filter name(s): {unknown}")

    #subtract 1 as vector is zero indexed but database index starts at 1
    # NOTE(review): assumes primary keys run 1..len(lookup) without gaps — confirm.
    indices = [lookup[qry_name] - 1 for qry_name in qry_names]

    if indices:
        qry_vec[indices] = 1

    return qry_vec

In [10]:
def str_qry_to_vec(filter_dict,
                   class_qry=None,
                   mat_tec_qry=None,
                   rel_qry=None,
                   inst_qry=None):
    """look up several queries in one function

    Each list of names is turned into a 0/1 vector via ``make_filter_vec`` and
    the four vectors are concatenated in the fixed order Classification,
    MaterialTechnique, Relationship, Institution.

    Parameters
    ----------
    filter_dict : dict
        ``{filter_type: {name: pk}}`` lookup.
    class_qry, mat_tec_qry, rel_qry, inst_qry : list of str, optional
        Names to switch on for the respective filter type. ``None`` (the
        default) and an empty list both mean "no filter of this type".

    Returns
    -------
    numpy.ndarray
        1-D vector whose length is the sum of the four lookup sizes.
    """
    # (filter type, requested names) in concatenation order
    segments = (
        ("Classification", class_qry),
        ("MaterialTechnique", mat_tec_qry),
        ("Relationship", rel_qry),
        ("Institution", inst_qry),
    )

    vecs = [
        make_filter_vec(filter_dict,
                        fltr_type=fltr_type,
                        qry_names=names if names is not None else [])
        for fltr_type, names in segments
    ]

    return np.hstack(vecs)

###  Testing Lookup

In [22]:
filter_dict = load_filter_lookup()

In [48]:
## manually create a vector for testing
# NOTE(review): the hand-written segments below add up to 26 entries, while the
# vector returned by str_qry_to_vec for the same names (next cell) has 126
# entries, and the two are never compared. These expected values look stale —
# TODO regenerate them from the current fixtures and assert equality.
classification_qry_names = ["Druckgraphik"]
classification_qry_vec = [1,0]
mat_tec_qry_names = ["aluminiumdruck","kupferstich","zink"]
mat_tec_qry_vec = [1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1]
relationship_qry_names = ["zustandsvariante"]
relationship_qry_vec = [0,0,0,0,1]
institution_qry_names = []
institution_qry_vec = [0,]

# names grouped in the same order as the str_qry_to_vec keyword arguments
qry_names = [classification_qry_names,
             mat_tec_qry_names,
             relationship_qry_names,
             institution_qry_names]

# expected vector: the four segments concatenated in that same order
qry_vec_true = np.hstack([classification_qry_vec,
                     mat_tec_qry_vec,
                     relationship_qry_vec,
                     institution_qry_vec])
qry_vec_true


array([1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 0])

In [25]:
qry_vec_test = str_qry_to_vec(filter_dict, 
                              class_qry=classification_qry_names, 
                              mat_tec_qry=mat_tec_qry_names, 
                              rel_qry=relationship_qry_names,
                              inst_qry=institution_qry_names)
qry_vec_test

array([0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0])

# Make a Brute Force Model

In [46]:
class EmbeddingMetaCross(tf.keras.Model):
    """Brute-force top-k retrieval mixing embedding and metadata scores.

    For every query the model scores all stored candidates twice -- once by
    dot product on the embeddings, once by dot product on the metadata
    vectors -- multiplies both score matrices element-wise and returns the
    ``k`` highest-scoring candidates together with their identifiers.
    """

    def __init__(self,
                 identifiers=None,
                 candidates_embed=None,
                 candidates_meta=None,
                 query_model=None,
                 k=50):
        """Store the candidate tensors and retrieval settings.

        identifiers:      ids looked up for the top-k positions.
        candidates_embed: candidate embedding matrix (one row per candidate).
        candidates_meta:  candidate metadata matrix (one row per candidate).
        query_model:      optional model applied to the first input before
                          scoring; skipped when None.
        k:                number of results per query, cast to int32.
        """
        super().__init__()
        self.identifiers = identifiers
        self.candidates_embed = candidates_embed
        self.candidates_meta = candidates_meta
        self.query_model = query_model
        self.k = tf.cast(k, tf.int32)

    def call(self, inputs):
        """Score ``inputs = [embedding_queries, metadata_queries]``.

        Returns a ``(scores, identifiers)`` pair holding the top-k combined
        scores and the matching entries of ``self.identifiers``.
        """
        embed_batch = inputs[0]

        # Optionally map the raw query (e.g. an image batch) to an embedding.
        if self.query_model is not None:
            embed_batch = self.query_model(embed_batch)

        meta_batch = inputs[1]

        # Query-vs-candidate dot products for both feature families.
        meta_scores = tf.matmul(meta_batch, self.candidates_meta, transpose_b=True)
        embed_scores = tf.matmul(embed_batch, self.candidates_embed, transpose_b=True)

        # Element-wise product: a metadata score of zero zeroes the combined
        # score. Adding a small epsilon to the metadata scores to avoid that
        # was tried and is currently disabled.
        combined = tf.multiply(meta_scores, embed_scores)

        top_scores, top_positions = tf.math.top_k(combined, k=self.k)

        # Translate positions in the candidate matrices into identifiers.
        top_identifiers = tf.gather(self.identifiers, top_positions)

        return top_scores, top_identifiers

In [27]:
# NOTE(review): this repeats the feature-loading cell from the retrieval-test
# section verbatim (same path, same options); `df_feat` is simply re-read.
fpath_feat = "../data/processed/ethz/features/features_meta.csv"
df_feat = pd.read_csv(fpath_feat, header=0, index_col=0)
# Drop a leftover `index` column if one is present in the file.
if 'index' in df_feat.columns:
    df_feat = df_feat.drop(columns='index')

In [28]:
# Metadata indicator columns are recognised by the '_id' substring in their name.
metadata_cols = [col for col in df_feat.columns if '_id' in col]
df_feat_meta = df_feat.loc[:,metadata_cols]

# Every remaining column is treated as part of the image embedding.
embed_cols = [col for col in df_feat.columns if col not in metadata_cols]
df_feat_embed = df_feat.loc[:,embed_cols]


In [29]:
# Candidate ids, in the same row order as the two candidate matrices below.
identifiers = df_feat_meta.index.tolist()
identifiers = tf.constant(identifiers, dtype=tf.int32)

# Candidate matrices: one row per artwork, metadata and embedding parts separately.
candidates_meta = tf.constant(df_feat_meta.values, dtype=tf.float32)
candidates_embed = tf.constant(df_feat_embed.values, dtype=tf.float32)
# Number of results returned per query (the model casts it to int32 again).
k=100
k = tf.constant(k, dtype=tf.int32)
# No query_model here: the first query input is used as the embedding as-is.
index = EmbeddingMetaCross(identifiers=identifiers, 
                      candidates_embed=candidates_embed, 
                      candidates_meta=candidates_meta, 
                      k=k)


### querying with embeddings

In [32]:
# qry_embed = tf.constant(qry_series_embed.values, dtype=tf.float32)
# qry_meta = tf.constant(qry_series_meta.values, dtype=tf.float32)
# k = tf.constant(100, dtype=tf.float32)

In [34]:
# Use the i-th row of the feature table as the query item.
i = 3
# get image embeddings
qry_series_embed = df_feat_embed.iloc[i,:].copy()
# NOTE(review): qry_series_meta is not used below; the metadata part of the
# query is built from the text filters instead.
qry_series_meta = df_feat_meta.iloc[i,:].copy()

# add text based filters
qry_classification = ["Druckgraphik",]
qry_mat_tec = ["dubliert",]

# translate the filter names into the concatenated 0/1 metadata vector
qry_meta_vec = str_qry_to_vec(filter_dict, 
               class_qry=qry_classification, 
               mat_tec_qry=qry_mat_tec, 
               rel_qry=[],
               inst_qry=[])


qry_embed = tf.constant(qry_series_embed.values, dtype=tf.float32)
qry_meta = tf.constant(qry_meta_vec, dtype=tf.float32)

# reshape query as a batch
qry_inputs = (qry_embed, qry_meta)
qry_inputs = [tf.expand_dims(x,0) for x in qry_inputs]

# NOTE(review): `call` is invoked directly here, whereas later cells use
# `index(...)`. The second return value holds identifiers (database ids),
# not row positions, despite the name `indices`.
scores,indices = index.call(qry_inputs)
scores,indices

In [35]:
# `indices` holds the identifiers returned by the index, so the label-based
# `.loc` lookup on the id-indexed metadata table shows the retrieved artworks.
df_meta.loc[indices.numpy()[0],:]

Unnamed: 0_level_0,record_id,image_url,inventory_number,person,date,title,classification,material_technique,institution_isil,record_url,image_licence,year_min,year_max,relationship_type_id,classification_id,material_technique_id,institution_isil_id,fpath
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
19558,31086,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 015776,"Aldegrever, Heinrich (1502 - 1555 / 1561), Kün...",Enstehung des Druckträgers: 1555,"Susanna des Ehebruchs angeklagt, Blatt 2 der F...",Druckgraphik,"Papier vergé, dubliert, Kupferstich",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1555,1555,[1],2,"[64, 19, 50]",1,../data/processed/ethz/images/31/31086.png
22113,34268,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 016549,"Guidi, Raffaello (1540 um - 1614), Künstler; G...",Enstehung des Druckträgers: 1613,Merkur,Druckgraphik,"Papier vergé, Kupferstich, dubliert",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1613,1613,,2,"[64, 50, 19]",1,../data/processed/ethz/images/34/34268.png
19470,30973,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 015828,"Aldegrever, Heinrich (1502 - 1555 / 1561), Kün...",Enstehung des Druckträgers: 1555,"Lot und die beiden Engel, Blatt 1 der Folge ""D...",Druckgraphik,"Papier vergé, Kupferstich, dubliert",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1555,1555,[1],2,"[64, 50, 19]",1,../data/processed/ethz/images/30/30973.png
21742,3376,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 013092,"Neeffs, Jacobus (1610 - nach 1660), Ausführung...",Enstehung des Druckträgers: 1626 - 1660,Martyrium des Heiligen Thomas,Druckgraphik,"Kupferstich, dubliert",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1626,1660,,2,"[50, 19]",1,../data/processed/ethz/images/3/3376.png
2558,14586,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 000732,"Anonym (Datierung unbekannt), Künstler; Saenre...",Enstehung des Druckträgers: 1600 - 1634,David mit dem Haupt Goliaths,Druckgraphik,"Kupferstich, dubliert, Papier vergé",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1600,1634,"[10, 14]",2,"[50, 19, 64]",1,../data/processed/ethz/images/14/14586.png
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5854,21528,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 011374,"Suyderhoff, Jonas (1613 um - 1686); Ostade, Ad...",Enstehung des Druckträgers: 1633 - 1686,Bauernstube mit tanzendem Paar,Druckgraphik,"Kupferstich, dubliert",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1633,1686,,2,"[50, 19]",1,../data/processed/ethz/images/21/21528.png
22656,4706,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 013081,"Vorsterman (der Ältere), Lucas (1595 - 1675), ...",Enstehung des Druckträgers: 1621,Anbetung der Könige,Druckgraphik,"dubliert, Kupferstich",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1621,1621,,2,"[19, 50]",1,../data/processed/ethz/images/4/4706.png
14175,26044,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 011942,"Davent, Léon (1540 - 1556 tätig); Primaticcio,...",Enstehung des Druckträgers: 1540 - 1545,Herkules schläft bei Omphale,Druckgraphik,"Kupferstich, dubliert",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1540,1545,[1],2,"[50, 19]",1,../data/processed/ethz/images/26/26044.png
21045,32845,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 016976,"Muller, Jan Harmensz. (1571 - 1628), Künstler;...",Enstehung des Druckträgers: Um 1591,"Venus, von den Nymphen verehrt",Druckgraphik,"dubliert, Papier vergé, Kupferstich",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1591,1591,,2,"[19, 64, 50]",1,../data/processed/ethz/images/32/32845.png


In [36]:
# Symbolic inputs describing one query: embedding part and metadata part.
# NOTE(review): 512 and 126 are hard-coded; presumably they must equal the
# column counts of df_feat_embed and df_feat_meta — confirm.
embed_shape = tf.keras.Input(shape=(512,), dtype=tf.float32)
meta_shape = tf.keras.Input(shape=(126,), dtype=tf.float32) 
# k_shape = tf.keras.Input(shape=(1,), dtype=tf.float32)
inputs_shapes = [embed_shape, meta_shape]

In [37]:
# Call the model once on the symbolic inputs, then build it with the input
# shape recorded by that call. Done before saving; presumably so the export
# has a defined input signature — TODO confirm.
index(inputs_shapes)
index.build(input_shape=index.input_shape)

In [38]:
index(qry_inputs)

(<tf.Tensor: shape=(1, 100), dtype=float32, numpy=
 array([[926.0102 , 922.14685, 921.5996 , 921.55566, 920.19806, 919.73914,
         919.23206, 918.7919 , 918.19116, 918.1645 , 917.4928 , 917.33203,
         917.2022 , 917.0423 , 916.9166 , 916.85126, 916.3197 , 916.22754,
         915.7867 , 914.4802 , 914.4411 , 914.14154, 913.68915, 913.0585 ,
         912.94214, 912.4873 , 911.9475 , 910.6373 , 910.50336, 910.30005,
         910.2628 , 910.078  , 909.8285 , 909.82245, 909.799  , 909.5833 ,
         909.5707 , 909.1105 , 909.02783, 908.65   , 908.05707, 907.95154,
         907.1883 , 906.90173, 906.756  , 906.36816, 905.9768 , 905.6759 ,
         905.3124 , 904.02094, 903.7554 , 903.6174 , 903.4478 , 903.1427 ,
         903.0105 , 902.94037, 902.592  , 902.4507 , 902.2732 , 902.24426,
         901.568  , 901.50165, 900.99976, 900.83234, 900.74225, 900.5747 ,
         900.4672 , 900.2453 , 899.9059 , 899.7362 , 899.70465, 899.68866,
         899.6022 , 899.4207 , 899.4023 , 899.088

### Save Model

In [39]:
# Export the built index in SavedModel format under version directory `3`.
tf.saved_model.save(index,"../models/retrieval/3")

INFO:tensorflow:Assets written to: ../models/retrieval/3/assets


### Reload Model
Test the query with the reloaded model.

In [40]:
# Reload the export from the same path; the next cell re-runs the query on it
# so the scores can be compared with those of the in-memory index.
index_load = tf.saved_model.load("../models/retrieval/3")

In [41]:
index_load(qry_inputs)

(<tf.Tensor: shape=(1, 100), dtype=float32, numpy=
 array([[926.0102 , 922.14685, 921.5996 , 921.55566, 920.19806, 919.73914,
         919.23206, 918.7919 , 918.19116, 918.1645 , 917.4928 , 917.33203,
         917.2022 , 917.0423 , 916.9166 , 916.85126, 916.3197 , 916.22754,
         915.7867 , 914.4802 , 914.4411 , 914.14154, 913.68915, 913.0585 ,
         912.94214, 912.4873 , 911.9475 , 910.6373 , 910.50336, 910.30005,
         910.2628 , 910.078  , 909.8285 , 909.82245, 909.799  , 909.5833 ,
         909.5707 , 909.1105 , 909.02783, 908.65   , 908.05707, 907.95154,
         907.1883 , 906.90173, 906.756  , 906.36816, 905.9768 , 905.6759 ,
         905.3124 , 904.02094, 903.7554 , 903.6174 , 903.4478 , 903.1427 ,
         903.0105 , 902.94037, 902.592  , 902.4507 , 902.2732 , 902.24426,
         901.568  , 901.50165, 900.99976, 900.83234, 900.74225, 900.5747 ,
         900.4672 , 900.2453 , 899.9059 , 899.7362 , 899.70465, 899.68866,
         899.6022 , 899.4207 , 899.4023 , 899.088

# Query with image

#### make model

In [92]:
# Load the Keras model stored at the path configured in `settings`; it is used
# below as the query model of the index.
fpath_model = settings.model_fldr_path
ftx_model = tf.keras.models.load_model(fpath_model)

k=100
k = tf.constant(k, dtype=tf.int32)
# Rebinds `index`: same candidates as before, but the first query input is now
# passed through `ftx_model` before scoring.
index = EmbeddingMetaCross(identifiers=identifiers, 
                           candidates_embed=candidates_embed,
                           candidates_meta=candidates_meta,
                           query_model=ftx_model,
                           k=k)



#### make query

In [83]:
def preprocess_img(image_path_or_stream, DEBUG=True):
    """Load an image and turn it into a nested list of floats in [0, 1].

    Parameters
    ----------
    image_path_or_stream : str or file-like
        Anything accepted by ``PIL.Image.open``.
    DEBUG : bool, optional
        When True (default) the array shape is printed before and after the
        final resize.

    Returns
    -------
    list
        The 224 x 224 RGB image as nested Python lists (height, width,
        channel) with pixel values divided by 255.
    """

    img = Image.open(image_path_or_stream)
    img = img.convert("RGB")
    size = 224, 224
    # project helper; presumably scales/pads towards the target edge length —
    # see preprocess_images.resize_image
    img = resize_image(img, size[0])
    if DEBUG:
        print('np array shape: ', np.array(img).shape)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    # under its current name and also exists in older Pillow releases.
    img = img.resize(size, Image.LANCZOS)
    img = np.array(img)
    if DEBUG:
        print('np array shape: ', img.shape)
    # scale 8-bit pixel values to the 0..1 range
    img = img / 255
    img = img.tolist()

    return img


np array shape:  (224, 224, 3)
np array shape:  (224, 224, 3)


1004

In [112]:
# Use the i-th row of the feature table as the query item.
i = 3
db_id= df_feat_embed.index[i]
# get image 
fpath = df_meta.loc[db_id,"fpath"]
img = preprocess_img(fpath)

# add text based filters
qry_classification = ["Druckgraphik",]
qry_mat_tec = ["kupferstich"]
# convert to vector
meta_vec = str_qry_to_vec(filter_dict, 
               class_qry=qry_classification, 
               mat_tec_qry=qry_mat_tec, 
               rel_qry=[],
               inst_qry=[])
# make query tensors
# (the earlier, already-batched `qry_img` assignment was dead code: it was
# overwritten here before ever being used, so it has been removed)
qry_meta = tf.constant(meta_vec, dtype=tf.float32)
qry_img = tf.constant(img, dtype=tf.float32)

# reshape query as a batch
qry_inputs = (qry_img, qry_meta)
qry_inputs = [tf.expand_dims(x,0) for x in qry_inputs]

# NOTE(review): `call` is invoked directly here, whereas other cells use
# `index(...)`. The second return value holds identifiers (database ids).
scores,indices = index.call(qry_inputs)
scores,indices

np array shape:  (224, 224, 3)
np array shape:  (224, 224, 3)


(<tf.Tensor: shape=(1, 100), dtype=float32, numpy=
 array([[943.42346, 942.2776 , 941.84   , 940.76086, 939.24976, 938.08344,
         937.8844 , 936.4646 , 935.5697 , 935.2591 , 935.0945 , 934.9088 ,
         934.50586, 934.0117 , 933.9233 , 933.7363 , 933.08105, 933.01483,
         932.4539 , 932.42303, 932.3636 , 932.08014, 932.06085, 931.6792 ,
         931.5777 , 931.5491 , 931.17834, 931.06964, 931.0294 , 930.8482 ,
         930.7744 , 930.6373 , 930.59985, 930.46985, 930.36035, 930.355  ,
         930.2127 , 930.0593 , 930.02026, 929.9757 , 929.78235, 929.57745,
         929.4552 , 929.40015, 929.2332 , 929.2309 , 929.2124 , 929.09375,
         929.0638 , 929.05927, 929.00366, 928.8773 , 928.7346 , 928.7013 ,
         928.66327, 928.48596, 928.3865 , 928.3609 , 928.13336, 928.03577,
         928.0036 , 927.9312 , 927.9175 , 927.8589 , 927.8469 , 927.66583,
         927.5918 , 927.52545, 927.4398 , 927.33167, 927.16925, 927.1676 ,
         927.0952 , 927.03955, 927.0237 , 927.014

In [113]:
# `indices` holds the identifiers returned by the index, so the label-based
# `.loc` lookup on the id-indexed metadata table shows the retrieved artworks.
df_meta.loc[indices.numpy()[0],:]

Unnamed: 0_level_0,record_id,image_url,inventory_number,person,date,title,classification,material_technique,institution_isil,record_url,image_licence,year_min,year_max,relationship_type_id,classification_id,material_technique_id,institution_isil_id,fpath
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1424,1143,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 001367.25,"Raimondi, Marcantonio (1470 / 1482 um - um 152...",Enstehung des Druckträgers: 1510 - 1515,"Christus am Kreuz, Blatt 25 der Folge ""Die Pas...",Druckgraphik,Kupferstich,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1510,1515,[16],2,[50],1,../data/processed/ethz/images/1/1143.png
24543,8591,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 007895.4,"Aldegrever, Heinrich (1502 - 1555 / 1561)",Enstehung des Druckträgers: 1540,"Adam und Eva verstecken sich vor Gott, Blatt 4...",Druckgraphik,Kupferstich,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1540,1540,[5],2,[50],1,../data/processed/ethz/images/8/8591.png
8839,236573,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 021880,"Passe (der Ältere), Crispijn de (1564 - 1637),...",Enstehung des Druckträgers: Um 1585 - 1637,Versuchung Christi,Druckgraphik,"Kupferstich, Papier vergé",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1585,1637,,2,"[50, 64]",1,../data/processed/ethz/images/236/236573.png
6043,21790,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 007686,"Dürer, Albrecht (1471 - 1528), Künstler",Enstehung des Druckträgers: 1523,Apostel Bartholomäus,Druckgraphik,Kupferstich,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1523,1523,"[1, 10]",2,[50],1,../data/processed/ethz/images/21/21790.png
6683,22301,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 012898,"Wierix, Hieronymus (1553 - 1619), Kopie nach",Enstehung des Druckträgers: Um 1700 - Um 1800 ...,Heiliger Antonius,Druckgraphik,Kupferstich,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1700,1800,,2,[50],1,../data/processed/ethz/images/22/22301.png
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11829,247488,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 025264,"Schoel, Hendrik van (1622 gestorben), Künstler",Enstehung des Druckträgers: Um 1600 - 1622,Anbetung des Christuskindes durch die Hirten,Druckgraphik,"Kupferstich, Papier vergé",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1600,1622,,2,"[50, 64]",1,../data/processed/ethz/images/247/247488.png
9211,236932,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 022227,"Anonym (Datierung unbekannt), Künstler/in",Enstehung des Druckträgers: 1550 - 1600 [zweit...,"Mann mit Zirkel und Dreieck, Architektur",Druckgraphik,"Kupferstich, Papier vergé",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1550,1600,,2,"[50, 64]",1,../data/processed/ethz/images/236/236932.png
19558,31086,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 015776,"Aldegrever, Heinrich (1502 - 1555 / 1561), Kün...",Enstehung des Druckträgers: 1555,"Susanna des Ehebruchs angeklagt, Blatt 2 der F...",Druckgraphik,"Papier vergé, dubliert, Kupferstich",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1555,1555,[1],2,"[64, 19, 50]",1,../data/processed/ethz/images/31/31086.png
19817,31372,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 016933,"Saenredam, Jan Pietersz. (1565 um - 1607), Kün...",Enstehung des Druckträgers: 1604,"Adam und Eva bei der Arbeit, Blatt 5 der Folge...",Druckgraphik,"Kupferstich, Papier vergé",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1604,1604,[1],2,"[50, 64]",1,../data/processed/ethz/images/31/31372.png


# Experiment with cosine similarity for post-ranking

In [42]:
# Embedding rows of the retrieved candidates, in ranking order. Uses the
# embedding frame defined earlier instead of slicing `df_feat` at the magic
# column offset 126.
df_feat_embed.loc[indices.numpy()[0]]

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,503,504,505,506,507,508,509,510,511,512
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
19558,0.114296,-0.597790,0.498638,-0.597790,-0.225226,-0.597790,-0.597790,0.689815,-0.468098,-0.597790,...,1.976307,0.451277,1.376148,0.143139,-0.223372,0.482784,1.100477,-0.597790,0.384275,-0.597790
22113,0.597441,-0.668479,1.856117,-0.668479,0.201761,-0.668479,-0.662563,1.044212,0.038411,-0.668479,...,1.726870,0.363160,1.279065,0.413696,-0.311629,0.359807,0.568241,-0.668479,0.296696,-0.668479
19470,0.195953,-0.618876,1.424347,-0.493038,0.318731,-0.618876,-0.618876,0.350226,0.499453,-0.321029,...,1.783194,0.272848,1.409922,0.280465,0.151470,0.447525,0.624614,-0.618876,0.299529,-0.618876
21742,0.874752,-0.665188,0.295080,-0.665188,0.283479,-0.665188,-0.519762,0.586755,-0.649769,-0.665188,...,1.675358,1.012375,1.636367,0.311924,-0.277919,0.103804,-0.017347,-0.665188,1.265564,-0.665188
2558,-0.199405,-0.635149,0.591217,-0.635149,0.474040,-0.635149,-0.635149,0.516687,0.909131,-0.635149,...,1.999421,0.296637,1.353028,0.670003,-0.059568,0.238252,0.640529,-0.635149,0.280776,-0.635149
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5854,0.562475,-0.598707,1.151434,-0.598707,-0.154313,-0.598707,-0.526713,0.071257,-0.598707,-0.598707,...,1.890669,0.223787,1.258144,0.669032,-0.593278,-0.038801,0.554865,-0.598707,0.638352,-0.598707
22656,0.496757,-0.670724,1.181979,-0.315514,0.178492,-0.670724,-0.670724,0.107324,0.513196,-0.324476,...,2.308914,0.417688,1.249788,0.270655,-0.226221,0.995771,0.189609,-0.670724,0.928864,-0.670724
14175,0.411391,-0.607947,0.894980,-0.500327,0.226460,-0.607947,-0.607947,0.222191,-0.561596,-0.607947,...,2.297712,0.950209,1.551591,0.599280,-0.479417,0.531842,0.391015,-0.607947,1.028079,-0.607947
21045,0.115656,-0.655357,0.800468,-0.385348,-0.260987,-0.655357,-0.655357,1.050161,0.757506,-0.451030,...,1.818595,0.583553,1.351623,0.713923,0.746826,0.310634,0.306423,-0.655357,0.348283,-0.655357


In [244]:
# Per-row cosine similarity (no reduction across the batch). This is a Keras
# *loss*, so the values come out negated, as the output below shows.
cosine_loss = tf.keras.losses.CosineSimilarity(axis=-1,reduction=tf.keras.losses.Reduction.NONE)

In [245]:
# NOTE(review): `qry_embed` comes from the "querying with embeddings" cell,
# while `indices` is overwritten by the later image query. When the notebook
# is run top to bottom they no longer describe the same query — confirm intent.

# Embeddings of the retrieved candidates, in ranking order.
top_embed = tf.constant(df_feat_embed.loc[indices.numpy()[0]].values, dtype=tf.float32)
# Repeat the single query once per candidate; the shape is taken from the
# data instead of the former hard-coded [100, 512].
cosine_loss(tf.broadcast_to(qry_embed, tf.shape(top_embed)), top_embed)

<tf.Tensor: shape=(100,), dtype=float32, numpy=
array([-0.90591955, -0.9020876 , -0.90159225, -0.9015013 , -0.90019584,
       -0.89975315, -0.8992799 , -0.89883447, -0.89829284, -0.8982369 ,
       -0.8975458 , -0.8973627 , -0.8972641 , -0.897153  , -0.89696026,
       -0.89692825, -0.89642584, -0.8962996 , -0.89586455, -0.8945787 ,
       -0.8944926 , -0.8943026 , -0.89389795, -0.89317983, -0.89305973,
       -0.8926155 , -0.8921059 , -0.89081836, -0.8907668 , -0.89052737,
       -0.89045805, -0.89029115, -0.89007694, -0.88997185, -0.8900322 ,
       -0.8899186 , -0.8898293 , -0.8893368 , -0.889287  , -0.8888936 ,
       -0.88824284, -0.8882134 , -0.8874745 , -0.8871348 , -0.8870317 ,
       -0.8866393 , -0.8862702 , -0.8860609 , -0.88561964, -0.88437736,
       -0.88418794, -0.88399386, -0.88387716, -0.8835293 , -0.88339233,
       -0.88327765, -0.88289875, -0.88291895, -0.8826823 , -0.8826176 ,
       -0.8820214 , -0.88194597, -0.88144004, -0.88139373, -0.8811556 ,
       -0.880972