# Save recommender


In [1]:
import pandas as pd
import os, sys
import json

import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs
from tensorflow.keras.applications.xception import preprocess_input
from tensorflow.keras.preprocessing import image
from typing import Union, Optional, Tuple, Dict, Text

from PIL import Image
import matplotlib.pyplot as plt 
from time import time
import numpy as np

sys.path.append('../src')

import settings
import utils
from utils import make_fpath_from_id
from preprocess_images import resize_image
from utils_tf import ImageNormalizer, parse_image_func
from create_recommender import test_an_id, create_image_query_from_fpath


In [2]:
tfrs.__version__

'v0.6.0'

In [3]:
tf.__version__

'2.6.0'

# Load metadata

In [4]:
metadata_path = "../data/processed/ethz/metadata/metadata.csv"
df_meta = pd.read_csv(metadata_path, index_col='id')
df_meta

Unnamed: 0_level_0,record_id,image_url,inventory_number,person,date,title,classification,material_technique,institution_isil,record_url,image_licence,year_min,year_max,relationship_type_id,classification_id,material_technique_id,institution_isil_id,fpath
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1000,22890,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 001368.34,"Dürer, Albrecht (1471 - 1528), Künstler",Enstehung des Druckträgers: Um 1510,"Ungläubiger Thomas, Blatt 34 der Folge ""Kleine...",Druckgraphik,Holzschnitt,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1510,1510,[16],2,[38],1,
1001,294531,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 027604,"Webre [?], François, Künstler",Enstehung des Druckträgers: Um 1700 - 1800,Blatt 7 und 8 einer Folge von Darstellungen kl...,Druckgraphik,"Kupferstich, Radierung, Papier vergé",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1700,1800,[7],2,"[50, 73, 64]",1,../data/processed/ethz/images/294/294531.png
1002,59785,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,B 001012,"Ammann, Hans Conrad (1634 - 1707), Künstler",Entstehung: 1645,Reissbüchlein,Buch,"Feder, handkoloriert, Papier vergé",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1645,1645,[15],1,"[26, 35, 64]",1,../data/processed/ethz/images/59/59785.png
1003,8041,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 010011.3,"Carracci, Agostino (1557 - 1602); Castello, Be...",Enstehung des Druckträgers: 1590,Plünderungszug bringt die Waffen und die blutg...,Druckgraphik,Kupferstich,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1590,1590,[5],2,[50],1,../data/processed/ethz/images/8/8041.png
1004,101503,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,2013.0279,"Ramberg, Johann Heinrich (1763 - 1840), Künstler",Enstehung des Druckträgers: 1825,Ohne Titel [Paar vor Herberge],Handzeichnung,"Aquarell, dubliert, Feder",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1825,1825,[13],3,"[4, 19, 26]",1,../data/processed/ethz/images/101/101503.png
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25284,9979,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 013066,"Speckter, Otto (1807 - 1871), Radierer; Buddeu...",Enstehung des Druckträgers: Um 1839,"Rückkehr von der Kindertaufe, Blatt 24 aus ""Al...",Druckgraphik,"Radierung, Velin",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1839,1839,[5],2,"[73, 92]",1,../data/processed/ethz/images/9/9979.png
25285,9980,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 006141.1-7,"Saenredam, Jan Pietersz. (1565 um - 1607), Kün...",Enstehung des Druckträgers: 1596,Sieben Planetengötter,Druckgraphik,Kupferstich,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1596,1596,"[16, 1]",2,[50],1,../data/processed/ethz/images/9/9980.png
25286,9986,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 012454,"Duchesne, J. M. (1751 um - um 1800 tätig); Wat...",Enstehung des Druckträgers: Um 1770,Elefant,Druckgraphik,"Kupferstich, Radierung",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1770,1770,,2,"[50, 73]",1,../data/processed/ethz/images/9/9986.png
25287,9989,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 000829,"Beauvarlet, Jacques-Firmin (1731 - 1797); Vien...",Enstehung des Druckträgers: 1746 - 1797,Opfer an Venus,Druckgraphik,"Radierung, Kupferstich",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1746,1797,,2,"[73, 50]",1,../data/processed/ethz/images/9/9989.png


# Test Retrieval using image and metadata features together

In [5]:
fpath_feat = "../data/processed/ethz/features/features_meta.csv"
df_feat = pd.read_csv(fpath_feat, header=0, index_col=0)
if 'index' in df_feat.columns:
    df_feat = df_feat.drop(columns='index')

In [6]:
identifiers = df_feat.index.tolist()
features = df_feat.values

In [7]:
df_feat.head()

Unnamed: 0_level_0,classification_id_1,classification_id_2,classification_id_3,classification_id_4,classification_id_5,material_technique_id_1,material_technique_id_2,material_technique_id_3,material_technique_id_4,material_technique_id_5,...,503,504,505,506,507,508,509,510,511,512
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1001,0,1,0,0,0,0,0,0,0,0,...,2.84641,0.476454,0.393871,-0.172278,-0.741765,0.044209,0.489517,-0.741765,0.296849,-0.741765
1002,1,0,0,0,0,0,0,0,0,0,...,2.82545,-0.035121,0.156008,-0.014497,-0.140456,0.799459,0.425874,-0.610783,0.800544,-0.610783
1003,0,1,0,0,0,0,0,0,0,0,...,1.732787,0.8334,1.733942,0.294127,-0.479517,0.290155,0.499136,-0.628738,0.564708,-0.628738
1004,0,0,1,0,0,0,0,0,1,0,...,1.486095,0.436128,1.224127,0.514769,0.458842,-0.188758,0.706383,-0.716077,0.447603,-0.716077
1005,0,1,0,0,0,0,0,0,0,0,...,3.29646,1.094629,1.992935,0.360293,-0.185952,1.26177,0.192709,-0.584842,0.739166,-0.584842


In [8]:
df_feat.query("classification_id_1 == 1")

Unnamed: 0_level_0,classification_id_1,classification_id_2,classification_id_3,classification_id_4,classification_id_5,material_technique_id_1,material_technique_id_2,material_technique_id_3,material_technique_id_4,material_technique_id_5,...,503,504,505,506,507,508,509,510,511,512
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1002,1,0,0,0,0,0,0,0,0,0,...,2.825450,-0.035121,0.156008,-0.014497,-0.140456,0.799459,0.425874,-0.610783,0.800544,-0.610783
1288,1,0,0,0,0,0,0,0,0,0,...,3.024016,1.260875,1.005579,0.625809,-0.559870,0.817578,0.242772,-0.600122,0.509730,-0.600122
1289,1,0,0,0,0,0,0,0,0,0,...,3.188366,1.551483,1.182095,0.332583,-0.544295,0.850390,0.278737,-0.582936,0.482528,-0.582936
1290,1,0,0,0,0,0,0,0,0,0,...,2.926200,0.828146,1.424744,0.281409,-0.599576,1.100517,0.972707,-0.664815,0.308363,-0.664815
1291,1,0,0,0,0,0,0,0,0,0,...,2.202350,0.682070,0.517846,-0.090590,-0.576317,0.697704,0.360201,-0.595340,0.259514,-0.595340
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23348,1,0,0,0,0,0,0,0,0,0,...,2.464424,0.541477,0.167529,0.385249,0.080427,0.714941,0.342130,-0.711171,1.101728,-0.711171
23349,1,0,0,0,0,0,0,0,0,0,...,3.207061,1.082466,0.988320,0.095147,-0.489977,0.914892,0.104667,-0.584364,0.477090,-0.584364
23353,1,0,0,0,0,0,0,0,0,0,...,3.244109,0.655828,0.243551,-0.296171,-0.694262,0.436959,0.194114,-0.694262,0.668450,-0.694262
23393,1,0,0,0,0,0,0,0,0,0,...,2.953818,1.112624,0.565422,0.140386,-0.591475,0.430165,0.286503,-0.591475,0.870202,-0.591475


In [236]:
def load_filter_lookup():

    #replace this later with a query from the database
    types = ["MaterialTechnique", "Classification","Relationship", "Institution"]
    filter_dict = {}

    for t in types:
        fpath = f"../data/processed/ethz/fixtures/{t}.json"

        with open(fpath, 'r') as f:
            fixture = json.load(f) 

        string_lookup = {d['fields']['name']:d['pk'] for d in fixture}
        filter_dict[t] = string_lookup

    return filter_dict


def make_filter_vec(filter_dict, fltr_type, qry_names):
    
    qry_vec_len = len(filter_dict[fltr_type])
    
    if qry_names:
        qry_vec = np.zeros(qry_vec_len, dtype=int)
        # subtract 1 as vector is zero indexed but database index starts at 1
        indices = [filter_dict[fltr_type].get(qry_name) -1 for qry_name in qry_names]
        qry_vec[indices] = 1

    else:
        qry_vec = np.zeros(qry_vec_len, dtype=int)
    
    return qry_vec

In [237]:
def str_qry_to_vec(filter_dict,
                   class_qry=[], 
                   mat_tec_qry=[], 
                   rel_qry=[],
                   inst_qry=[]):
    """
    combine several one hot encoded queries in one function
    if no query parameters are passed (i.e. all empty lists)
    returns a vector of all ones
    """

    class_vec = make_filter_vec(filter_dict,
                                fltr_type="Classification",
                                qry_names=class_qry)

    mat_tec_vec = make_filter_vec(filter_dict,
                                  fltr_type="MaterialTechnique",
                                  qry_names=mat_tec_qry)    

    rel_vec = make_filter_vec(filter_dict,
                                fltr_type="Relationship",
                                qry_names=rel_qry)
    
    inst_vec = make_filter_vec(filter_dict,
                                fltr_type="Institution",
                                qry_names=inst_qry)

    qry_vecs = (class_vec,
                mat_tec_vec,
                rel_vec,
                inst_vec)
    
    return qry_vecs

###  Testing Lookup

In [7]:
filter_dict = load_filter_lookup()

In [8]:
## manually create a vector for testing
classification_qry_names = ["Druckgraphik"]
classification_qry_vec = [1,0]
mat_tec_qry_names = ["aluminiumdruck","kupferstich","zink"]
mat_tec_qry_vec = [1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1]
relationship_qry_names = ["zustandsvariante"]
relationship_qry_vec = [0,0,0,0,1]
institution_qry_names = []
institution_qry_vec = [0,]

qry_names = [classification_qry_names,
             mat_tec_qry_names,
             relationship_qry_names,
             institution_qry_names]

qry_vec_true = np.hstack([classification_qry_vec,
                     mat_tec_qry_vec,
                     relationship_qry_vec,
                     institution_qry_vec])
qry_vec_true


array([1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 0])

In [9]:
qry_vecs = str_qry_to_vec(filter_dict, 
                              class_qry=classification_qry_names, 
                              mat_tec_qry=mat_tec_qry_names, 
                              rel_qry=relationship_qry_names,
                              inst_qry=institution_qry_names)
qry_vecs

(array([0, 1, 0, 0, 0]),
 array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]),
 array([1]))

In [11]:

qry_vec_true = np.hstack(qry_vecs)
qry_vec_true


array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [221]:
qry_vecs = str_qry_to_vec(filter_dict, 
                              class_qry=["Buch"], 
                              mat_tec_qry=["kaltnadel","velin","kupferstich"], 
                              rel_qry=[],
                              inst_qry=[])
qry_vecs

(array([1, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
        0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
 array([1]))

In [222]:
qry_vec_test = str_qry_to_vec(filter_dict, 
                              class_qry=[], 
                              mat_tec_qry=[], 
                              rel_qry=[],
                              inst_qry=[])
qry_vec_test

(array([1, 1, 1, 1, 1]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
 array([1]))

In [223]:
qry_inputs[1]

<tf.Tensor: shape=(1, 126), dtype=float32, numpy=
array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]],
      dtype=float32)>

In [123]:
candidates_meta

<tf.Tensor: shape=(20792, 126), dtype=float32, numpy=
array([[0., 1., 0., ..., 0., 0., 1.],
       [1., 0., 0., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 1.],
       ...,
       [0., 1., 0., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 0., 1.],
       [0., 1., 0., ..., 0., 1., 1.]], dtype=float32)>

In [124]:
tf.multiply(qry_inputs[1], candidates_meta)

<tf.Tensor: shape=(1, 20792, 126), dtype=float32, numpy=
array([[[0., 1., 0., ..., 0., 0., 1.],
        [1., 0., 0., ..., 0., 0., 1.],
        [0., 1., 0., ..., 0., 0., 1.],
        ...,
        [0., 1., 0., ..., 0., 0., 1.],
        [0., 1., 0., ..., 0., 0., 1.],
        [0., 1., 0., ..., 0., 1., 1.]]], dtype=float32)>

In [129]:

scores_meta = tf.matmul(qry_inputs[1], candidates_meta, transpose_b=True)
scores_meta

<tf.Tensor: shape=(1, 1, 20792), dtype=float32, numpy=array([[[6., 6., 4., ..., 4., 4., 4.]]], dtype=float32)>

In [131]:
tf.math.reduce_min(scores_meta)

<tf.Tensor: shape=(), dtype=float32, numpy=2.0>

# Make a Brute Force Model

In [277]:
class EmbeddingMetaCross(tf.keras.Model):
    
    def __init__(self, 
                 identifiers=None,
                 candidates_embed=None,
                 candidates_meta=None,
                 candidates_years=None,
                 query_model=None,
                 k=100):
        
        """
        candidates_years: 2d array of min year and max year for the candidate
        """ 
        super().__init__()
        self.identifiers = identifiers 
        self.candidates_embed = candidates_embed 
        self.candidates_meta = candidates_meta
        self.candidates_years = candidates_years
        self.query_model = query_model
        self.k = tf.cast(k,tf.int32)
    
    def call(self, input_image, input_meta, input_years):

#         queries_embed = inputs[0]
        queries_embed = input_image
        if self.query_model is not None:
            queries_embed = self.query_model(queries_embed)

#         queries_meta = inputs[1]
        queries_meta = input_meta
#         queries_years = inputs[2]        
                # filter meta scores by year
        # include candidate if candidate's max year is after query min year
#         print("queries_years[:,0]", input_years[:,0])
        res_yr_after = tf.math.greater(candidates_years[:,1], input_years[:,0])
        # include candidate if candidate's min year is before query max year
        res_yr_before = tf.math.less(candidates_years[:,0], input_years[:,1])
        scores_meta = tf.cast(tf.math.logical_and(res_yr_after, res_yr_before), tf.float32)
#         print("scores_years: ", scores_meta)

        
        
        if queries_meta is not None:
            scores_meta_only = tf.matmul(queries_meta, self.candidates_meta, transpose_b=True)
#             print("scores_meta: ", tf.math.reduce_max(scores_meta_only))
            scores_meta = tf.multiply(scores_meta_only, scores_meta)
#             print("scores_meta2:",tf.math.reduce_max(scores_meta))

        scores_emb = tf.matmul(queries_embed, self.candidates_embed, transpose_b=True)
        
        # add a very small amount to scores_meta to prevent zero scores
#         scores_meta = scores_meta + 1e-6
        # multiply scores element-wise
        scores = tf.multiply(scores_meta, scores_emb)
        # get top results
        scores, indices = tf.math.top_k(scores, k=self.k)
    
        # look up identifiers
        identifiers = tf.gather(self.identifiers, indices)

        return scores, identifiers

In [225]:
fpath_feat = "../data/processed/ethz/features/features_meta_2.csv"
df_feat = pd.read_csv(fpath_feat, header=0, index_col=0)
if 'index' in df_feat.columns:
    df_feat = df_feat.drop(columns='index')

In [226]:
# one hot metadata cols
metadata_cols = [col for col in df_feat.columns if '_id' in col]
df_feat_meta = df_feat.loc[:,metadata_cols]
# years cols
df_years = df_feat.loc[:,["year_min","year_max"]]
#embedding cols
embed_len=512
df_feat_embed = df_feat.iloc[:,-embed_len:]


In [243]:
identifiers = df_feat_meta.index.tolist()
identifiers = tf.constant(identifiers, dtype=tf.int32)

candidates_meta = tf.constant(df_feat_meta.values, dtype=tf.float32)
candidates_embed = tf.constant(df_feat_embed.values, dtype=tf.float32)
candidates_years = tf.constant(df_years.values,dtype=tf.float32)


k=100
k = tf.constant(k, dtype=tf.int32)
index = EmbeddingMetaCross(identifiers=identifiers, 
                           candidates_embed=candidates_embed,
                           candidates_meta=candidates_meta,
                           candidates_years=candidates_years,
                           k=k)


### querying with embeddings

In [228]:
# qry_embed = tf.constant(qry_series_embed.values, dtype=tf.float32)
# qry_meta = tf.constant(qry_series_meta.values, dtype=tf.float32)
# k = tf.constant(100, dtype=tf.float32)

In [254]:
i = 10
# get image query
qry_series_embed = df_feat_embed.iloc[i,:].copy()
qry_series_meta = df_feat_meta.iloc[i,:].copy()
qry_embed = tf.constant(qry_series_embed.values, dtype=tf.float32)

# add text based query
qry_classification = []
qry_mat_tec = ["gouache",]
qry_meta_vec = str_qry_to_vec(filter_dict, 
               class_qry=qry_classification, 
               mat_tec_qry=qry_mat_tec, 
               rel_qry=[],
               inst_qry=[])
qry_meta_vec = np.hstack(qry_meta_vec)
print(qry_meta_vec)
# qry_meta_vec = np.zeros(126)
# qry_meta_vec[0] = 1
qry_meta = tf.constant(qry_meta_vec, dtype=tf.float32)
qry_meta = tf.expand_dims(qry_meta, 0)
# qry_meta = tf.constant([], dtype=tf.float32)
# add year filter
qry_years = tf.constant([1800,2000], dtype=tf.float32)


# reshape query as a batch
qry_inputs = (qry_embed, qry_meta, qry_years)
qry_inputs = [tf.expand_dims(x,0) for x in qry_inputs]

scores,indices = index.call(input_image=qry_inputs[0],
                            input_meta=qry_meta,
                            input_years=qry_inputs[2])
scores,indices

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
queries_years[:,0] tf.Tensor([1800.], shape=(1,), dtype=float32)
scores_years:  tf.Tensor([0. 0. 0. ... 0. 0. 0.], shape=(20792,), dtype=float32)
scores_meta:  tf.Tensor(1.0, shape=(), dtype=float32)
scores_meta2: tf.Tensor(1.0, shape=(), dtype=float32)


(<tf.Tensor: shape=(1, 100), dtype=float32, numpy=
 array([[472.73895, 467.11795, 466.37372, 460.7483 , 459.38586, 457.37115,
         455.8239 , 455.46558, 449.22522, 426.51752, 398.80157,   0.     ,
           0.     ,   0.     ,   0.     ,   0.     ,   0.     ,   0.     ,
           0.     ,   0.     ,   0.     ,   0.     ,   0.     ,   0.     ,
           0.     ,   0.     ,   0.     ,   0.     ,   0.     ,   0.     ,
           0.     ,   0.     ,   0.     ,   0.     ,   0.     ,   0.     ,
           0.     ,   0.     ,   0.     ,   0.     ,   0.     ,   0.     ,
           0.     ,   0.     ,   0.     ,   0.     ,   0.     ,   0.     ,
           0.     ,   0.     ,   0.     ,   0.     ,   0.     ,   0.     ,
           0.     ,   0.     ,   0.     ,   0.     ,   0.     ,   0.     ,
           0.     ,   0.     ,   0.     ,   0.     ,   0.     ,   0.     ,
           0.     ,   0.     ,   0.     ,   0.     ,   0.     ,   0.     ,
           0.     ,   0.     ,   0.     ,   0.   

In [255]:
df_meta.loc[indices.numpy()[0],:]

Unnamed: 0_level_0,record_id,image_url,inventory_number,person,date,title,classification,material_technique,institution_isil,record_url,image_licence,year_min,year_max,relationship_type_id,classification_id,material_technique_id,institution_isil_id,fpath
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
4370,201486,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,Z 000511,"Bühlmann, Johann Rudolf (1812 - 1890), Künstler",Entstehung: 24.7.1835,Glacier de Zmutt vers le Matterhorn et le Dent...,Handzeichnung,"Aquarell, Bleistift, Gouache, Velin",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1835,1835,[16],3,"[4, 9, 30, 92]",1,../data/processed/ethz/images/201/201486.png
4371,201487,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,Z 000512,"Bühlmann, Johann Rudolf (1812 - 1890), Künstler",Entstehung: 31.7.1835,Le Glacier d. Torrent au Valle d'Anniviers [re...,Handzeichnung,"Aquarell, Bleistift, Gouache, Velin",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1835,1835,[16],3,"[4, 9, 30, 92]",1,../data/processed/ethz/images/201/201487.png
4372,201488,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,Z 000513,"Bühlmann, Johann Rudolf (1812 - 1890), Künstler",Entstehung: 30.7.1832,Glacier de Zinal au Vallé d‘Anniviers,Handzeichnung,"Aquarell, Gouache, Bleistift, Velin",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1832,1832,[16],3,"[4, 30, 9, 92]",1,../data/processed/ethz/images/201/201488.png
19224,30672,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,Z 000374,"Bühlmann, Johann Rudolf (1812 - 1890), Künstler",Entstehung: 1.8.1835,Glacier de la Arolla dans la Valle d'Erin [d’H...,Handzeichnung,"Aquarell, Bleistift, Kreide, Gouache",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1835,1835,[5],3,"[4, 9, 46, 30]",1,../data/processed/ethz/images/30/30672.png
7930,22805,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,Z 000367,"Bühlmann, Johann Rudolf (1812 - 1890), Künstler",Entstehung: 23.7.1835,Boden Gletscher bei Zermatt,Handzeichnung,"Gouache, Kreide",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1835,1835,[5],3,"[30, 46]",1,../data/processed/ethz/images/22/22805.png
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1091,10302,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 000879,"Andreani, Andrea (1558 / 1559 um - 1629), Küns...",Enstehung des Druckträgers: 1591,Maria mit Kind und einem Bischof,Druckgraphik,"Chiaroscuroschnitt, Papier vergé",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1591,1591,,2,"[11, 64]",1,../data/processed/ethz/images/10/10302.png
1092,1031,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 007287,"Houbraken, Jacobus (1698 - 1780); Drouais, Fra...",Enstehung des Druckträgers: 1774,Porträt von Georges Louis Leclerc de Buffon,Druckgraphik,Kupferstich,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1774,1774,,2,[50],1,../data/processed/ethz/images/1/1031.png
1093,10338,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 000666,"Leyden, Lucas Hugensz. van (1488 / 1499 bzw. 1...",Enstehung des Druckträgers: Nach 1524,Musizierendes Paar,Druckgraphik,Kupferstich,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1524,1524,[11],2,[50],1,../data/processed/ethz/images/10/10338.png
1094,103483,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 017650,"Collignon, François (1610 - 1687), Künstler; T...",Enstehung des Druckträgers: 1656,Jüngling wird von Fortuna der Zeit und der Mis...,Druckgraphik,"Radierung, Papier vergé",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1656,1656,,2,"[73, 64]",1,../data/processed/ethz/images/103/103483.png


In [182]:
(df_meta["classification"]=="Buch").sum()

77

In [345]:
embed_shape = tf.keras.Input(shape=(512,), dtype=tf.float32)
meta_shape = tf.keras.Input(shape=(126,), dtype=tf.float32) 
years_shape = tf.keras.Input(shape=(2,), dtype=tf.float32)
# k_shape = tf.keras.Input(shape=(1,), dtype=tf.float32)
inputs_shapes = [embed_shape, meta_shape, years_shape]

In [355]:
inputs_shapes

[<KerasTensor: shape=(None, 512) dtype=float32 (created by layer 'input_13')>,
 <KerasTensor: shape=(None, 126) dtype=float32 (created by layer 'input_14')>,
 <KerasTensor: shape=(None, 2) dtype=float32 (created by layer 'input_15')>]

In [346]:
index(inputs_shapes)
index.build(input_shape=index.input_shape)

In [347]:
index(qry_inputs)

(<tf.Tensor: shape=(1, 100), dtype=float32, numpy=
 array([[464.33167, 462.5372 , 461.48358, 461.10098, 461.07373, 460.47134,
         459.9972 , 459.80835, 459.44696, 459.12592, 457.96283, 457.66257,
         457.5121 , 457.18597, 456.8543 , 456.788  , 456.58514, 456.53745,
         456.2824 , 455.9848 , 455.9546 , 455.66266, 455.6113 , 455.33945,
         454.99796, 454.40054, 454.39377, 454.39267, 454.3191 , 453.99698,
         453.97803, 453.8394 , 453.31458, 453.2952 , 453.13214, 452.84866,
         452.48065, 452.448  , 452.39536, 452.19232, 452.1023 , 451.99707,
         451.86505, 451.7464 , 451.63605, 451.4455 , 451.43802, 451.41876,
         451.2775 , 450.91406, 450.89465, 450.13977, 450.0121 , 449.9319 ,
         449.9114 , 449.67883, 449.60077, 449.2818 , 449.1251 , 449.06134,
         448.96982, 448.95905, 448.86493, 448.82034, 448.80753, 448.73776,
         448.55853, 448.3976 , 448.20688, 448.19232, 448.1154 , 447.79144,
         447.71774, 447.6941 , 447.66754, 447.540

### Save Model

In [348]:
tf.saved_model.save(index,"../models/retrieval/5")

INFO:tensorflow:Assets written to: ../models/retrieval/5/assets


### Reload Model
test query with reloaded model

In [349]:
index_load = tf.saved_model.load("../models/retrieval/5")

In [350]:
index_load(qry_inputs)

(<tf.Tensor: shape=(1, 100), dtype=float32, numpy=
 array([[464.33167, 462.5372 , 461.48358, 461.10098, 461.07373, 460.47134,
         459.9972 , 459.80835, 459.44696, 459.12592, 457.96283, 457.66257,
         457.5121 , 457.18597, 456.8543 , 456.788  , 456.58514, 456.53745,
         456.2824 , 455.9848 , 455.9546 , 455.66266, 455.6113 , 455.33945,
         454.99796, 454.40054, 454.39377, 454.39267, 454.3191 , 453.99698,
         453.97803, 453.8394 , 453.31458, 453.2952 , 453.13214, 452.84866,
         452.48065, 452.448  , 452.39536, 452.19232, 452.1023 , 451.99707,
         451.86505, 451.7464 , 451.63605, 451.4455 , 451.43802, 451.41876,
         451.2775 , 450.91406, 450.89465, 450.13977, 450.0121 , 449.9319 ,
         449.9114 , 449.67883, 449.60077, 449.2818 , 449.1251 , 449.06134,
         448.96982, 448.95905, 448.86493, 448.82034, 448.80753, 448.73776,
         448.55853, 448.3976 , 448.20688, 448.19232, 448.1154 , 447.79144,
         447.71774, 447.6941 , 447.66754, 447.540

In [354]:
index_load.inputs

AttributeError: '_UserObject' object has no attribute 'inputs'

# query with image

#### make model

In [278]:
fpath_feat = "../data/processed/ethz/features/features_meta_2.csv"
df_feat = pd.read_csv(fpath_feat, header=0, index_col=0)
if 'index' in df_feat.columns:
    df_feat = df_feat.drop(columns='index')

In [279]:
# one hot metadata cols
metadata_cols = [col for col in df_feat.columns if '_id' in col]
df_feat_meta = df_feat.loc[:,metadata_cols]
# years cols
df_years = df_feat.loc[:,["year_min","year_max"]]
#embedding cols
embed_len=512
df_feat_embed = df_feat.iloc[:,-embed_len:]


In [280]:
fpath_model = settings.model_fldr_path
fpath_model = "../models/feature_extraction/2"
ftx_model = tf.keras.models.load_model(fpath_model)

identifiers = df_feat_meta.index.tolist()
identifiers = tf.constant(identifiers, dtype=tf.int32)

candidates_meta = tf.constant(df_feat_meta.values, dtype=tf.float32)
candidates_embed = tf.constant(df_feat_embed.values, dtype=tf.float32)
candidates_years = tf.constant(df_years.values,dtype=tf.float32)


k=100
k = tf.constant(k, dtype=tf.int32)
index = EmbeddingMetaCross(identifiers=identifiers, 
                           candidates_embed=candidates_embed,
                           candidates_meta=candidates_meta,
                           candidates_years=candidates_years,
                           query_model=ftx_model,
                           k=k)




In [281]:
candidates_meta.shape

TensorShape([20792, 126])

#### make query

In [282]:
def preprocess_img(image_path_or_stream, DEBUG=True):

    img = Image.open(image_path_or_stream)
    img = img.convert("RGB")
    size = 224, 224
    img = resize_image(img, size[0])
    if DEBUG:
        print('np array shape: ', np.array(img).shape)
    img = img.resize(size, Image.ANTIALIAS)
    img = np.array(img)
    if DEBUG:
        print('np array shape: ', img.shape)
    img = img / 255
#     img = img.tolist()

    return img


In [290]:
i = 100

db_id= df_feat_embed.index[i]
print(db_id)
# load an image file 
fpath = df_meta.loc[db_id,"fpath"]
print(fpath)
img = preprocess_img(fpath, DEBUG=False)
qry_img = tf.constant(img, dtype=tf.float32)
qry_img = tf.expand_dims(qry_img, 0)
# add text based filters
# qry_classification = ["Buch",]
# qry_mat_tec = []
# convert to vector
# meta_vec = str_qry_to_vec(filter_dict, 
#                class_qry=qry_classification, 
#                mat_tec_qry=qry_mat_tec, 
#                rel_qry=[],
#                inst_qry=[])
# # make query tensors
meta_vec = np.zeros(126)
meta_vec[3 :] = 1
qry_meta = tf.constant(meta_vec, dtype=tf.float32)
qry_meta = tf.expand_dims(qry_meta, 0)

qry_years = tf.constant([0,3000], dtype=tf.float32)
qry_years = tf.expand_dims(qry_years, 0)
# reshape query as a batch
qry_inputs = (qry_img, qry_meta, qry_years)
qry_inputs = [tf.expand_dims(x,0) for x in qry_inputs]

scores,indices = index(input_image=qry_img,
                       input_meta=qry_meta,
                       input_years=qry_years)
scores,indices

1107
../data/processed/ethz/images/10/10353.png


(<tf.Tensor: shape=(1, 100), dtype=float32, numpy=
 array([[3566.283 , 3344.7827, 3318.8867, 3210.5483, 3209.6885, 3155.8608,
         3131.5632, 3129.4333, 3101.4844, 3097.4966, 3087.271 , 3085.0068,
         3074.8037, 3068.249 , 3068.044 , 3040.8284, 3036.9175, 3031.6704,
         3011.4995, 2992.2075, 2989.9592, 2985.215 , 2984.9897, 2974.432 ,
         2972.2126, 2960.149 , 2958.644 , 2958.336 , 2958.336 , 2955.387 ,
         2955.2979, 2952.5474, 2949.449 , 2948.8115, 2938.2278, 2935.111 ,
         2929.4028, 2928.6184, 2928.1033, 2924.818 , 2919.9956, 2916.521 ,
         2916.2947, 2915.7996, 2910.764 , 2910.764 , 2910.577 , 2908.9487,
         2907.9438, 2903.1587, 2902.5332, 2901.2605, 2900.1624, 2899.3096,
         2899.1335, 2898.3066, 2898.021 , 2897.4211, 2893.9382, 2877.7788,
         2875.7446, 2866.9556, 2866.7673, 2850.801 , 2836.2837, 2832.743 ,
         2831.88  , 2821.9565, 2821.059 , 2811.3625, 2806.4265, 2806.3599,
         2805.9136, 2804.463 , 2803.0137, 2802.75

In [284]:
df_meta.loc[indices.numpy()[0],:]

Unnamed: 0_level_0,record_id,image_url,inventory_number,person,date,title,classification,material_technique,institution_isil,record_url,image_licence,year_min,year_max,relationship_type_id,classification_id,material_technique_id,institution_isil_id,fpath
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1107,10353,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 000511,"Leyden, Lucas Hugensz. van (1488 / 1499 bzw. 1...",Enstehung des Druckträgers: 1530,Fides,Druckgraphik,Kupferstich,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1530,1530,,2,[50],1,../data/processed/ethz/images/10/10353.png
18340,29908,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 000396.12,"Muller, Jan Harmensz. (1571 - 1628), Künstler;...",Enstehung des Druckträgers: Um 1615 - 1620,"Grablegung, Blatt 12 der Folge ""Die Passion Ch...",Druckgraphik,Kupferstich,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1615,1620,"[10, 16, 1]",2,[50],1,../data/processed/ethz/images/29/29908.png
14584,26453,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 000688,"Muller, Jan Harmensz. (1571 - 1628), Künstler;...",Enstehung des Druckträgers: Um 1615 - 1620,Grablegung,Druckgraphik,Kupferstich,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1615,1620,"[10, 1]",2,[50],1,../data/processed/ethz/images/26/26453.png
14513,26391,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 008362,"Treu, Martin (1540 tätig um)",Enstehung des Druckträgers: Um 1540,"Tanzendes Bauernpaar, Blatt 5 der Folge ""Tanze...",Druckgraphik,Kupferstich,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1540,1540,,2,[50],1,../data/processed/ethz/images/26/26391.png
2167,13583,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 000468,"Leyden, Lucas Hugensz. van (1488 / 1499 bzw. 1...",Enstehung des Druckträgers: Um 1506,Sündenfall,Druckgraphik,Kupferstich,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1506,1506,,2,[50],1,../data/processed/ethz/images/13/13583.png
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8620,23504,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 008211,"Monogrammist SK (16. Jahrhundert, 2. Hälfte), ...",Enstehung des Druckträgers: 1550 - 1600 [zweit...,Beschneidung Christi,Druckgraphik,Radierung,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1550,1600,,2,[73],1,../data/processed/ethz/images/23/23504.png
4416,20217,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 007667,"Dürer, Albrecht (1471 - 1528)",Enstehung des Druckträgers: 1512,"Geisselung, Blatt 6 der Folge ""Kupferstich-Pas...",Druckgraphik,Kupferstich,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1512,1512,[5],2,[50],1,../data/processed/ethz/images/20/20217.png
1926,12870,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 000357,"Leyden, Lucas Hugensz. van (1488 / 1499 bzw. 1...",Enstehung des Druckträgers: Um 1508,David spielt vor Saulus die Harfe,Druckgraphik,Kupferstich,Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1508,1508,[19],2,[50],1,../data/processed/ethz/images/12/12870.png
19465,30966,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,D 015888,"Beham, Hans Sebald (1500 - 1550), Künstler",Enstehung des Druckträgers: 1545 - 1546,"Jakobus der Ältere, Blatt 9 der Folge ""Die zwö...",Druckgraphik,"Papier vergé, Kupferstich",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1545,1546,[19],2,"[64, 50]",1,../data/processed/ethz/images/30/30966.png


In [266]:
img_shape = tf.keras.Input(name="input_image", shape=(224,224,3), dtype=tf.float32)
meta_shape = tf.keras.Input(name="input_meta", shape=(126,), dtype=tf.float32)
years_shape = tf.keras.Input(name="input_years", shape=(2,), dtype=tf.float32)
# k_shape = tf.keras.Input(shape=(1,), dtype=tf.float32)
inputs_shapes = [img_shape, meta_shape, years_shape]
inputs_shapes = {"input_image":img_shape,"input_meta":meta_shape,"input_years":years_shape}

In [270]:
index(**inputs_shapes)
# index.build(input_shape=index.input_shape,)

queries_years[:,0] Tensor("embedding_meta_cross_18/strided_slice:0", shape=(None,), dtype=float32)
scores_years:  Tensor("embedding_meta_cross_18/Cast:0", shape=(20792,), dtype=float32)
scores_meta:  Tensor("embedding_meta_cross_18/Max:0", shape=(), dtype=float32)
scores_meta2: Tensor("embedding_meta_cross_18/Max_1:0", shape=(), dtype=float32)


(<KerasTensor: shape=(None, 100) dtype=float32 (created by layer 'embedding_meta_cross_18')>,
 <KerasTensor: shape=(None, 100) dtype=int32 (created by layer 'embedding_meta_cross_18')>)

In [272]:
scores,indices = index(input_image=qry_img,
                       input_meta=qry_meta,
                       input_years=qry_years)
scores,indices

queries_years[:,0] tf.Tensor([0.], shape=(1,), dtype=float32)
scores_years:  tf.Tensor([1. 1. 1. ... 1. 1. 1.], shape=(20792,), dtype=float32)
scores_meta:  tf.Tensor(1.0, shape=(), dtype=float32)
scores_meta2: tf.Tensor(1.0, shape=(), dtype=float32)


(<tf.Tensor: shape=(1, 100), dtype=float32, numpy=
 array([[511.09738, 474.79355, 474.53394, 473.74585, 473.57144, 473.44818,
         473.30597, 473.2421 , 472.54214, 472.40884, 472.2439 , 471.97998,
         471.91486, 471.90445, 471.82657, 471.63788, 471.6312 , 471.26486,
         471.2446 , 471.2411 , 471.23495, 470.93915, 470.93314, 470.92337,
         470.8526 , 470.53268, 470.5135 , 470.4675 , 470.41757, 470.35168,
         470.32608, 470.22314, 470.1765 , 470.1632 , 470.01736, 469.93753,
         469.90085, 469.89807, 469.89154, 469.76895, 469.71808, 469.65234,
         469.62274, 469.49887, 469.48315, 469.36987, 469.30957, 469.2813 ,
         469.2753 , 469.22937, 469.18927, 469.18372, 469.06012, 468.9788 ,
         468.974  , 468.95917, 468.90182, 468.64917, 468.63284, 468.5835 ,
         468.58252, 468.5764 , 468.5615 , 468.45013, 468.4417 , 468.31274,
         468.2296 , 468.21075, 468.20465, 468.198  , 468.1685 , 468.16364,
         468.14856, 468.1018 , 468.0821 , 468.082

### Save Model

In [273]:
tf.saved_model.save(index,"../models/retrieval_exclusion/3")

queries_years[:,0] Tensor("embedding_meta_cross_18/strided_slice:0", shape=(None,), dtype=float32)
scores_years:  Tensor("embedding_meta_cross_18/Cast:0", shape=(20792,), dtype=float32)
scores_meta:  Tensor("embedding_meta_cross_18/Max:0", shape=(), dtype=float32)
scores_meta2: Tensor("embedding_meta_cross_18/Max_1:0", shape=(), dtype=float32)
queries_years[:,0] Tensor("strided_slice:0", shape=(None,), dtype=float32)
scores_years:  Tensor("Cast:0", shape=(20792,), dtype=float32)
scores_meta:  Tensor("Max:0", shape=(), dtype=float32)
scores_meta2: Tensor("Max_1:0", shape=(), dtype=float32)
queries_years[:,0] Tensor("strided_slice:0", shape=(None,), dtype=float32)
scores_years:  Tensor("Cast:0", shape=(20792,), dtype=float32)
scores_meta:  Tensor("Max:0", shape=(), dtype=float32)
scores_meta2: Tensor("Max_1:0", shape=(), dtype=float32)
queries_years[:,0] Tensor("strided_slice:0", shape=(None,), dtype=float32)
scores_years:  Tensor("Cast:0", shape=(20792,), dtype=float32)
scores_meta:  Te

### Reload Model
test query with reloaded model

In [286]:
index_load = tf.saved_model.load("../models/retrieval_exclusion/3")

In [291]:
scores,indices = index(input_image=qry_img,
                       input_meta=qry_meta,
                       input_years=qry_years)
scores,indices

(<tf.Tensor: shape=(1, 100), dtype=float32, numpy=
 array([[3566.283 , 3344.7827, 3318.8867, 3210.5483, 3209.6885, 3155.8608,
         3131.5632, 3129.4333, 3101.4844, 3097.4966, 3087.271 , 3085.0068,
         3074.8037, 3068.249 , 3068.044 , 3040.8284, 3036.9175, 3031.6704,
         3011.4995, 2992.2075, 2989.9592, 2985.215 , 2984.9897, 2974.432 ,
         2972.2126, 2960.149 , 2958.644 , 2958.336 , 2958.336 , 2955.387 ,
         2955.2979, 2952.5474, 2949.449 , 2948.8115, 2938.2278, 2935.111 ,
         2929.4028, 2928.6184, 2928.1033, 2924.818 , 2919.9956, 2916.521 ,
         2916.2947, 2915.7996, 2910.764 , 2910.764 , 2910.577 , 2908.9487,
         2907.9438, 2903.1587, 2902.5332, 2901.2605, 2900.1624, 2899.3096,
         2899.1335, 2898.3066, 2898.021 , 2897.4211, 2893.9382, 2877.7788,
         2875.7446, 2866.9556, 2866.7673, 2850.801 , 2836.2837, 2832.743 ,
         2831.88  , 2821.9565, 2821.059 , 2811.3625, 2806.4265, 2806.3599,
         2805.9136, 2804.463 , 2803.0137, 2802.75

# Create Scann Model

In [410]:
fpath_feat = "../data/processed/ethz/features/features_meta_2.csv"
df_feat = pd.read_csv(fpath_feat, header=0, index_col=0)
if 'index' in df_feat.columns:
    df_feat = df_feat.drop(columns='index')

In [360]:
#embedding cols
embed_len=512
df_feat_embed = df_feat.iloc[:,-embed_len:]


In [409]:
fpath_model = settings.model_fldr_path
fpath_model = "../models/feature_extraction/2"
ftx_model = tf.keras.models.load_model(fpath_model)



In [425]:
identifiers = df_feat.index.tolist()
candidates_identifiers = tf.constant(identifiers, dtype=tf.int32)

#embedding cols
embed_len=512
df_feat_embed = df_feat.iloc[:,-embed_len:]
candidates_embed = tf.constant(df_feat_embed.values, dtype=tf.float32)

k=100
# index = EmbeddingMetaCross(identifiers=identifiers, 
#                            candidates_embed=candidates_embed,
#                            candidates_meta=candidates_meta,
#                            candidates_years=candidates_years,
#                            query_model=ftx_model,
#                            k=k)


retrieval_model = tfrs.layers.factorized_top_k.ScaNN(query_model=ftx_model, k=k,num_leaves_to_search=10)

# create the retrieval index


retrieval_model.index(candidates_embed, candidates_identifiers)

retrieval_model.compile()

In [426]:
### query with an image

In [429]:
i = 2
db_id = df_feat_embed.index[i]
print(db_id)
# load an image file 
fpath = df_meta.loc[db_id,"fpath"]
img = preprocess_img(fpath, DEBUG=False)
qry_img = tf.constant([img,], dtype=tf.float32)

1003


In [435]:
%%timeit
scores, ids = retrieval_model(qry_img)
# scores, ids

238 ms ± 30.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [438]:
save_options = tf.saved_model.SaveOptions(namespace_whitelist=["Scann",])
tf.saved_model.save(retrieval_model,"../models/retrieval/3", options=save_options)



INFO:tensorflow:Assets written to: ../models/retrieval/3/assets


INFO:tensorflow:Assets written to: ../models/retrieval/3/assets


In [295]:
ls = [19293,
    20688,
    19521,
    20192,
    18957,
    16119,
    18622,
    19742,
    21941,
    19635,]
df_meta.loc[ls,:]

Unnamed: 0_level_0,record_id,image_url,inventory_number,person,date,title,classification,material_technique,institution_isil,record_url,image_licence,year_min,year_max,relationship_type_id,classification_id,material_technique_id,institution_isil_id,fpath
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
19293,30757,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,2007.0839.5,"Moholy-Nagy, László (1895 - 1946), Künstler",Entstehung des Abzugs: 1943,"Plexiglas-Mobile in Bewegung, Blatt 4 aus ""Lic...",Photographie,"Schwarzweiss-Photographie, Photopapier",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1943,1943,[16],4,"[80, 66]",1,../data/processed/ethz/images/30/30757.png
20688,32417,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,2007.0839.6,"Moholy-Nagy, László (1895 - 1946), Künstler",Entstehung des Abzugs: 1945,"Spirals, Blatt 5 aus ""Licht-Raum-Modulationen""",Photographie,"Schwarzweiss-Photographie, Photopapier",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1945,1945,[16],4,"[80, 66]",1,../data/processed/ethz/images/32/32417.png
19521,31031,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,2007.0848,"Moholy-Nagy, László (1895 - 1946), Künstler",Entstehung des Abzugs: 1910 - 1946,Frau im Park,Photographie,"Schwarzweiss-Photographie, Photopapier",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1910,1946,,4,"[80, 66]",1,../data/processed/ethz/images/31/31031.png
20192,31819,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,2007.0839.4,"Moholy-Nagy, László (1895 - 1946), Künstler",Entstehung des Abzugs: 1942,"Space modulator with highlights, Blatt 3 aus ""...",Photographie,"Schwarzweiss-Photographie, Photopapier",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1942,1942,[16],4,"[80, 66]",1,../data/processed/ethz/images/31/31819.png
18957,30382,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,2007.0842,"Moholy-Nagy, László (1895 - 1946), Künstler",Entstehung des Abzugs: Um 1928,Blick vom Funkturm Berlin,Photographie,"Schwarzweiss-Photographie, Photopapier",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1928,1928,,4,"[80, 66]",1,../data/processed/ethz/images/30/30382.png
16119,2792,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,2007.0839.1-7,"Moholy-Nagy, László (1895 - 1946), Künstler",Entstehung des Abzugs: 1930 - 1946,Licht-Raum-Modulationen,Photographie,"Photopapier, Schwarzweiss-Photographie",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1930,1946,[16],4,"[66, 80]",1,../data/processed/ethz/images/2/2792.png
18622,30055,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,2007.0839.7,"Moholy-Nagy, László (1895 - 1946), Künstler",Entstehung des Abzugs: 1946,"Wire sculpture, Blatt 6 aus ""Licht-Raum-Modula...",Photographie,"Schwarzweiss-Photographie, Photopapier",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1946,1946,[16],4,"[80, 66]",1,../data/processed/ethz/images/30/30055.png
19742,31300,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,2007.0839.2,"Moholy-Nagy, László (1895 - 1946), Künstler",Entstehung des Abzugs: 1930,"Licht-Raum-Modulator, Blatt 1 aus ""Licht-Raum-...",Photographie,"Schwarzweiss-Photographie, Photopapier",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1930,1930,[16],4,"[80, 66]",1,../data/processed/ethz/images/31/31300.png
21941,33994,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,2007.0850,"Moholy-Nagy, László (1895 - 1946), Künstler",Entstehung des Abzugs: 1910 - 1946,Ohne Titel [Wendeltreppe],Photographie,"Photopapier, Schwarzweiss-Photographie",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1910,1946,,4,"[66, 80]",1,../data/processed/ethz/images/33/33994.png
19635,31158,https://e-gs.ethz.ch/eMP/eMuseumPlus?service=I...,2007.0839.3,"Moholy-Nagy, László (1895 - 1946), Künstler",Entstehung des Abzugs: 1936,"Kinetische Skulptur (""Gyros"" in motion), Blatt...",Photographie,"Schwarzweiss-Photographie, Photopapier",Graphische Sammlung ETH Zürich (CH-000511-9),https://e-gs.ethz.ch/eMP/eMuseumPlus?service=E...,Public Domain Mark 1.0,1936,1936,[16],4,"[80, 66]",1,../data/processed/ethz/images/31/31158.png


# experiment with cosine similarity for post ranking

In [42]:
df_feat.iloc[:,126:].loc[indices.numpy()[0]]

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,503,504,505,506,507,508,509,510,511,512
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
19558,0.114296,-0.597790,0.498638,-0.597790,-0.225226,-0.597790,-0.597790,0.689815,-0.468098,-0.597790,...,1.976307,0.451277,1.376148,0.143139,-0.223372,0.482784,1.100477,-0.597790,0.384275,-0.597790
22113,0.597441,-0.668479,1.856117,-0.668479,0.201761,-0.668479,-0.662563,1.044212,0.038411,-0.668479,...,1.726870,0.363160,1.279065,0.413696,-0.311629,0.359807,0.568241,-0.668479,0.296696,-0.668479
19470,0.195953,-0.618876,1.424347,-0.493038,0.318731,-0.618876,-0.618876,0.350226,0.499453,-0.321029,...,1.783194,0.272848,1.409922,0.280465,0.151470,0.447525,0.624614,-0.618876,0.299529,-0.618876
21742,0.874752,-0.665188,0.295080,-0.665188,0.283479,-0.665188,-0.519762,0.586755,-0.649769,-0.665188,...,1.675358,1.012375,1.636367,0.311924,-0.277919,0.103804,-0.017347,-0.665188,1.265564,-0.665188
2558,-0.199405,-0.635149,0.591217,-0.635149,0.474040,-0.635149,-0.635149,0.516687,0.909131,-0.635149,...,1.999421,0.296637,1.353028,0.670003,-0.059568,0.238252,0.640529,-0.635149,0.280776,-0.635149
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5854,0.562475,-0.598707,1.151434,-0.598707,-0.154313,-0.598707,-0.526713,0.071257,-0.598707,-0.598707,...,1.890669,0.223787,1.258144,0.669032,-0.593278,-0.038801,0.554865,-0.598707,0.638352,-0.598707
22656,0.496757,-0.670724,1.181979,-0.315514,0.178492,-0.670724,-0.670724,0.107324,0.513196,-0.324476,...,2.308914,0.417688,1.249788,0.270655,-0.226221,0.995771,0.189609,-0.670724,0.928864,-0.670724
14175,0.411391,-0.607947,0.894980,-0.500327,0.226460,-0.607947,-0.607947,0.222191,-0.561596,-0.607947,...,2.297712,0.950209,1.551591,0.599280,-0.479417,0.531842,0.391015,-0.607947,1.028079,-0.607947
21045,0.115656,-0.655357,0.800468,-0.385348,-0.260987,-0.655357,-0.655357,1.050161,0.757506,-0.451030,...,1.818595,0.583553,1.351623,0.713923,0.746826,0.310634,0.306423,-0.655357,0.348283,-0.655357


In [244]:
cosine_loss = tf.keras.losses.CosineSimilarity(axis=-1,reduction=tf.keras.losses.Reduction.NONE)

In [245]:
cosine_loss(tf.broadcast_to(qry_embed, [100, 512]), tf.constant(df_feat.iloc[:,126:].loc[indices.numpy()[0]].values, dtype=tf.float32) )

<tf.Tensor: shape=(100,), dtype=float32, numpy=
array([-0.90591955, -0.9020876 , -0.90159225, -0.9015013 , -0.90019584,
       -0.89975315, -0.8992799 , -0.89883447, -0.89829284, -0.8982369 ,
       -0.8975458 , -0.8973627 , -0.8972641 , -0.897153  , -0.89696026,
       -0.89692825, -0.89642584, -0.8962996 , -0.89586455, -0.8945787 ,
       -0.8944926 , -0.8943026 , -0.89389795, -0.89317983, -0.89305973,
       -0.8926155 , -0.8921059 , -0.89081836, -0.8907668 , -0.89052737,
       -0.89045805, -0.89029115, -0.89007694, -0.88997185, -0.8900322 ,
       -0.8899186 , -0.8898293 , -0.8893368 , -0.889287  , -0.8888936 ,
       -0.88824284, -0.8882134 , -0.8874745 , -0.8871348 , -0.8870317 ,
       -0.8866393 , -0.8862702 , -0.8860609 , -0.88561964, -0.88437736,
       -0.88418794, -0.88399386, -0.88387716, -0.8835293 , -0.88339233,
       -0.88327765, -0.88289875, -0.88291895, -0.8826823 , -0.8826176 ,
       -0.8820214 , -0.88194597, -0.88144004, -0.88139373, -0.8811556 ,
       -0.880972

In [222]:
retrieval_model = tfrs.layers.factorized_top_k.ScaNN(query_model=ftx_model)
