In [1]:
# Run the autoencoder notebook first (presumably it writes the 'encoder.h5' file loaded below).

import keras
from keras.models import load_model

from keras.models import Model

# Build the feature extractor: a ResNet50 backbone whose output is passed
# through the previously saved encoder model.
print("Loading models...")

# include_top=False drops the classification head; pooling='avg' asks Keras for
# a globally average-pooled output instead of the raw convolutional feature map.
res_net = keras.applications.resnet50.ResNet50(
    include_top=False,
    pooling='avg',
)

# Encoder saved to disk beforehand (file must exist in the working directory).
encoder = load_model('encoder.h5')

print("Combining models...")
# Chain the two networks: the image goes into ResNet50, and its pooled output
# is fed to the encoder.
encoded_output = encoder(res_net.output)
combined = Model(inputs=res_net.input, outputs=encoded_output)


Using TensorFlow backend.


Loading models...
Instructions for updating:
Colocations handled automatically by placer.
Combining models...




In [2]:
import db.mariadb as mariadb
import numpy as np

from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input
from PIL.Image import DecompressionBombError
# Change according to your database setup (these are the defaults, see .env file)

# Sampling configuration: N_FILES file records are requested from the database
# and every SAMPLE_STEP-th record is kept (800000 / 5000 = 160 images).
N_FILES = 800000
SAMPLE_STEP = 5000
# Size every image is resized to before it is fed to the combined model.
TARGET_SIZE = (224, 224)

print("Sampling data...")
# Arguments are presumably host, port, database, user, password - confirm
# against db.mariadb.
# NOTE(review): credentials are hard-coded; consider reading them from the
# environment / .env file instead.
con = mariadb.get_connection("127.0.0.1", 3308, "image_processing", "user", "user_pw")
try:
    files_batch = mariadb.get_files_data(0, N_FILES, con)
finally:
    # Always release the connection, even when the query raises.
    con.close()

# Keep only every SAMPLE_STEP-th (5000th) file record.
files_batch = files_batch[::SAMPLE_STEP]
features = []
features_info = []

# Compute one feature vector per sampled image. Images that cannot be loaded
# are reported and skipped, so `features` and `features_info` stay aligned
# index by index.
print("Creating image features using combined model.")
counter = 1  # ends at (number of successfully processed images) + 1
for (img_id, name, path, url) in files_batch:
    try:
        img = image.load_img(path, target_size=TARGET_SIZE)
        img_data = image.img_to_array(img)
        # The model expects a batch dimension in front of the image array.
        img_data = np.expand_dims(img_data, axis=0)
        img_data = preprocess_input(img_data)
        res_net_feature = combined.predict(img_data)
        # Collapse the (1, n) prediction into a flat vector.
        res_net_feature = np.array(res_net_feature).flatten()

        features.append(res_net_feature)
        features_info.append((img_id, name, path, url))

        counter += 1

    except (OSError, DecompressionBombError) as e:
        # Report the error together with the offending file, then continue
        # with the next image.
        print(e)
        print(path)

features = np.array(features)

print("Feature matrix shape:")
print(features.shape)

print("Feature info list length:")
print(len(features_info))


Sampling data...
Creating image features using combined model.
Feature matrix shape:
(160, 32)
Feature info list length:
160


In [3]:
from sklearn.neighbors import NearestNeighbors
# Number of neighbours requested per image. kneighbors() without a query does
# not count a point as its own neighbour, so at most len(features) - 1
# neighbours exist; clamp to that so a small sample does not raise ValueError.
N_NEIGHBOURS = 100
neigh = NearestNeighbors(n_neighbors=min(N_NEIGHBOURS, max(1, len(features) - 1)))

# fit() returns the estimator itself, so `result` refers to the fitted `neigh`.
result = neigh.fit(features)

# Tuple (distances, indices): row i lists the neighbours of features[i],
# and the indices also index into `features_info`.
neighbours = result.kneighbors()


In [5]:
# Position (row in `features` / `features_info`) of the image to inspect.
image_id = 0

# neighbours[1] holds the neighbour index rows; take the row for this image once.
neighbour_ids = neighbours[1][image_id]

# Look up the file info tuple of every neighbour.
selection = [features_info[idx] for idx in neighbour_ids]

print(f"Image {image_id} neighbour's ids:")
print(neighbour_ids)

print(f"Image {image_id} file info:")
print(features_info[image_id])

print("Neighbours' file info:")
for item in selection:
    print(item)


Image 0 neighbour's ids:
[ 83 150  94  45  73  96 151  36  55 155  88  62  65  61   1 153  57 110
   2  25 134  19  71  17  16  63 126  79  82 104 107  41  93 103 113  59
 118 127 112 111 102  18  13 100  91  26 140 117  35 109  34  49  90 116
  54  68 119 105  84 106 156  44  22 147  21  81 149  86  27  85  60 158
  98   3  64  80 122  92  15  30   9  14  43  42   8 148  20  12  76  10
 108  29 135  40 136  66  38 114  52  37]
Image 0 file info:
(1, 'AAP1.3_3303870,01.jpg', '/home/shohl/Bilder/idai_cloud_mount/FADatenbankabb0569/AAP1.3_3303870,01.jpg', 'https://arachne.dainst.org/data/image/5561841')
Neighbours' file info:
(415001, 'FA-S7788-02_3000121,01.jpg', '/home/shohl/Bilder/idai_cloud_mount/FADatenbankabb0108/FA-S7788-02_3000121,01.jpg', None)
(750001, 'FA-SPerg000005-02_34926,01.jpg', '/home/shohl/Bilder/idai_cloud_mount/FADatenbankabb0488/FA-SPerg000005-02_34926,01.jpg', 'https://arachne.dainst.org/data/image/1005054')
(470001, 'FittCap69-73-05_52415,01.jpg', '/home/shohl/Bil