<a href="https://colab.research.google.com/github/zerotodeeplearning/ztdl-masterclasses/blob/master/solutions_do_not_open/Image_Search_solution.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Learn with us: www.zerotodeeplearning.com

Copyright © 2021: Zero to Deep Learning ® Catalit LLC.

In [None]:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Image Search

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import os
from tensorflow.keras.preprocessing import image

In [None]:
# Alternative download via Keras (disabled in favour of the gsutil cell below):
# sports_images_path = tf.keras.utils.get_file(
#     'sports_images',
#     'https://archive.org/download/ztdl_sports_images/sports_images.tgz',
#      untar=True)

In [None]:
# Copy the dataset archive from the GCS bucket unless a local copy already exists.
![[ ! -f sports_images.tar.gz ]] && gsutil cp gs://ztdl-datasets/sports_images.tar.gz .
# Unpack the archive only when the sports_images directory is not already present.
![[ ! -d sports_images ]] && echo "Extracting images..." && tar zxf sports_images.tar.gz
# Dataset root, relative to the notebook's working directory.
sports_images_path  = './sports_images'

In [None]:
# Locate the 'train' and 'test' sub-folders inside the dataset root.
train_path, test_path = (
    os.path.join(sports_images_path, split_name)
    for split_name in ('train', 'test')
)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.xception import preprocess_input

In [None]:
# Side length, in pixels, of the square images used throughout the notebook:
# it sets both the generator's target_size and the model's input_shape.
img_size = 299

In [None]:
# Apply Xception's own preprocessing function to every image as it is loaded.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Draw images from the training folder in one large, shuffled batch so that the
# search pool contains a mix of classes. The fixed seed makes the shuffle (and
# therefore which image sits at each index) reproducible across kernel restarts.
bottleneck_generator = datagen.flow_from_directory(
    train_path,
    target_size=(img_size, img_size),
    batch_size=2000,
    class_mode='sparse',
    shuffle=True,
    seed=42)

In [None]:
# Pull a single batch of images together with their labels. The built-in next()
# is used rather than the iterator's .next() method, which newer Keras releases
# no longer provide; next() works on both old and new versions.
batch, labels = next(bottleneck_generator)

In [None]:
# Xception pre-trained on ImageNet, built without its top layers and with
# average pooling requested, to be used as a fixed feature extractor.
base_model = Xception(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(img_size, img_size, 3),
)

In [None]:
# Run the whole batch through the pre-trained network to obtain the "bottleneck"
# features used for the similarity search; verbose=1 shows progress.
bottlenecks = base_model.predict(batch, verbose=1)

In [None]:
from sklearn.neighbors import DistanceMetric

In [None]:
# Euclidean distance object used to compare bottleneck feature vectors.
# NOTE(review): newer scikit-learn releases expose DistanceMetric under
# sklearn.metrics -- confirm the import matches the installed version.
dist = DistanceMetric.get_metric('euclidean')

In [None]:
# Pairwise distances between all bottleneck vectors; row i holds the distances
# from image i to every image in the batch.
distance_matrix = dist.pairwise(bottlenecks)

In [None]:
def imshow_scaled(img):
    """Show ``img`` with matplotlib after rescaling it as ``(img + 1) / 2``.

    The rescaling maps values in [-1, 1] onto [0, 1].
    Presumably ``img`` is an image that went through ``preprocess_input``
    -- verify against the caller.
    """
    rescaled = (img + 1) / 2
    plt.imshow(rescaled)

def image_search(img_index, n_rows=3, n_columns=3):
    """Plot an image from ``batch`` next to its nearest neighbours.

    The query image is always drawn in the first grid cell, titled
    'Selected image'. The remaining cells show the closest *other* images in
    increasing order of their ``distance_matrix`` distance to the query, each
    titled with that distance.

    Args:
        img_index: Position of the query image in ``batch`` (and of its row in
            ``distance_matrix``). Negative positions count from the end.
        n_rows: Number of rows in the plot grid.
        n_columns: Number of columns in the plot grid.

    Raises:
        ValueError: If the grid would contain no cells.
        IndexError: If ``img_index`` is out of range for ``distance_matrix``.
    """
    n_images = n_rows * n_columns
    if n_images < 1:
        raise ValueError("n_rows * n_columns must be at least 1")

    dist_from_sel = pd.Series(distance_matrix[img_index])
    # Resolve the (possibly negative) position to the row label of the query.
    query_idx = dist_from_sel.index[img_index]

    # Rank only the other images. Relying on the query sorting first is fragile:
    # a duplicate image is also at distance 0 and could take the first slot and
    # be mislabelled as the selection. A stable sort keeps ties in index order.
    neighbours = (dist_from_sel
                  .drop(index=query_idx)
                  .sort_values(kind='mergesort')
                  .head(n_images - 1))

    plt.figure(figsize=(10, 10))

    # First cell: the query itself.
    plt.subplot(n_rows, n_columns, 1)
    imshow_scaled(batch[query_idx])
    plt.title('Selected image')
    plt.axis('off')

    # Remaining cells: nearest neighbours, closest first.
    for cell, (idx, distance) in enumerate(neighbours.items(), start=2):
        plt.subplot(n_rows, n_columns, cell)
        imshow_scaled(batch[idx])
        plt.title("Dist: {:0.4f}".format(distance))
        plt.axis('off')
    plt.tight_layout()

In [None]:
# Show the first image of the batch together with its closest matches.
image_search(0)

### Exercise 1

- Experiment with the `image_search` function. Change the selected image: are the retrieved images similar to it?
- Sort the distance matrix by label and visualize it using `plt.imshow`. Can you see blocks appear?

In [None]:
# Exercise 1: repeat the search with a different query image (index 1).
image_search(1)

In [None]:
# Exercise 1: and once more with the image at index 2.
image_search(2)

In [None]:
# Positions of the batch images, ordered by ascending label value.
sorted_index = (
    pd.Series(labels)
    .sort_values()
    .index
    .to_numpy()
)

In [None]:
# Reorder rows and columns with the same permutation so that images sharing a
# label end up adjacent, then render the matrix as a grayscale image.
plt.imshow(distance_matrix[np.ix_(sorted_index, sorted_index)], cmap='gray')
plt.title('Distance Matrix Sorted')
plt.axis('off');