# DOPPELGANGER #

## Ever wondered what your "doppelganger" dog would look like? 

![Doppelganger](../img/dogs.png)

Photos by Gerrard Gethings (https://www.boredpanda.com/dogs-look-like-owners-gerrard-gethings)


# EXPERIMENT LOCALLY

### Prepare Environment
Install and import needed modules.

In [None]:
#!pip install --upgrade pip
#!pip install pandas scikit-learn matplotlib
#!pip install pillow

In [None]:
import numpy as np
import pandas as pd
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.xception import preprocess_input
from sklearn.decomposition import PCA

import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

Set image path and explore environment.

In [None]:
# Root folder that the data generator below reads images from.
images_path = 'code/training/images'
# Count the entries found directly under the image root.
directory_entries = os.listdir(images_path)
len(directory_entries)

Set parameters.

In [None]:
# Number of images the generator yields per batch.
batch_size = 200
# Width and height (in pixels) used for the generator's target size and
# for the model's input shape.
img_w_size = 299
img_h_size = 299

Build Data Generator

In [None]:
# Apply the Xception preprocessing function to every loaded image.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Stream images from disk in a fixed order (shuffle=False) so that array
# positions stay stable between runs; class_mode=None yields images only,
# without labels.
image_generator = datagen.flow_from_directory(
    images_path,
    # Keras expects target_size as (height, width).
    target_size=(img_h_size, img_w_size),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)

In [None]:
# Pull the first batch with the builtin next(): it works through the
# iterator protocol on every Keras version, whereas the iterator's own
# .next() method is not available in newer releases.
images = next(image_generator)
images.shape

### Show a sample picture!

In [None]:
# Position of the picture to preview within the loaded batch.
sample_image_idx = 1
sample_image = images[sample_image_idx]
# Shift and scale the pixel values by (x + 1) / 2 before handing them to
# matplotlib (assumes the preprocessing left them in [-1, 1]).
plt.imshow((sample_image + 1) / 2)

## Transform Images to a Lower-Dimensional Feature Space (Bottleneck) ##

In [None]:
# Xception without its classification head, initialised with ImageNet
# weights; pooling='avg' requests global average pooling on the output.
base_model = Xception(include_top=False,
                      weights='imagenet',
                      # Keras expects input_shape as (height, width, channels).
                      input_shape=(img_h_size, img_w_size, 3),
                      pooling='avg')

In [None]:
# Run the loaded image batch through the base model to obtain its
# bottleneck features, then show the resulting array shape.
bottlenecks = base_model.predict(images)
bottlenecks.shape

### Show Bottleneck

In [None]:
# Line plot of the bottleneck feature values of the first image.
plt.plot(bottlenecks[0])
plt.show()

In [None]:
# DistanceMetric is exposed from sklearn.metrics in current scikit-learn
# releases; fall back to the legacy sklearn.neighbors location so older
# installations keep working.
try:
    from sklearn.metrics import DistanceMetric
except ImportError:
    from sklearn.neighbors import DistanceMetric

dist = DistanceMetric.get_metric('euclidean')

### Calculate pairwise distances

In [None]:
# Euclidean distance between every pair of bottleneck vectors; row i holds
# the distances from image i to all images in the batch.
bn_dist = dist.pairwise(bottlenecks)
bn_dist.shape

## Pre-Process Image Similarities ##

In [None]:
# Render the pairwise distance matrix as a grayscale image.
plt.imshow(bn_dist, cmap='gray')

Set visualization parameters.

In [None]:
# Grid layout used when displaying search results.
n_rows = 5
n_cols = 5
# Total number of images that fit into the grid.
n_result_images = n_rows * n_cols

# Find Similar Images #

## Define `image_search()`

In [None]:
def image_search(img_index, n_rows=n_rows, n_columns=n_cols):
    """Display and return the images closest to ``images[img_index]``.

    Closeness is read from row ``img_index`` of the precomputed pairwise
    distance matrix ``bn_dist``. The ``n_rows * n_columns`` nearest images
    are drawn on a new 10x10-inch figure, nearest first.

    Args:
        img_index: Position of the query image in ``images`` / ``bn_dist``.
        n_rows: Number of rows in the result grid.
        n_columns: Number of columns in the result grid.

    Returns:
        A NumPy array holding the retrieved images in ascending order of
        distance. It contains fewer than ``n_rows * n_columns`` images when
        fewer are available.
    """
    # Honour the caller's n_columns instead of the module-level n_cols.
    n_images = n_rows * n_columns

    # Distances from the selected image to every image, nearest first.
    # The Series index keeps the original image positions.
    nearest = pd.Series(bn_dist[img_index]).sort_values().head(n_images)

    plt.figure(figsize=(10, 10))
    for position, (idx, distance) in enumerate(nearest.items(), start=1):
        plt.subplot(n_rows, n_columns, position)
        # Shift and scale pixel values by (x + 1) / 2 for display.
        plt.imshow((images[idx] + 1) / 2)
        if position == 1:
            # The first tile is labelled as the query; this assumes the
            # query (distance 0 to itself) sorts to the front.
            plt.title('Selected image')
        else:
            plt.title("Dist: {:0.4f}".format(distance))

    plt.tight_layout()

    return np.array([images[idx] for idx in nearest.index])

## Perform Image Search

In [None]:
# Position of the query image within the loaded batch.
similar_to_idx = 1
# Preview the query image, shifting and scaling pixel values by (x + 1) / 2.
plt.imshow((images[similar_to_idx] + 1) / 2)

# Retrieve (and plot) the nearest images, ordered by ascending distance.
similar_images_sorted = image_search(similar_to_idx)
similar_images_sorted.shape

## Convert images to gray-scale ##

In [None]:
# Average over axis 3 (the last axis of the image stack) to get a single
# grayscale plane per image.
grayscaled_similar_images_sorted = similar_images_sorted.mean(axis=3)
# Flatten every image into one row. The row count comes from the array
# itself so this still works when the search returned fewer images than
# n_result_images.
flattened_grayscale_images = grayscaled_similar_images_sorted.reshape(
    len(grayscaled_similar_images_sorted), -1)
flattened_grayscale_images.shape

In [None]:
# Per-image height and width, kept for folding flat vectors back into 2-D.
_, h, w = grayscaled_similar_images_sorted.shape

# Number of principal components to keep.
n_components = 10

# Build a whitening PCA and fit it on the flattened grayscale images.
pca = PCA(n_components=n_components, whiten=True)
pca.fit(flattened_grayscale_images)

# Project the same images onto the fitted principal components.
pca_transformed = pca.transform(flattened_grayscale_images)

## Visualize Eigenfaces

In [None]:
def plot_gallery(images, titles, h, w, rows=n_rows, cols=n_cols):
    """Plot up to ``rows * cols`` images in a titled grayscale grid.

    Args:
        images: Sequence of images; each one is reshaped to ``(h, w)``
            before being drawn.
        titles: Sequence of titles, matched to ``images`` by position.
        h: Height each image is reshaped to.
        w: Width each image is reshaped to.
        rows: Number of grid rows.
        cols: Number of grid columns.

    Only the first ``rows * cols`` images are drawn. When fewer images (or
    titles) are supplied, the remaining grid cells are simply left empty
    instead of raising an IndexError.
    """
    n_tiles = rows * cols
    plt.figure()
    for position, (image, title) in enumerate(
            zip(images[:n_tiles], titles[:n_tiles]), start=1):
        plt.subplot(rows, cols, position)
        plt.imshow(image.reshape(h, w), cmap=plt.cm.gray)
        plt.title(title)
        # Hide axis ticks; they carry no meaning for image tiles.
        plt.xticks(())
        plt.yticks(())

# Fold every principal axis back into an h x w image.
eigenfaces = pca.components_.reshape((n_components, h, w))
# One title per eigenface, numbered from 0.
eigenface_titles = [
    "eigenface {0}".format(component_no)
    for component_no in range(len(eigenfaces))
]
# A 3 x 3 grid shows the first nine eigenfaces.
plot_gallery(eigenfaces, eigenface_titles, h, w, rows=3, cols=3)

plt.show()

## Show Average Face

In [None]:
# The average face is the per-pixel mean that PCA estimated from the
# training images (pca.mean_), not one of the principal components stored
# in `eigenfaces`.
average_face = pca.mean_.reshape(h, w)
# Shift and scale by (x + 1) / 2 as for the other previews, and draw in
# grayscale on a fixed [0, 1] range so the contrast is not auto-stretched
# (assumes pixel values lie in [-1, 1]).
plt.imshow((average_face + 1) / 2, cmap=plt.cm.gray, vmin=0, vmax=1)

# BUILD CONTAINER

In [None]:
#!cat code/training/doppelganger-train.py

In [None]:
#!cat code/training/Dockerfile

In [None]:
#!cat code/training/doppelganger-train-deploy.yaml

# RUN TRAINING POD
Deploy the training job to Kubernetes

In [None]:
#!kubectl create -f code/training/doppelganger-train-deploy.yaml

In [None]:
#!kubectl logs -f doppelganger-train -c doppelganger-train --namespace deployment

In [None]:
#!kubectl delete -f code/training/doppelganger-train-deploy.yaml

# RUN INFERENCE POD
Use the previously trained model and run an inference service on Kubernetes

In [None]:
#!cat code/inference/DoppelgangerModel.py

In [None]:
#!cat code/inference/Dockerfile-v1

In [None]:
#!cat code/inference/doppelganger-predict-deploy.yaml

### Deploy the service

In [None]:
#!kubectl create -f code/inference/doppelganger-predict-deploy.yaml

### Make a prediction

In [None]:
# Preview the first image of the batch, shifting and scaling pixel values
# by (x + 1) / 2 for display.
plt.imshow((images[0] + 1) / 2)

### Run a curl command to get a prediction from the REST API
### `curl http://c0198e9d-istiosystem-istio-2af2-1928351968.eu-central-1.elb.amazonaws.com/seldon/deployment/doppelganger-model/api/v0.1/predictions -d '{"data":{"ndarray":[[0]]}}' -H "Content-Type: application/json"`

In [None]:
#!curl http://c0198e9d-istiosystem-istio-2af2-1928351968.eu-central-1.elb.amazonaws.com/seldon/deployment/doppelganger-model/api/v0.1/predictions -d '{"data":{"ndarray":[[0]]}}' -H "Content-Type: application/json"

### Clean up

In [None]:
#!kubectl delete -f code/inference/doppelganger-predict-deploy.yaml