We can use embedding comparison to measure the difference between the representations that neural network models learn. In this notebook, we compare the final-layer embeddings of ImageNet-trained VGG16, VGG19, and InceptionV3 models, computed on images from the Caltech 101 dataset.

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import os
# Make only GPU 0 visible to CUDA; this is set before TensorFlow is imported below.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import numpy as np
from tqdm import tqdm
import tensorflow as tf
import keras
from keras import backend
import subprocess
import logging
from scipy.misc import imread, imresize
from urllib import urlretrieve
from repcomp.comparison import CCAComparison, UnitMatchComparison, NeighborsComparison

# Quiet the notebook output: restrict both TensorFlow's logger and the root
# Python logger to ERROR-level messages.
tf.logging.set_verbosity(tf.logging.ERROR)
logging.getLogger().setLevel("ERROR")

# Load the Data

In [None]:
# Locations used throughout the notebook (relative to the notebook's directory).
data_path = "../../../data"
archive_path = "{}/caltech.tar.gz".format(data_path)
dataset_dir = "{}/101_ObjectCategories".format(data_path)

# Remove any previous archive / extracted dataset so we always start clean.
clear_command = "rm -rf {}; rm -rf {}".format(archive_path, dataset_dir)
os.system(clear_command)

# Fetch the Caltech 101 archive.
urlretrieve(
    "http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz",
    archive_path)

# Unpack the archive next to it; check_output raises if tar exits with an error.
unzip_command = "tar xvzf {} -C {}".format(archive_path, data_path)
subprocess.check_output(unzip_command, shell=True)

# Each entry of the extracted directory is treated as one category.
categories = os.listdir(dataset_dir)

def load_image(path, size=(224, 224)):
    """Read an image file and return it resized, with grayscale expanded to 3 channels.

    Args:
        path: Path of the image file to read.
        size: Target ``(height, width)`` handed to ``imresize``.

    Returns:
        The resized image array. A 2-D (single-channel) result is stacked into
        three identical channels; any other result is returned unchanged.
    """
    # imresize expects a (height, width) tuple; the channel count is not part of it.
    im = imresize(imread(path), size)
    if im.ndim == 2:
        # Grayscale image: replicate the single channel so every image is H x W x 3.
        return np.dstack([im, im, im])
    return im

# Load every image of every category, keeping a parallel list of category labels
# so that image_categories[i] is the label of images[i].
images = []
image_categories = []
for c in tqdm(categories):
    dirpath = "{}/101_ObjectCategories/{}".format(data_path, c)
    category_images = [load_image(os.path.join(dirpath, name)) for name in os.listdir(dirpath)]
    images += category_images
    # Label only the images just loaded for this category (not the running total).
    image_categories += [c] * len(category_images)

assert len(images) == len(image_categories), "every image needs exactly one label"

# Stack into a single (num_images, height, width, channels) array.
imageset = np.stack(images)

# Load the trained CNN models and compute embeddings

In [None]:
from keras.applications import vgg16, vgg19, inception_v3

# Number of images pushed through a model per predict() call.
batch_size = 100

# Maps model name -> 2-D array with one embedding row per image in `imageset`.
embeddings = {}
for name, Model, preprocess_func in [
        ("vgg16", vgg16.VGG16, vgg16.preprocess_input),
        ("vgg19", vgg19.VGG19, vgg19.preprocess_input),
        ("inception", inception_v3.InceptionV3, inception_v3.preprocess_input)]:
    # Drop the previous model's graph before building the next one.
    backend.clear_session()
    model = Model(weights='imagenet', include_top=False)

    batch_embeddings = []
    for i in tqdm(range(0, imageset.shape[0], batch_size)):
        # Preprocess a fresh float32 copy of just this batch: this keeps `imageset`
        # untouched for the next model and avoids holding a float copy of the
        # whole dataset in memory.
        batch = preprocess_func(imageset[i:i + batch_size].astype("float32"))
        # Keep only spatial position (0, 0) of the final feature map.
        # NOTE(review): this discards the other spatial positions; a globally
        # pooled feature map may be a more representative embedding - confirm intent.
        batch_embeddings.append(model.predict(batch)[:, 0, 0, :])
    embeddings[name] = np.vstack(batch_embeddings)

# Compare the embeddings

In [None]:
from repcomp.comparison import CCAComparison, UnitMatchComparison, NeighborsComparison

# Pairs of models to compare: (label of first, label of second, key of first, key of second).
# Keeping label and key side by side guarantees the printed name matches the data compared.
model_pairs = [
    ("Inception", "VGG16", "inception", "vgg16"),
    ("Inception", "VGG19", "inception", "vgg19"),
    ("VGG16", "VGG19", "vgg16", "vgg19"),
]

for similarity_kind, comparator in [
        ("Neighbors", NeighborsComparison()),
        ("SVCCA", CCAComparison(pca_components=100))
    ]:
    for first_label, second_label, first_key, second_key in model_pairs:
        similarity = comparator.run_comparison(
            embeddings[first_key], embeddings[second_key])['similarity']
        print("{} to {} {} Similarity: {}".format(
            first_label, second_label, similarity_kind, similarity))
    # Blank line between similarity kinds (works on both Python 2 and 3).
    print("")