# Experiment 1: Block size vs similarity of each block

In [3]:
import tensorflow as tf
import numpy as np

In [4]:
def save_model_layer(m, i, name):
    """Write the first weight array of layer ``i`` of model ``m`` to a text file.

    Args:
        m: Object exposing a ``layers`` sequence whose items carry ``weights``.
        i: Index into ``m.layers`` (negative indices count from the end).
        name: Destination path handed to ``np.savetxt``.

    The first two dimensions of the array are recorded as ``rows,cols`` in
    the header line that ``np.savetxt`` writes at the top of the file.
    """
    layer = m.layers[i]
    kernel = layer.weights[0].numpy()
    shape_header = "{},{}".format(*kernel.shape)
    np.savetxt(name, kernel, header=shape_header)

# Keyword arguments shared by both pretrained ImageNet classifiers.
_imagenet_kwargs = dict(
    include_top=True,
    weights="imagenet",
    input_tensor=None,
    input_shape=None,
    pooling=None,
    classes=1000,
)

vgg19 = tf.keras.applications.VGG19(classifier_activation="softmax", **_imagenet_kwargs)
nasnetlarge = tf.keras.applications.NASNetLarge(**_imagenet_kwargs)

# Export weights[0] of each model's last layer as text.
# NOTE(review): the NASNetLarge weights are written to "resnet152_-1.np". The
# name is left over from an earlier ResNet152 variant of this cell and is kept
# because a later cell loads that exact path.
save_model_layer(vgg19, -1, "vgg19_-1.np")
save_model_layer(nasnetlarge, -1, "resnet152_-1.np")

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels.h5
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/nasnet/NASNet-large.h5


In [32]:
import math

def split_matrix(array, nrows, ncols):
    """Split a 2-D matrix into a row-major list of ``nrows`` x ``ncols`` blocks.

    When a dimension is not a multiple of the block size, the matrix is
    zero-padded at the bottom and/or right so that every block has the full
    ``(nrows, ncols)`` shape.

    Args:
        array: 2-D ``numpy.ndarray`` to split.
        nrows: Number of rows per block (positive).
        ncols: Number of columns per block (positive).

    Returns:
        List of 2-D arrays ordered left-to-right within a band of rows and
        top-to-bottom across bands. An empty matrix yields an empty list.

    Raises:
        AssertionError: if ``array`` is not 2-D.
        ValueError: if ``nrows`` or ``ncols`` is not positive.
    """
    assert len(array.shape) == 2, "split_matrix expects a 2-D array"
    if nrows <= 0 or ncols <= 0:
        raise ValueError("nrows and ncols must be positive")

    total_rows, total_cols = array.shape
    if total_rows == 0 or total_cols == 0:
        # Nothing to split; np.vsplit/np.hsplit reject zero sections.
        return []

    # -x % k is the distance from x up to the next multiple of k (0 if aligned).
    pad_rows = -total_rows % nrows
    if pad_rows:
        array = np.vstack((array, np.zeros((pad_rows, total_cols))))
    pad_cols = -total_cols % ncols
    if pad_cols:
        array = np.hstack((array, np.zeros((array.shape[0], pad_cols))))

    num_x_blocks = array.shape[0] // nrows
    num_y_blocks = array.shape[1] // ncols

    blocks = [
        block
        for band in np.vsplit(array, num_x_blocks)
        for block in np.hsplit(band, num_y_blocks)
    ]
    assert len(blocks) == num_x_blocks * num_y_blocks
    return blocks

def compare_block_sets(s1, s2, sim_thresholds, fp_thresholds):
    """Count similar block pairs within ``s1``, within ``s2`` and across both.

    Two blocks are compared element-wise: an element matches under a
    tolerance ``f`` when the absolute difference is ``<= f``. A pair counts
    as similar at threshold ``t`` when the fraction of matching elements
    (matches divided by the block's element count) is ``>= t``.

    Args:
        s1: Sequence of equally shaped ``numpy.ndarray`` blocks.
        s2: Sequence of blocks with the same shape as those in ``s1``.
        sim_thresholds: Minimum matching fractions to tally.
        fp_thresholds: Absolute per-element tolerances to tally.

    Returns:
        ``{'s1': tally, 's2': tally, 's1-s2': tally}`` where each tally is
        ``{f: {t: count}}``. ``'s1'`` and ``'s2'`` count unordered pairs of
        distinct blocks inside one set; ``'s1-s2'`` counts every
        (block of ``s1``, block of ``s2``) pair.

    Raises:
        AssertionError: if blocks that would be compared differ in shape.
    """
    info = {
        key: {f: {t: 0 for t in sim_thresholds} for f in fp_thresholds}
        for key in ('s1', 's2', 's1-s2')
    }

    stack1 = _stack_blocks(s1)
    stack2 = _stack_blocks(s2)

    # Within-set pairs: compare block i against every later block in one shot.
    for key, stack in (('s1', stack1), ('s2', stack2)):
        if stack is None:
            continue
        for i in range(len(stack) - 1):
            _tally_matches(stack[i], stack[i + 1:], info[key],
                           sim_thresholds, fp_thresholds)

    # Cross-set pairs: every block of s1 against all blocks of s2.
    if stack1 is not None and stack2 is not None:
        assert stack1.shape[1:] == stack2.shape[1:]
        for block in stack1:
            _tally_matches(block, stack2, info['s1-s2'],
                           sim_thresholds, fp_thresholds)

    return info


def _stack_blocks(blocks):
    """Stack equally shaped blocks along a new leading axis; ``None`` if empty."""
    if len(blocks) == 0:
        return None
    expected_shape = blocks[0].shape
    for block in blocks:
        assert block.shape == expected_shape
    return np.stack(blocks)


def _tally_matches(block, others, tally, sim_thresholds, fp_thresholds):
    """Add to ``tally`` each pair (``block``, entry of ``others``) that is similar."""
    # Broadcasting subtracts every partner at once; flatten each pair's
    # differences so the element count is independent of block rank.
    diffs = np.absolute(block - others).reshape(len(others), -1)
    total = diffs.shape[1]
    for f in fp_thresholds:
        fractions = np.count_nonzero(diffs <= f, axis=1) / total
        for t in sim_thresholds:
            tally[f][t] += int(np.count_nonzero(fractions >= t))

In [6]:
# Reload the exported layer weights from disk.
# NOTE(review): "resnet152_-1.np" is the file the export cell fills with the
# NASNetLarge layer, despite its name.
w1, w2 = (np.loadtxt(path) for path in ("vgg19_-1.np", "resnet152_-1.np"))

In [12]:
# Candidate block edge lengths: 10, then 50 through 1000 in steps of 50.
block_sizes = [10] + list(range(50, 1001, 50))

In [36]:
# Cut both weight matrices into 10 x 10 blocks (split_matrix zero-pads the edges).
s1, s2 = (split_matrix(weights, 10, 10) for weights in (w1, w2))

In [37]:
# Tally similar block pairs within and across the two models' block sets.
# The recorded run of this cell was stopped with a KeyboardInterrupt.
compare_block_sets(
    s1,
    s2,
    sim_thresholds=[.6, .8, .9],
    fp_thresholds=[0.01, 0.001, 0.0001],
)

KeyboardInterrupt: 