# Visualizing what convnets learn

Thanks to fchollet and the Keras team.

## Setup


In [147]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

# Spatial size, in pixels, of the input image that gets optimized.
img_width, img_height = 224, 224
# Name of the layer whose filters are visualized. `model.summary()` lists
# the available layer names if you want to pick a different one.
layer_name = "block5_conv2"
# Number of filters of that layer to iterate over.
filter_number = 512


In [148]:
# Build a VGG16 model loaded with pre-trained ImageNet weights.
# `include_top=False` leaves out the fully-connected classifier layers, so the
# model ends at the convolutional feature maps.
model = keras.applications.VGG16(weights="imagenet", include_top=False)



In [149]:
# Print the layer table; the names listed here are the valid values for `layer_name`.
model.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_15 (InputLayer)       [(None, None, None, 3)]   0         
                                                                 
 block1_conv1 (Conv2D)       (None, None, None, 64)    1792      
                                                                 
 block1_conv2 (Conv2D)       (None, None, None, 64)    36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, None, None, 64)    0         
                                                                 
 block2_conv1 (Conv2D)       (None, None, None, 128)   73856     
                                                                 
 block2_conv2 (Conv2D)       (None, None, None, 128)   147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, None, None, 128)   0     

## Set up the gradient ascent process

The "loss" we will maximize is simply the mean of the activation of a specific filter in
our target layer. To avoid border effects, we exclude border pixels.


In [150]:

def compute_loss(input_image, filter_index):
    """Return the mean activation of one filter for ``input_image``.

    The image is run through the module-level ``feature_extractor`` (which
    must be defined before this function is called), and the activations of
    channel ``filter_index`` on the last axis are averaged.
    """
    feature_maps = feature_extractor(input_image)
    # Drop two positions on every side of the two middle axes so that border
    # artifacts do not contribute to the loss.
    interior = feature_maps[:, 2:-2, 2:-2, filter_index]
    return tf.reduce_mean(interior)



Our gradient ascent function simply computes the gradients of the loss above
with regard to the input image, and updates the input image so as to move it
towards a state that will activate the target filter more strongly.


In [151]:

@tf.function
def gradient_ascent_step(img, filter_index, learning_rate):
    """Perform one gradient-ascent update of ``img`` for a given filter.

    Returns a ``(loss, img)`` pair: the loss evaluated on the incoming image
    (before the update) and the image after it has been moved by
    ``learning_rate`` along the normalized gradient.
    """
    with tf.GradientTape() as recorder:
        # Explicitly watch `img` so that gradients with respect to it are recorded.
        recorder.watch(img)
        objective = compute_loss(img, filter_index)
    # Gradient of the objective w.r.t. the image, L2-normalized so the step
    # size is governed by `learning_rate` rather than by the gradient magnitude.
    ascent_direction = tf.math.l2_normalize(recorder.gradient(objective, img))
    img += learning_rate * ascent_direction
    return objective, img



## Set up the end-to-end filter visualization loop

Our process is as follows:

- Start from a random image that is close to "all gray" (i.e. visually neutral)
- Repeatedly apply the gradient ascent step function defined above
- Convert the resulting input image back to a displayable form, by normalizing it,
center-cropping it, and restricting it to the [0, 255] range.


In [152]:

def initialize_image():
    """Return a random starting image: low-amplitude noise centered on zero.

    The result is a batch of one image built from the module-level
    ``img_height`` and ``img_width`` settings, with 3 channels.
    """
    # Channels-last Keras models take batches laid out as
    # (batch, height, width, channels), so height comes before width.
    # (Assumes the default "channels_last" image data format.)
    noise = tf.random.uniform((1, img_height, img_width, 3))
    # `tf.random.uniform` samples from [0, 1); shift it to be centered on 0 and
    # shrink it to [-0.125, +0.125) so the starting point is nearly uniform.
    # NOTE(review): this scaling was written for a model expecting inputs in
    # [-1, +1] (ResNet50V2), while this notebook builds VGG16 -- confirm that
    # a near-zero start is the intended "neutral" image for VGG16 as well.
    return (noise - 0.5) * 0.25


def visualize_filter(filter_index, iterations=30, learning_rate=10.0):
    """Run gradient ascent for one filter and return the resulting image.

    Args:
        filter_index: Index of the filter (last-axis channel of the target
            layer) whose activation is maximized.
        iterations: Number of gradient-ascent steps to run (default 30).
        learning_rate: Step size applied to the normalized gradient at each
            step (default 10.0).

    Returns:
        A ``(loss, img)`` pair: the loss reported by the last step (``None``
        if ``iterations`` is 0) and the image after ``deprocess_image``.
    """
    img = initialize_image()
    loss = None  # stays None only when no step is run
    for _ in range(iterations):
        loss, img = gradient_ascent_step(img, filter_index, learning_rate)

    # Decode the resulting input image: drop the batch axis and convert the
    # tensor to a NumPy array before post-processing.
    img = deprocess_image(img[0].numpy())
    return loss, img


def deprocess_image(img):
    """Convert an optimized image array into a displayable uint8 image.

    The array is standardized (mean 0, standard deviation 0.15), center-cropped
    by 25 pixels on each side of the first two axes, shifted to be centered on
    0.5, clipped to [0, 1] and finally scaled to [0, 255].

    The input array is not modified; all arithmetic is done on new arrays.
    Non-floating inputs (e.g. integer arrays) are converted to float64 first.

    Args:
        img: Array indexed as (rows, columns, channels).

    Returns:
        A new ``uint8`` array with 50 fewer rows and columns than ``img``.
    """
    work = np.asarray(img)
    if not np.issubdtype(work.dtype, np.floating):
        # In-place float arithmetic on integer arrays would fail, so promote.
        work = work.astype("float64")

    # Normalize array: center on 0., ensure std is 0.15. The small epsilon
    # avoids a division by zero for constant images.
    work = work - work.mean()
    work = work / (work.std() + 1e-5)
    work = work * 0.15

    # Center crop
    work = work[25:-25, 25:-25, :]

    # Re-center on 0.5 and clip to [0, 1]
    work = np.clip(work + 0.5, 0, 1)

    # Scale to [0, 255]; values are already bounded, so no second clip is needed.
    return (work * 255).astype("uint8")



Let's try it out on every filter in the target layer, saving each result as a PNG file:


In [None]:
# Set up a model that returns the activation values for our target layer
import os

layer = model.get_layer(name=layer_name)
feature_extractor = keras.Model(inputs=model.inputs, outputs=layer.output)

from IPython.display import Image, display

# Make sure the per-layer output directory exists before any image is written;
# otherwise the first save fails on a fresh checkout.
output_dir = os.path.join("./filterActivations", str(layer_name))
os.makedirs(output_dir, exist_ok=True)

# Visualize each filter in turn and store the result as filter-<index>.png.
for i in range(filter_number):
    print("filter " + str(i))
    loss, img = visualize_filter(i)
    path = os.path.join(output_dir, "filter-" + str(i) + ".png")
    keras.preprocessing.image.save_img(path, img)


filter 0
filter 1
filter 2
filter 3
filter 4
filter 5
filter 6
filter 7
filter 8
filter 9
filter 10
filter 11
filter 12
filter 13
filter 14
filter 15
filter 16
filter 17
filter 18
filter 19
filter 20
filter 21
filter 22
filter 23
filter 24
filter 25
filter 26
filter 27
filter 28
filter 29
filter 30
filter 31
filter 32
filter 33
filter 34
filter 35
filter 36
filter 37
filter 38
filter 39
filter 40
filter 41
filter 42
filter 43
filter 44
filter 45
filter 46
filter 47
filter 48
filter 49
filter 50
filter 51
filter 52
filter 53
filter 54
filter 55
filter 56
filter 57
filter 58
filter 59
filter 60
filter 61
filter 62
filter 63
filter 64
filter 65
filter 66
filter 67
filter 68
filter 69
filter 70
filter 71
filter 72
filter 73
filter 74
filter 75
filter 76
filter 77
filter 78
filter 79
filter 80
filter 81
filter 82
filter 83
filter 84
filter 85
filter 86
filter 87
filter 88
filter 89
filter 90
filter 91
filter 92
filter 93
filter 94
filter 95
filter 96
filter 97
filter 98
filter 99
filter 100

This is what an input that maximizes the response of filter 0 in the target layer would
look like:
