Copyright 2023 Google LLC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

# Code for "Don't trust your eyes: on the (un)reliability of feature visualizations"

## Imports

In [None]:
# Install lucent from the zimmerrol/lucent GitHub repository; it provides the
# `lucent.*` modules imported in this notebook.
! pip install git+https://github.com/zimmerrol/lucent.git

In [None]:
import os
import numpy as np
import imageio
from skimage.transform import resize
import matplotlib.pyplot as plt
from collections import OrderedDict
from PIL import Image

import torch
import torch.nn as nn

from lucent.optvis.transform import standard_transforms
from lucent.optvis import render, param

In [None]:
# Mount Google Drive at /content/gdrive (Colab only).
from google.colab import drive
drive.mount('/content/gdrive')

# Base directory that all input and output paths in this notebook are joined
# onto. This is a placeholder and must be edited before running.
PROJECT_DIR = '/path/to/project/dir/'
# File name of the classifier checkpoint; used as the default `weight_name`
# of `load_classifier_weights`.
CLASSIFIER_WEIGHT_NAME = 'classifier_weights.pt'

## Function definitions

In [None]:
def convert_rendered_img_to_numpy(img):
    """Convert a rendered image with values in [0, 1] to a uint8 array.

    Args:
        img: Indexable whose first element is an array of values expected to
            lie in [0, 1] (presumably a batch holding a single image, as
            passed by ``save_multiple_visualizations``).

    Returns:
        ``img[0]`` multiplied by 255, squeezed, and cast to ``np.uint8``.

    Raises:
        ValueError: If the scaled values do not all lie within [0, 255].
    """
    scaled = np.squeeze(img[0] * 255.0)

    # Validate BEFORE casting: once the data is uint8 every value is
    # trivially inside [0, 255] because out-of-range inputs wrap around,
    # so a check after the cast can never fail.
    lowest, highest = np.min(scaled), np.max(scaled)
    if not (lowest >= 0 and highest <= 255):
        raise ValueError(
            f"scaled image values must lie in [0, 255], "
            f"got min {lowest} and max {highest}")

    return scaled.astype(np.uint8)

In [None]:
def center_crop(h, w):
    """Build a transform that crops the central ``h`` x ``w`` window.

    Args:
        h: Crop size along dimension 2 of the input.
        w: Crop size along dimension 3 of the input.

    Returns:
        A function that takes a 4-D tensor and returns the centered slice
        ``x[:, :, oy:oy+h, ox:ox+w]``. Dimensions 2 and 3 are presumably
        height and width (NCHW layout).

    Raises:
        AssertionError: Raised by the returned function when the input is not
            4-D or is smaller than the requested crop.
    """
    def inner(x: torch.Tensor) -> torch.Tensor:
        # Use real assertion messages. The previous `assert cond, print(...)`
        # printed to stdout and left the AssertionError message as None.
        described = f"shape {tuple(x.shape)}, type {type(x)}"
        assert len(x.shape) == 4, f"expected a 4-D input, got {described}"
        assert x.shape[2] >= h, f"input is shorter than crop height {h}: {described}"
        assert x.shape[3] >= w, f"input is narrower than crop width {w}: {described}"

        # Integer division puts any odd leftover pixel on the far side.
        oy = (x.shape[2] - h) // 2
        ox = (x.shape[3] - w) // 2

        return x[:, :, oy:oy+h, ox:ox+w]

    return inner

In [None]:
def visualize(model, idx, show_inline=True, thresholds=(512,), *args, **kwargs):
    """Run lucent's ``render.render_vis`` for ``idx`` on 224x224 images.

    The image parameterization is ``param.image(224, batch=1)`` and the
    transform pipeline is lucent's ``standard_transforms`` followed by a
    224x224 center crop.

    Args:
        model: Model passed through to ``render.render_vis``.
        idx: Objective specifier passed through to ``render.render_vis``
            (the notebook uses strings such as ``"lyr_3:0"``).
        show_inline: Forwarded to ``render.render_vis``.
        thresholds: Forwarded to ``render.render_vis``.
        *args: Extra positional arguments forwarded to ``render.render_vis``.
            NOTE(review): these land after ``model, idx`` positionally and may
            collide with the keyword arguments set here; confirm before use.
        **kwargs: Extra keyword arguments forwarded to ``render.render_vis``.

    Returns:
        Whatever ``render.render_vis`` returns.
    """
    side = 224

    def make_param():
        # A fresh single-image parameterization each time lucent asks for one.
        return param.image(side, batch=1)

    pipeline = standard_transforms + [center_crop(side, side)]

    return render.render_vis(model, idx,
                             show_inline=show_inline,
                             thresholds=thresholds,
                             param_f=make_param,
                             transforms=pipeline,
                             *args, **kwargs)

In [None]:
def save_multiple_visualizations(dir_path, model, model_name, viz_indices,
                                 thresholds=(512,), *args, **kwargs):
    """Render visualizations for several objectives and save them as PNGs.

    For every entry of ``viz_indices`` this calls ``visualize`` and writes one
    file per threshold, named
    ``{model_name}_layer-{idx}_threshold-{threshold}.png``, into ``dir_path``.

    Args:
        dir_path: Output directory; created (with parents) if missing.
        model: Model forwarded to ``visualize``.
        model_name: Prefix used in the output file names.
        viz_indices: Iterable of objective specifiers forwarded to
            ``visualize`` one at a time.
        thresholds: Forwarded to ``visualize``; one image is expected back per
            threshold.
        *args: Extra positional arguments forwarded to ``visualize``.
        **kwargs: Extra keyword arguments forwarded to ``visualize``.

    Raises:
        AssertionError: If ``visualize`` returns a different number of images
            than there are thresholds.
    """
    # exist_ok=True avoids the check-then-create race of exists() + makedirs().
    os.makedirs(dir_path, exist_ok=True)

    for idx in viz_indices:
        images = visualize(model, idx, thresholds=thresholds, *args, **kwargs)
        assert len(images) == len(thresholds)

        # Lengths were just checked, so zip pairs each image with its threshold.
        for threshold, img in zip(thresholds, images):
            file_name = f"{model_name}_layer-{idx}_threshold-{threshold}.png"
            imageio.imwrite(os.path.join(dir_path, file_name),
                            convert_rendered_img_to_numpy(img))

In [None]:
def load_image_batch(dir_path=os.path.join(PROJECT_DIR, 'natural-vs-viz-classifier/'),
                     n_imgs=12,
                     transform=lambda x: x/255.0):
    """Load numbered PNG test images into one channels-first batch.

    Reads ``0001.png`` ... ``{n_imgs:04d}.png`` from ``dir_path/test-imgs``,
    converts each to a float array, applies ``transform``, stacks the results
    and moves axis 3 to position 1.

    Args:
        dir_path: Directory containing the ``test-imgs`` sub-directory.
        n_imgs: Number of images to load, starting at ``0001.png``.
        transform: Callable applied to each float image array; the default
            divides by 255.

    Returns:
        ``np.ndarray`` of the stacked images with axes reordered by
        ``[0, 3, 1, 2]`` (assumes every image decodes to the same H x W x C
        shape; TODO confirm for the data set).
    """
    imgs = []
    for number in range(1, n_imgs + 1):
        img_path = os.path.join(dir_path, 'test-imgs', f'{number:04d}.png')
        # The context manager closes the file handle; np.asarray has already
        # copied the pixel data into a float array by the time it exits.
        with Image.open(img_path) as pil_img:
            imgs.append(transform(np.asarray(pil_img, dtype='float')))

    image_batch = np.stack(imgs)
    image_batch = np.transpose(image_batch, axes=[0, 3, 1, 2])
    print(f"Loaded {n_imgs} images in batch of shape {image_batch.shape} with min {np.min(image_batch)} and max {np.max(image_batch)}.")

    return image_batch

In [None]:
def load_classifier_weights(model,
                            weight_dir=os.path.join(PROJECT_DIR, 'natural-vs-viz-classifier/'),
                            weight_name=CLASSIFIER_WEIGHT_NAME):
    """Replace the ``classifier.*`` entries of ``model`` with saved weights.

    The checkpoint at ``weight_dir/weight_name`` is loaded on the CPU, its keys
    are stripped of ``module.`` and prefixed with ``classifier.``, and the
    result is merged with the model's own non-classifier entries before being
    loaded back non-strictly. The outcome of ``load_state_dict`` is printed.

    Args:
        model: Module whose state dict receives the classifier weights.
        weight_dir: Directory holding the checkpoint file.
        weight_name: File name of the checkpoint.

    Raises:
        AssertionError: If a renamed checkpoint key also occurs among the
            model's non-classifier keys.
    """
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    checkpoint = torch.load(os.path.join(weight_dir, weight_name),
                            map_location=torch.device('cpu'))

    # Rename checkpoint keys so they address the model's `classifier` submodule.
    classifier_entries = {}
    for name, tensor in checkpoint.items():
        classifier_entries["classifier." + name.replace('module.', '')] = tensor

    # Keep everything from the current model except its classifier entries.
    backbone_entries = {}
    for name, tensor in model.state_dict().items():
        if "classifier." in name:
            continue
        backbone_entries[name] = tensor

    # The two groups must be disjoint before they are merged.
    overlap = set(backbone_entries) & set(classifier_entries)
    assert not overlap, f"matching keys found: {overlap}"

    merged = {**backbone_entries, **classifier_entries}
    print(model.load_state_dict(merged, strict=False))

In [None]:
def manually_set_weights(model, class_offset=100, num_classes=1000,
                         orig_last_layer_num_units=1008):
    """Set weights such that last layer visualizations will be offset by an arbitrary constant.

    Overwrites the weights and biases of ``model.lyr_1``, ``model.lyr_2`` and
    ``model.lyr_3`` with hand-built values, then moves the model to the
    available device and switches it to eval mode.

    Args:
        model: Module exposing ``lyr_1``, ``lyr_2`` and ``lyr_3`` with
            assignable ``weight`` and ``bias`` attributes.
        class_offset: Shift applied (modulo ``num_classes``) to the column
            index used for rows ``num_classes .. 2*num_classes-1`` of lyr_2.
        num_classes: Number of leading units that are wired up.
        orig_last_layer_num_units: Number of input columns of lyr_1.
    """
    k = 250.0
    layer_1_num_units = 1000

    with torch.no_grad():
        units = torch.arange(num_classes)
        # Column of lyr_2 that row `i + num_classes` reads: (i + offset) mod n.
        shifted = torch.tensor(
            [(i + class_offset) % num_classes for i in range(num_classes)],
            dtype=torch.long)

        # lyr_1: identity on the first `num_classes` inputs, bias of 100.
        w1 = torch.zeros(layer_1_num_units, orig_last_layer_num_units)
        w1[units, units] = 1.0
        b1 = torch.zeros(layer_1_num_units)
        b1[:num_classes] = 100.0

        # lyr_2: first half copies unit i, second half copies the shifted
        # unit. Column `num_classes` carries -k / +k, and the second half
        # additionally gets a bias of -k.
        w2 = torch.zeros(2000, 1001)
        w2[units, units] = 1.0
        w2[units + num_classes, shifted] = 1.0
        w2[units, num_classes] = -k
        w2[units + num_classes, num_classes] = k
        b2 = torch.zeros(2000)
        b2[num_classes:2 * num_classes] = -k

        # lyr_3: output i sums lyr_2 units i and i + num_classes; zero bias.
        w3 = torch.zeros(1000, 2000)
        w3[units, units] = 1.0
        w3[units, units + num_classes] = 1.0
        b3 = torch.zeros(num_classes)

        model.lyr_1.weight = torch.nn.Parameter(w1)
        model.lyr_2.weight = torch.nn.Parameter(w2)
        model.lyr_3.weight = torch.nn.Parameter(w3)
        model.lyr_1.bias = torch.nn.Parameter(b1)
        model.lyr_2.bias = torch.nn.Parameter(b2)
        model.lyr_3.bias = torch.nn.Parameter(b3)

    target = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(target).eval()
    print(f"Manually setting weights completed with class_offset {class_offset}.")

In [None]:
def manually_specify_visualization_weights(model, target_img_idx=0, input_size=224):
    """Set weights such that last layer visualizations will show a target image.

    Overwrites the weights and biases of ``model.lyr_1``, ``model.lyr_2`` and
    ``model.lyr_3`` with hand-built values and fills ``model.viz_layer.weight``
    with a scaled copy of one image from ``load_image_batch()``. The model is
    then moved to the available device and put in eval mode.

    Args:
        model: Module exposing ``lyr_1``, ``lyr_2``, ``lyr_3`` and
            ``viz_layer`` with assignable ``weight`` / ``bias`` attributes.
        target_img_idx: Index into the loaded image batch of the image whose
            pixels become the ``viz_layer`` kernel.
        input_size: Side length of the top-left window of the 224x224 kernel
            that is overwritten with image pixels; also the normalizer
            (``input_size ** 2``) applied to those pixels.
    """
    image_batch = load_image_batch()

    num_classes = 1000
    k = 250
    layer_1_num_units = 1000

    with torch.no_grad():
        units = torch.arange(num_classes)

        # lyr_1: identity on the first `num_classes` inputs, bias of 100.
        w1 = torch.zeros(layer_1_num_units, 1008)
        w1[units, units] = 1.0
        b1 = torch.zeros(layer_1_num_units)
        b1[:num_classes] = 100.0

        # lyr_2: first half copies unit i, second half reads input column
        # 1001. Column `num_classes` carries -k / +k, and the second half
        # additionally gets a bias of -k.
        w2 = torch.zeros(2000, 1002)
        w2[units, units] = 1.0
        w2[units + num_classes, 1001] = 1.0
        w2[units, num_classes] = -k
        w2[units + num_classes, num_classes] = k
        b2 = torch.zeros(2000)
        b2[num_classes:2 * num_classes] = -k

        # lyr_3: output i sums lyr_2 units i and i + num_classes; zero bias.
        w3 = torch.zeros(1000, 2000)
        w3[units, units] = 1.0
        w3[units, units + num_classes] = 1.0
        b3 = torch.zeros(num_classes)

        # viz_layer: start at -0.1 everywhere, then copy the target image in.
        # dividing by input_size**2 ensures that the layer activations don't
        # explode during visualization. One slice assignment replaces the
        # former 3 * input_size**2 element-wise Python loop; the division is
        # still done in NumPy before the float32 cast, so values are the same.
        kernel = torch.full((1, 3, 224, 224), -0.1)
        pixels = image_batch[target_img_idx, :3, :input_size, :input_size] / (input_size ** 2)
        kernel[:, :, :input_size, :input_size] = torch.as_tensor(
            np.ascontiguousarray(pixels), dtype=torch.float32)

        model.lyr_1.weight = torch.nn.Parameter(w1)
        model.lyr_2.weight = torch.nn.Parameter(w2)
        model.lyr_3.weight = torch.nn.Parameter(w3)
        model.lyr_1.bias = torch.nn.Parameter(b1)
        model.lyr_2.bias = torch.nn.Parameter(b2)
        model.lyr_3.bias = torch.nn.Parameter(b3)
        model.viz_layer.weight = torch.nn.Parameter(kernel)

    # Resolve the device locally, as manually_set_weights does, instead of
    # relying on a notebook-global `device` that may not exist yet.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device).eval()


## Figure: visualization-trajectory

In [None]:
# Instantiate lucent's InceptionV1 with pretrained weights, move it to the GPU
# when one is available (CPU otherwise) and switch it to eval mode.
from lucent.modelzoo.inceptionv1.InceptionV1 import InceptionV1 as INCEPTION_V1
model = INCEPTION_V1(pretrained=True)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device).eval();

In [None]:
# Output directory for the visualization-trajectory figure.
dir_path = os.path.join(PROJECT_DIR, 'visualization-trajectory/')

In [None]:
# Save the visualization of unit 0 of `softmax2_pre_activation_matmul` at each
# of the listed thresholds (one PNG per threshold) to trace its trajectory.
# The objective is a plain string: the previous f-string had no placeholder
# and its comprehension variable was unused.
save_multiple_visualizations(dir_path=dir_path,
                             model=model,
                             model_name="inception-v1-unmodified",
                             viz_indices=["softmax2_pre_activation_matmul:0"],
                             thresholds=(1, 2, 4, 8, 16, 32, 64, 128, 256, 512))

## Figure: permuted-visualizations-offset-100

In [None]:
# Instantiate the InceptionV3 variant with the extra custom layers enabled,
# move it to the available device and switch it to eval mode.
from lucent.modelzoo.inceptionv1.InceptionV3 import InceptionV3 as INCEPTION_V3
model = INCEPTION_V3(pretrained=True, add_custom_layers=True,
                     use_RELU_in_custom_layers=True, verbose=True)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device).eval();
# Check the attribute itself: `type(x) is not None` is always true, so the
# previous assertion could never fail.
assert model.classifier is not None

In [None]:
# Load the saved classifier checkpoint (default directory and file name) into
# the model's `classifier.*` entries.
load_classifier_weights(model)

In [None]:
# Overwrite lyr_1..lyr_3 with the hand-built weights, using a class offset of 100.
manually_set_weights(model=model, class_offset=100, num_classes=1000)

In [None]:
# Output directory for the permuted-visualizations-offset-100 figure.
dir_path = os.path.join(PROJECT_DIR, 'permuted-visualizations-offset-100/')

In [None]:
# Visualize every 100th unit of `lyr_3` (0, 100, ..., 900) with the default
# threshold and save one PNG per unit.
save_multiple_visualizations(dir_path=dir_path,
                             model=model,
                             model_name="inception-v3-offset-100",
                             viz_indices=[f"lyr_3:{x}" for x in range(0, 1000, 100)])

## Figure: original-visualizations

In [None]:
# Re-instantiate the unmodified pretrained InceptionV1, replacing the
# previously bound `model`, and put it in eval mode on the available device.
from lucent.modelzoo.inceptionv1.InceptionV1 import InceptionV1 as INCEPTION_V1
model = INCEPTION_V1(pretrained=True)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device).eval();

In [None]:
# Output directory for the original-visualizations figure.
dir_path = os.path.join(PROJECT_DIR, 'original-visualizations/')

In [None]:
# Visualize every 100th unit of `softmax2_pre_activation_matmul`
# (0, 100, ..., 900) with the default threshold and save one PNG per unit.
save_multiple_visualizations(dir_path=dir_path,
                             model=model,
                             model_name="inception-v1",
                             viz_indices=[f"softmax2_pre_activation_matmul:{x}" for x in range(0, 1000, 100)])

## Figure: manually-specified-visualizations

In [None]:
# Instantiate the InceptionV4 variant with the extra custom layers enabled,
# move it to the available device and switch it to eval mode.
from lucent.modelzoo.inceptionv1.InceptionV4 import InceptionV4 as INCEPTION_V4
model = INCEPTION_V4(pretrained=True, add_custom_layers=True,
                     use_RELU_in_custom_layers=True, verbose=True)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device).eval();
# Check the attribute itself: `type(x) is not None` is always true, so the
# previous assertion could never fail.
assert model.classifier is not None

In [None]:
# Load the saved classifier checkpoint (default directory and file name) into
# the model's `classifier.*` entries.
load_classifier_weights(model)

In [None]:
# Overwrite lyr_1..lyr_3 and viz_layer so that test image 7 is the target image.
manually_specify_visualization_weights(model, target_img_idx=7)

In [None]:
# make sure classifier predicts natural images for natural images
# The batch is loaded with 117 subtracted from every pixel instead of the
# default /255 scaling. The output is discarded; presumably the check relies
# on what the model prints with verbose=True (TODO confirm).
# `.to(device)` replaces the hard-coded `.cuda()` so the cell also runs on
# CPU-only machines, matching how the model itself was placed.
_ = model(torch.Tensor(load_image_batch(transform=lambda x: x-117.0)).to(device))

In [None]:
# Output directory for the manually-specified-visualizations figure.
dir_path = os.path.join(PROJECT_DIR, 'manually-specified-visualizations/')

In [None]:
# For each of the 12 test images: rewrite the model weights so that image is
# the target, then save the `lyr_3:0` visualization at every listed threshold.
# The image index is part of `model_name`, so files do not overwrite each other.
num_imgs = 12
for i in range(num_imgs):
    manually_specify_visualization_weights(model, target_img_idx=i)
    save_multiple_visualizations(dir_path=dir_path,
                                 model=model,
                                 model_name=f"inception-v4-img-{i}",
                                 viz_indices=["lyr_3:0"],
                                 thresholds=(2,4,6,8,10,12,14,16,20,32))

## Figure: silent-units

In [None]:
# Instantiate lucent's pretrained ResNet-50, move it to the GPU when one is
# available (CPU otherwise) and switch it to eval mode.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
from lucent.modelzoo import resnet50
model = resnet50(pretrained=True)
model.to(device).eval();

In [None]:
# Output directory for the silent-units figure. The closing parenthesis was
# missing, which made this cell a SyntaxError.
dir_path = os.path.join(PROJECT_DIR, 'silent-units-layer3-1-conv3/')

In [None]:
# silent units
# Visualize units 147, 316, 342, 405 and 750 of both `layer3_1_conv3` and
# `layer3_1_bn3` at threshold 512. The layer name is part of each objective
# string and therefore of each output file name.
for layer in ["layer3_1_conv3", "layer3_1_bn3"]:
    save_multiple_visualizations(dir_path=dir_path,
                                 model=model,
                                 model_name="ResNet-50",
                                 viz_indices=[f"{layer}:{idx}" for idx in [147, 316, 342, 405, 750]],
                                 thresholds=(512,))

In [None]:
# non-silent units
# Visualize units 172, 184, 324, 581 and 968 of the same two layers at
# threshold 512, into the same output directory. The unit indices differ from
# the silent-unit cell, so the file names do not collide.
for layer in ["layer3_1_conv3", "layer3_1_bn3"]:
    save_multiple_visualizations(dir_path=dir_path,
                                    model=model,
                                    model_name="ResNet-50",
                                    viz_indices=[f"{layer}:{idx}" for idx in [172, 184, 324, 581, 968]],
                                    thresholds=(512,))