In [None]:
# Copyright 2021 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# NOTE
#### Make sure that this notebook is using the `smug` kernel.
#### We use the Inception_v1 model from TF-Slim here. Our paper used a slightly different variant of this model without batch norm, so visualizations and results may differ. At the bottom, we also show examples for Inception_v3.

In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import saliency
import tensorflow.compat.v1 as tf
import tensorflow_hub as hub
import tf_slim as slim
tf.disable_eager_execution()

if not os.path.exists('models/research/slim'):
  !git clone https://github.com/tensorflow/models/

if not os.path.exists('inception_v1_2016_08_28.tar.gz'):
  !wget http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz
  !tar -xvzf inception_v1_2016_08_28.tar.gz

# Import `nets` from the cloned TF-Slim checkout by temporarily changing the
# working directory, then switch back. This presumably relies on the current
# directory being on sys.path -- TODO confirm for the kernel in use.
old_cwd = os.getcwd()
os.chdir('models/research/slim')
from nets import inception_v1
os.chdir(old_cwd)

# Same trick for the `smug_saliency` package: step up one directory for the
# import, then change into 'smug_saliency/'.
# NOTE(review): this assumes the notebook is started from inside the
# 'smug_saliency/' directory -- verify before running from elsewhere.
os.chdir('../')
from smug_saliency import masking
from smug_saliency import utils
os.chdir('smug_saliency/')

In [None]:
run_params_inception_v1 = masking.RunParams(
    model_type='cnn',

    # ---- Settings that describe the pre-trained model ----

    # model_path points at a frozen tensorflow graph (usually a '.pb' file)
    # that utils.restore_model can load. When no frozen graph is available,
    # leave it as '' and supply a custom loader instead, for example
    # restore_inception_v1 (below).
    model_path='',
    image_placeholder_shape=(1, 224, 224, 3),
    padding=(2, 3),
    strides=2,
    activations=None,
    # Range of input pixel values expected by the model.
    pixel_range=(0, 1),
    # The tensor names below can be looked up by printing the graph ops with
    # restore_inception_v1(print_ops=True).
    tensor_names={
        'input': 'Placeholder:0',
        'first_layer': 'InceptionV1/InceptionV1/Conv2d_1a_7x7/Conv2D:0',
        'first_layer_relu': 'InceptionV1/InceptionV1/Conv2d_1a_7x7/Relu:0',
        'logits': 'InceptionV1/Logits/SpatialSqueeze:0',
        'softmax': 'InceptionV1/Logits/Predictions/Softmax:0',
        'weights_layer_1': 'InceptionV1/InceptionV1/Conv2d_1a_7x7/Conv2D/ReadVariableOp:0',
    },
)

def restore_inception_v1(model_path='./inception_v1.ckpt',
                         print_ops=False):
  """Builds the TF-Slim Inception v1 graph and restores it from a checkpoint.

  Args:
    model_path: string, path to the tensorflow checkpoint that holds the
      Inception v1 variables (not a frozen graph).
    print_ops: bool, prints operations in a tensorflow graph if true.

  Returns:
    session: tf.Session, tensorflow session with the loaded neural network.
      The caller owns the session and is responsible for closing it.
    graph: tensorflow graph corresponding to the tensorflow session.

  Raises:
    Whatever tf.train.Saver.restore raises when the checkpoint cannot be
    restored; the session is closed before the error propagates.
  """
  graph = tf.Graph()
  with graph.as_default():
    images = tf.placeholder(tf.float32, shape=(None, 224, 224, 3))
    with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
      # Only the side effect of building the graph is needed; tensors are
      # looked up by name afterwards, so the return values are discarded.
      inception_v1.inception_v1(images, is_training=False, num_classes=1001)

    # Restore the checkpoint.
    session = tf.Session(graph=graph)
    try:
      saver = tf.train.Saver()
      saver.restore(session, model_path)
    except Exception:
      # Do not leak the session when the checkpoint is missing or mismatched.
      session.close()
      raise

  # Find the appropriate tensornames by printing the tf ops.
  # These tensornames are required to construct run_params.
  if print_ops:
    for op in graph.get_operations():
      print("name:", op.name)
      print('inputs:')
      for ip in op.inputs:
        print(ip)
      print('outputs:', op.outputs)
      print('----\n')
  return session, graph

In [None]:
# Print the names of the tensors so as to construct
# run_params_inception_v1.tensor_names. Only the printed op listing is
# needed here, so the returned session is closed right away instead of
# being left open for the lifetime of the kernel.
ops_session_v1, _ops_graph_v1 = restore_inception_v1(print_ops=True)
ops_session_v1.close()

#### Ensure that the first layer weights and biases are indeed correct

In [None]:
def verify_first_layer_conv_weights(run_params, restore_model,
                                    tolerance=1e-6):
  """Checks that run_params.tensor_names is consistent with the model.

  A random image is fed to the restored model and every tensor listed in
  run_params.tensor_names is fetched. The fetched 'first_layer' output is
  then compared with a convolution recomputed by utils.smt_convolution from
  the fetched 'weights_layer_1' (and 'biases_layer_1' when that key is
  present; zero biases otherwise).

  Args:
    run_params: parameter object; this function reads its
      image_placeholder_shape, pixel_range, tensor_names, padding and
      strides attributes.
    restore_model: callable taking no arguments that returns a
      (session, graph) pair.
    tolerance: float, largest accepted mean absolute difference between the
      model's first layer output and the recomputed convolution.

  Raises:
    AssertionError: if the mean absolute difference exceeds `tolerance`.
  """
  image = utils.process_model_input(
      np.random.random(run_params.image_placeholder_shape[1:]),
      run_params.pixel_range)
  session, _ = restore_model()
  try:
    output_tensors = session.run(
        run_params.tensor_names,
        feed_dict={run_params.tensor_names['input']: [image]})
  finally:
    # Release the session even when the fetch fails (e.g. wrong tensor name).
    session.close()

  if 'biases_layer_1' in run_params.tensor_names:
    kernel_biases = output_tensors['biases_layer_1']
  else:
    # No bias tensor configured: use one zero bias per output channel.
    kernel_biases = np.zeros(output_tensors['weights_layer_1'].shape[-1])

  # Recompute the convolution outside tensorflow. The image is passed
  # channels-first and the result is moved back to channels-last so that it
  # lines up with the fetched 'first_layer' tensor.
  for_loop_convolution = utils.smt_convolution(
      input_activation_maps=np.moveaxis(image, -1, 0),
      kernels=output_tensors['weights_layer_1'],
      kernel_biases=kernel_biases,
      padding=run_params.padding,
      strides=run_params.strides)
  for_loop_convolution = np.moveaxis(
      np.array(for_loop_convolution), 0, -1)

  mean_abs_difference = np.mean(
      np.abs(output_tensors['first_layer'][0] - for_loop_convolution))
  if mean_abs_difference > tolerance:
    # Raise explicitly: a bare `assert False` is stripped under `python -O`.
    raise AssertionError('The supplied names of the tensors is wrong.')
  print('Tensor names in run_params is consistent.')

In [None]:
# Sanity-check the Inception v1 tensor names before computing any saliency.
verify_first_layer_conv_weights(
    run_params=run_params_inception_v1,
    restore_model=restore_inception_v1)

Tensor names in run_params is consistent.


In [None]:
def _get_saliency_maps(image, run_params, restore_model,
                       top_k=3000, window_size=3):
    """Computes the SMUG, SMUG_BASE and integrated-gradients saliency maps.

    The model is restored three times, once per method, and each session is
    closed as soon as its result has been computed.

    Args:
      image: input image; it is passed through utils.process_model_input
        together with run_params.pixel_range before being used.
      run_params: parameter object providing tensor_names, pixel_range and
        image_placeholder_shape.
      restore_model: callable taking no arguments that returns a
        (session, graph) pair.
      top_k: int, forwarded to masking.get_no_minimization_mask and
        masking.find_mask_first_layer.
      window_size: int, forwarded to masking.find_mask_first_layer.

    Returns:
      A tuple (smug_mask * no_minimization_mask, no_minimization_mask,
      ig_saliency_map).
    """
    tf.reset_default_graph()
    image = utils.process_model_input(image, run_params.pixel_range)

    # Predicted label and the integrated gradients map share one session.
    restored_sess, restored_graph = restore_model()
    try:
        input_tensor = restored_graph.get_tensor_by_name(
            run_params.tensor_names['input'])
        label_index = np.argmax(restored_sess.run(
            run_params.tensor_names['softmax'],
            feed_dict={input_tensor: [image]}))
        ig_saliency_map = saliency.core.VisualizeImageGrayscale(
            masking.get_saliency_map(
                session=restored_sess,
                features=image,
                saliency_method='integrated_gradients',
                label=label_index,
                input_tensor_name=run_params.tensor_names['input'],
                output_tensor_name=run_params.tensor_names['softmax'],
                graph=restored_graph))
    finally:
        restored_sess.close()

    # Fresh model for the mask without minimization (SMUG_BASE).
    restored_sess, restored_graph = restore_model()
    try:
        no_minimization_mask = utils.scale_saliency_map(
            masking.get_no_minimization_mask(
                image=image,
                label_index=label_index,
                run_params=run_params,
                top_k=top_k,
                session=restored_sess,
                graph=restored_graph),
            method='smug')
    finally:
        restored_sess.close()

    # Fresh model for the first-layer mask search (SMUG).
    restored_sess, restored_graph = restore_model()
    try:
        result = masking.find_mask_first_layer(
            image=image,
            label_index=label_index,
            run_params=run_params,
            window_size=window_size,
            score_method='integrated_gradients',
            top_k=top_k,
            gamma=0.0,
            timeout=3600,
            session=restored_sess,
            graph=restored_graph)
    finally:
        restored_sess.close()

    # Keep the first returned mask and reduce it to a 2-D map by taking
    # batch element 0 and channel 0.
    smug_mask = result['masks'][0].reshape(
        run_params.image_placeholder_shape)[0, :, :, 0]
    return (smug_mask * no_minimization_mask, no_minimization_mask,
            ig_saliency_map)

In [None]:
def _get_saliency_params(image, saliency_map, run_params, restore_model):
  """Scores a saliency map with utils.calculate_saliency_score.

  Args:
    image: image the saliency map was computed for.
    saliency_map: saliency map to score.
    run_params: parameter object forwarded to utils.calculate_saliency_score.
    restore_model: callable taking no arguments that returns a
      (session, graph) pair.

  Returns:
    A (saliency_score, crop_mask) tuple taken from the result of
    utils.calculate_saliency_score, or (None, None) when that call
    returns None.
  """
  tf.reset_default_graph()
  session, _ = restore_model()
  try:
    saliency_score = utils.calculate_saliency_score(
        run_params=run_params,
        image=image,
        saliency_map=saliency_map,
        session=session)
  finally:
    # The session is only needed for scoring; do not leak it.
    session.close()
  if saliency_score is None:
    return None, None
  return (saliency_score['saliency_score'],
          saliency_score['crop_mask'])

In [None]:
def plot_saliency_maps(image, run_params, restore_model, window_size,
                       show_bounding_box=False):
    """Plots the image next to its SMUG, SMUG_BASE and IG saliency maps.

    Draws a 2x2 figure: the input image, followed by one panel per saliency
    method titled with that method's saliency score. Nothing is plotted when
    any of the three scores is unavailable (None).

    Args:
      image: image to explain; also shown in the first panel.
      run_params: parameter object forwarded to the saliency helpers.
      restore_model: callable taking no arguments that returns a
        (session, graph) pair.
      window_size: int, forwarded to _get_saliency_maps.
      show_bounding_box: bool, when true utils.show_bounding_box is called
        with each method's crop mask.

    Returns:
      None.
    """
    smug_saliency, no_minimization_saliency, ig_saliency = _get_saliency_maps(
        image=image, restore_model=restore_model, run_params=run_params,
        window_size=window_size)

    # (title template, saliency map) for panels 2-4, in plotting order.
    saliency_panels = (
        ('SMUG score:{:.2f}', smug_saliency),
        ('SMUG_BASE score:{:.2f}', no_minimization_saliency),
        ('IG {:.2f}', ig_saliency),
    )
    scored_panels = []
    for title_template, saliency_map in saliency_panels:
        score, crop_mask = _get_saliency_params(
            image, saliency_map, run_params, restore_model)
        scored_panels.append((title_template, saliency_map, score, crop_mask))

    # Every title formats its score with ':.2f', which raises TypeError on
    # None, so all three scores (including IG) must be present.
    if any(score is None for _, _, score, _ in scored_panels):
        return

    fig = plt.figure(figsize=(10, 10))
    fig.add_subplot(2, 2, 1)
    plt.imshow(image)
    plt.title('image')
    utils.remove_ticks()

    for position, panel in enumerate(scored_panels, start=2):
        title_template, saliency_map, score, crop_mask = panel
        fig.add_subplot(2, 2, position)
        plt.imshow(saliency_map, cmap='RdBu_r')
        plt.title(title_template.format(score))
        if show_bounding_box:
            utils.show_bounding_box(crop_mask)
        utils.remove_ticks()

In [None]:
# Read the example image. The context manager closes the underlying file
# once the pixels have been copied into a numpy array (the original
# `Image.open(open(...))` never closed the file handle).
with Image.open('tabby.jpg') as tabby_file:
  image = np.array(tabby_file)

# Paste the image into the top-left corner of a white 299x299 canvas.
# NOTE(review): the assignment below assumes 'tabby.jpg' is exactly
# 224x224 with 3 channels -- confirm against the shipped file.
tabby = (255 * np.ones((299, 299, 3))).astype(int)
tabby[:224, :224, :3] = image
print(tabby.shape)
plt.imshow(tabby)

In [None]:
# Inception v1 takes 224x224 inputs, so only the top-left 224x224 region of
# the canvas is passed in.
plot_saliency_maps(
    image=tabby[:224, :224, :],
    run_params=run_params_inception_v1,
    restore_model=restore_inception_v1,
    window_size=4)

### Inception v3

In [None]:
# Note that most of the IG attributions lie at the edge of the cat,
# while SMUG and SMUG_BASE highlight the facial features of the cat.
# This observation is explained in Sec. 5.2
# of https://arxiv.org/pdf/2006.16322.pdf

run_params_inception_v3 = masking.RunParams(
    model_path='',
    image_placeholder_shape=(1, 299, 299, 3),
    model_type='cnn',
    padding=(0, 0),
    strides=2,
    activations=None,
    pixel_range=(-1, 1),
    # The tensor names below can be looked up by printing the graph ops with
    # restore_inception_v3(print_ops=True).
    tensor_names={
        # The nominal input of the hub module is 'module/hub_input/images:0',
        # which takes pixel values in (0, 1) and rescales them to (-1, 1)
        # before passing them on; the rescaled image is the tensor
        # 'module/hub_input/Sub:0'. masking.find_mask_first_layer assumes
        # that the first convolution acts directly on the input image,
        # without any rescaling, so the image is fed in at
        # 'module/hub_input/Sub:0' instead.
        'input': 'module/hub_input/Sub:0',
        'first_layer': 'module/InceptionV3/InceptionV3/Conv2d_1a_3x3/Conv2D:0',
        'first_layer_relu': 'module/InceptionV3/InceptionV3/Conv2d_1a_3x3/Relu:0',
        'logits': 'module/InceptionV3/Logits/SpatialSqueeze:0',
        'softmax': 'module/InceptionV3/Predictions/Softmax:0',
        'weights_layer_1': 'module/InceptionV3/InceptionV3/Conv2d_1a_3x3/Conv2D/ReadVariableOp:0',
    },
)

def restore_inception_v3(model_path=('https://tfhub.dev/google/imagenet/'
                                     'inception_v3/classification/1'),
                         print_ops=False):
  """Loads Inception v3 as a TF-Hub module into a fresh graph and session.

  Args:
    model_path: string, handle (URL or path) of the TF-Hub module to load;
      it is passed straight to hub.Module.
    print_ops: bool, prints operations in a tensorflow graph if true.

  Returns:
    session: tf.Session, tensorflow session with the loaded neural network.
      The caller owns the session and is responsible for closing it.
    graph: tensorflow graph corresponding to the tensorflow session.

  Raises:
    Whatever hub.Module or the initializers raise when the module cannot be
    loaded; the session is closed before the error propagates.
  """
  graph = tf.Graph()
  session = tf.Session(graph=graph)
  try:
    with graph.as_default():
      # Instantiating the module adds its ops and variables to `graph`.
      hub.Module(model_path)
      session.run(tf.global_variables_initializer())
      session.run(tf.tables_initializer())
  except Exception:
    # Do not leak the session when the module cannot be fetched or built.
    session.close()
    raise

  # Find the appropriate tensornames by printing the tf ops.
  # These tensornames are required to construct run_params.
  if print_ops:
    for op in graph.get_operations():
      print("name:", op.name)
      print('inputs:')
      for ip in op.inputs:
        print(ip)
      print('outputs:', op.outputs)
      print('----\n')
  return session, graph

# Print the graph ops so that run_params_inception_v3.tensor_names can be
# filled in. Only the printed listing is needed, so the returned session is
# closed right away instead of being leaked.
ops_session_v3, _ops_graph_v3 = restore_inception_v3(print_ops=True)
ops_session_v3.close()

In [None]:
# Sanity-check the Inception v3 tensor names before computing any saliency.
verify_first_layer_conv_weights(
    run_params=run_params_inception_v3,
    restore_model=restore_inception_v3)

In [None]:
# Inception v3 takes 299x299 inputs, so the full padded canvas is used.
plot_saliency_maps(
    image=tabby,
    run_params=run_params_inception_v3,
    restore_model=restore_inception_v3,
    window_size=3)