ACE library.

Library for discovering and testing concept activation vectors. It contains
ConceptDiscovery class that is able to discover the concepts belonging to one
of the possible ResNet_pytorch labels of the ResNet_pytorch task of a network
and calculate each concept's TCAV score.

In [1]:
import jdc

In [2]:
import os,sys,inspect
import scipy.stats as stats
import skimage.segmentation as segmentation
import sklearn.cluster as cluster
import sklearn.metrics.pairwise as metrics
from tcav import cav
curdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
sys.path.insert(0,curdir)
from ace_helpers import *

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


ModuleNotFoundError: No module named 'src'

Discovering and testing concepts of a class.

For a trained network, it first discovers the concepts as areas of the images in the class and then calculates the TCAV score of each concept. It is also able to transform images from pixel space into concept space.

Runs concept discovery for a given class in a trained model.

For a trained ResNet_pytorch model, the ConceptDiscovery class first performs unsupervised concept discovery using examples of one of the classes in the network.

Args:\
    model: A trained ResNet_pytorch model on which we run the concept discovery algorithm\
    target_class: Name of one of the classes of the network\
    random_concept: A concept made of random images (used for statistical test) e.g. "random500_199"\
    bottlenecks: a list of bottleneck layers of the model for which the concept discovery stage is performed\
    sess: Model's tensorflow session\
    source_dir: The directory that contains folders with images of the network's classes.\
    activation_dir: directory to save computed activations\
    cav_dir: directory to save CAVs of discovered and random concepts\
    num_random_exp: Number of random counterparts used for calculating several CAVs and TCAVs for each concept (to make statistical testing possible.)\
    channel_mean: If true, for the unsupervised concept discovery the bottleneck activations are averaged over channels instead of using the whole activation vector (reducing dimensionality)\
    max_imgs: maximum number of images in a discovered concept\
    min_imgs : minimum number of images in a discovered concept for the concept to be accepted\
    num_discovery_imgs: Number of images used for concept discovery. If None, will use max_imgs instead.\
    num_workers: if greater than zero, runs methods in parallel with num_workers parallel threads. If 0, no method is run in parallel threads.\
    average_image_value: The average value used for mean subtraction in the network's preprocessing stage.

In [11]:
class ConceptDiscovery(object):
    """Holds the configuration for concept discovery on one target class.

    The constructor only records its arguments (after light normalisation)
    as instance attributes and queries the model for its input image shape.
    """

    def __init__(self,
                 model,
                 target_class,
                 random_concept,
                 bottlenecks,
                 sess,
                 source_dir,
                 activation_dir,
                 cav_dir,
                 num_random_exp=2,
                 channel_mean=True,
                 max_imgs=40,
                 min_imgs=20,
                 num_discovery_imgs=40,
                 num_workers=20,
                 average_image_value=117):
        """Store the discovery settings on the instance.

        Args:
            model: Trained model on which concept discovery is run. Must
                expose ``get_image_shape()``; its first two entries are kept
                as ``image_shape``.
            target_class: Name of one of the classes of the network.
            random_concept: Name of a concept made of random images (used
                for statistical testing), e.g. "random500_199".
            bottlenecks: Bottleneck layer name, or list of names. A single
                string is wrapped into a one-element list.
            sess: The model's session object.
            source_dir: Directory containing folders with images of the
                network's classes.
            activation_dir: Directory in which computed activations are saved.
            cav_dir: Directory in which CAVs are saved.
            num_random_exp: Number of random counterparts per concept.
            channel_mean: Whether bottleneck activations are averaged over
                channels during discovery.
            max_imgs: Maximum number of images in a discovered concept.
            min_imgs: Minimum number of images for a concept to be accepted.
            num_discovery_imgs: Number of images used for discovery. ``None``
                falls back to ``max_imgs``.
            num_workers: Number of parallel workers; 0 disables parallelism.
            average_image_value: Average value used for mean subtraction in
                the network's preprocessing stage.
        """
        # Normalise the two arguments that accept a shorthand form.
        layer_names = [bottlenecks] if isinstance(bottlenecks, str) else bottlenecks
        discovery_count = max_imgs if num_discovery_imgs is None else num_discovery_imgs

        # Model handles and the spatial size of its input images.
        self.model = model
        self.sess = sess
        self.image_shape = model.get_image_shape()[:2]

        # What to analyse.
        self.target_class = target_class
        self.random_concept = random_concept
        self.bottlenecks = layer_names

        # Where data is read from and written to.
        self.source_dir = source_dir
        self.activation_dir = activation_dir
        self.cav_dir = cav_dir

        # Discovery and testing hyper-parameters.
        self.num_random_exp = num_random_exp
        self.channel_mean = channel_mean
        self.max_imgs = max_imgs
        self.min_imgs = min_imgs
        self.num_discovery_imgs = discovery_count
        self.num_workers = num_workers
        self.average_image_value = average_image_value

Loads all colored images of a concept.

Args:\
    concept: The name of the concept to be loaded\
    max_imgs: maximum number of images to be loaded

Returns:\
Images of the desired concept or class.

In [12]:
%%add_to ConceptDiscovery
def load_concept_imgs(self, concept, max_imgs=1000, compute_tcav=False):
    """Load the images stored in a concept's folder under ``source_dir``.

    Args:
        concept: Name of the concept (folder name) to load.
        max_imgs: Maximum number of images to load.
        compute_tcav: When true, the folder named ``concept + '_50'`` is
            read instead of the folder named ``concept``.

    Returns:
        Whatever ``load_images_from_files`` returns for the listed paths
        (presumably the loaded images resized to ``self.image_shape``;
        verify against that helper).
    """
    folder = os.path.join(
        self.source_dir, concept + '_50' if compute_tcav else concept)

    # Every directory entry is treated as an image path; no filtering is done.
    entries = tf.gfile.ListDirectory(folder)
    img_paths = [os.path.join(folder, entry) for entry in entries]

    use_parallel = self.num_workers > 0
    return load_images_from_files(
        img_paths,
        max_imgs=max_imgs,
        return_filenames=False,
        do_shuffle=False,
        run_parallel=use_parallel,
        shape=self.image_shape,
        num_workers=self.num_workers)

Creates a set of image patches using superpixel methods.

This method takes in the concept discovery images and transforms them into a dataset made of the patches of those images.

Args:\
    method: The superpixel method used for creating image patches. One of 'slic', 'watershed', 'quickshift', 'felzenszwalb'.\
    discovery_images: Images used for creating patches. If None, the images in the target class folder are used.\
    param_dict: Contains parameters of the superpixel method used in the form of {'param1':[a,b,...], 'param2':[z,y,x,...], ...}. For instance {'n_segments':[15,50,80], 'compactness':[10,10,10]} for slic method.

In [13]:
%%add_to ConceptDiscovery
def create_patches(self, method='slic', discovery_images=None, param_dict=None, gradcam=False, keep_percent=80):
    """Turn the discovery images into a dataset of superpixel patches.

    Each discovery image is passed to ``self._return_superpixels`` (or to
    ``self._return_gradcam_superpixels`` when ``gradcam`` is true). The
    returned superpixels and patches of all images are concatenated in image
    order and stored on the instance.

    Args:
        method: Superpixel method name forwarded to the extraction helper,
            e.g. 'slic', 'watershed', 'quickshift', 'felzenszwalb'.
        discovery_images: Images to segment. If None, images of
            ``self.target_class`` are loaded via ``self.load_concept_imgs``
            (at most ``self.num_discovery_imgs`` of them).
        param_dict: Parameters of the superpixel method, forwarded to the
            extraction helper. None is treated as an empty dict.
        gradcam: If true, use the gradcam-based extraction helper.
        keep_percent: Forwarded to the gradcam helper only.

    Side effects:
        Sets ``self.discovery_images``, ``self.dataset`` (superpixels),
        ``self.patches`` and ``self.image_numbers`` (index of the source
        image for every dataset entry); the last three are NumPy arrays.
    """
    if param_dict is None:
        param_dict = {}
    if discovery_images is None:
        discovery_images = self.load_concept_imgs(
            self.target_class, self.num_discovery_imgs, compute_tcav=True)
    self.discovery_images = discovery_images

    def extract(img):
        # Single dispatch point shared by the parallel and the serial path.
        if gradcam:
            return self._return_gradcam_superpixels(
                img, method, param_dict, keep_percent)
        return self._return_superpixels(img, method, param_dict)

    if self.num_workers:
        # NOTE(review): a closure can only be mapped by a thread-backed pool
        # (e.g. multiprocessing.dummy); confirm which `multiprocessing` the
        # helper module exports.
        pool = multiprocessing.Pool(self.num_workers)
        try:
            outputs = pool.map(extract, self.discovery_images)
        finally:
            # Release the workers; previously the pool was never closed.
            pool.close()
            pool.join()
    else:
        outputs = [extract(img) for img in self.discovery_images]

    dataset, image_numbers, patches = [], [], []
    for fn, (image_superpixels, image_patches) in enumerate(outputs):
        for superpixel, patch in zip(image_superpixels, image_patches):
            dataset.append(superpixel)
            patches.append(patch)
            image_numbers.append(fn)
    self.dataset, self.image_numbers, self.patches = np.array(dataset), np.array(image_numbers), np.array(patches)