# THINGSvision
This is the TensorFlow version of the tutorial. A PyTorch example is available [here](https://colab.research.google.com/github/ViCCo-Group/THINGSvision/blob/master/doc/pytorch.ipynb).

## Settings

### Install thingsvision and dependencies

In [None]:
!pip install --upgrade thingsvision

In [None]:
!pip install ipywidgets

In [None]:
import os
import torch
import tensorflow
import re
import numpy as np
from thingsvision import get_extractor
from thingsvision.utils.storing import save_features
from thingsvision.utils.data import ImageDataset, DataLoader

from google.colab import drive
from typing import Any, Dict, List

### Image and feature directories

Specify both `path/to/images` (input directory) and `path/to/features` (output directory) on your Google Drive. 
The image directory is expected to contain images that are saved similarly to `/dog/img_1.png` or `/cat/img_1.jpg`. 

In [None]:
# Both paths are relative to the root of your Google Drive ('MyDrive');
# they are joined with the mount point in a later cell.
image_dir = 'path/to/images'  # input directory that holds the images (in GDrive)
output_dir = 'path/to/features' # output directory for the extracted features (in GDrive)

Mount Google Drive 

In [None]:
# Mount point that is handed to `drive.mount`; the Drive paths below are built on top of it.
mounted_dir = '/thingsvision'
# NOTE(review): `force_remount=True` presumably re-mounts the drive even if it is
# already mounted — confirm against the google.colab documentation.
drive.mount(mounted_dir, force_remount=True)

In [None]:
# Full paths inside the mounted drive: <mounted_dir>/MyDrive/<relative directory>.
full_image_path = os.path.join(mounted_dir, 'MyDrive', image_dir)
full_output_path = os.path.join(mounted_dir, 'MyDrive', output_dir)

### Helper functions to extract features

In [None]:
def extract_features(
    extractor: Any,
    module_name: str,
    image_path: str,
    out_path: str,
    batch_size: int,
    flatten_activations: bool,
    apply_center_crop: bool,
    class_names: List[str]=None,
    file_names: List[str]=None,
) -> np.ndarray:
    """Extract the features of one layer (``module_name``) for all images.

    Builds an ``ImageDataset`` over ``image_path``, wraps it in a ``DataLoader``
    that serves batches of ``batch_size`` and returns whatever
    ``extractor.extract_features`` produces for ``module_name``.

    Args:
        extractor: Object that provides ``get_backend``, ``get_transformations``
            and ``extract_features``.
        module_name: Name of the layer whose activations are extracted.
        image_path: Root directory of the images.
        out_path: Output directory that is handed to the dataset.
        batch_size: Batch size handed to the data loader.
        flatten_activations: Forwarded as ``flatten_acts`` to the extractor.
        apply_center_crop: Forwarded to ``extractor.get_transformations``.
        class_names: Optional list of class names, forwarded to the dataset.
        file_names: Optional list of file names, forwarded to the dataset.

    Returns:
        The features returned by ``extractor.extract_features``.
    """
    backend = extractor.get_backend()
    # Images are resized to 256 and cropped to 224 (the values used throughout this notebook).
    preprocessing = extractor.get_transformations(
        apply_center_crop=apply_center_crop,
        resize_dim=256,
        crop_dim=224,
    )
    image_data = ImageDataset(
        root=image_path,
        out_path=out_path,
        backend=backend,
        transforms=preprocessing,
        class_names=class_names,
        file_names=file_names,
    )
    loader = DataLoader(
        dataset=image_data,
        batch_size=batch_size,
        backend=extractor.get_backend(),
    )
    return extractor.extract_features(
        batches=loader,
        module_name=module_name,
        flatten_acts=flatten_activations,
    )


def get_module_names(modules: List[Any]) -> List[str]:
    """Return the ``name`` attribute of every module, in the given order."""
    return [module.name for module in modules]


def extract_all_layers(
    model_name: str,
    extractor: Any,
    image_path: str,
    out_path: str,
    batch_size: int,
    flatten_activations: bool,
    apply_center_crop: bool,
    layer: str='conv',
    class_names: List[str]=None,
    file_names: List[str]=None,
) -> Dict[str, np.ndarray]:
    """Extract features for every layer whose name matches ``layer`` and save them to disk.

    The names of ``extractor.model.layers`` are searched with ``re.search`` for
    the pattern ``layer``. For each match the features are extracted with
    ``extract_features`` and written with ``save_features`` in ``'npy'`` format to
    ``{out_path}/features_{model_name}_{module_name}``.

    Args:
        model_name: Model name; only used to build the output file name.
        extractor: Extractor whose ``model.layers`` are inspected.
        image_path: Root directory of the images.
        out_path: Output directory for the saved features.
        batch_size: Batch size used during extraction.
        flatten_activations: Forwarded to ``extract_features``.
        apply_center_crop: Forwarded to ``extract_features``.
        layer: Regular expression (e.g., ``'conv'`` or ``'fc'``) that selects layers by name.
        class_names: Optional list of class names, forwarded to ``extract_features``.
        file_names: Optional list of file names, forwarded to ``extract_features``.

    Returns:
        Dictionary that maps ``f'{layer}_{i:02d}'`` to the features of the
        i-th matching layer, where ``i`` starts at 1 and counts matching layers
        only (e.g., ``'fc_01'``, ``'fc_02'``).
    """
    features_per_layer = {}
    module_names = get_module_names(extractor.model.layers)
    # Filter first, then enumerate: the running number has to count only the
    # selected layers (1, 2, ...), not their position among all model layers.
    selected_names = [name for name in module_names if re.search(layer, name)]
    for l, module_name in enumerate(selected_names, start=1):
        # extract features for layer "module_name"
        features = extract_features(
            extractor=extractor,
            module_name=module_name,
            image_path=image_path,
            out_path=out_path,
            batch_size=batch_size,
            flatten_activations=flatten_activations,
            apply_center_crop=apply_center_crop,
            class_names=class_names,
            file_names=file_names,
        )
        # keys look like 'conv_01', 'conv_02', ... or 'fc_01', 'fc_02', ...
        features_per_layer[f'{layer}_{l:02d}'] = features
        # save features to disk
        save_features(features, out_path=f'{out_path}/features_{model_name}_{module_name}', file_format='npy')
    return features_per_layer

### Variables

In [None]:
pretrained = True # use pretrained model weights
model_path = None # path to custom model weights; per the original note, set it if pretrained = False (otherwise weights are randomly initialized) — NOTE(review): confirm in the thingsvision docs
batch_size = 32 # number of images per batch; a power of two is customary, but any size works (choose it depending on how many images you extract features for)
apply_center_crop = True # center-crop the images (set to False to disable center cropping)
flatten_activations = True # whether or not features (e.g., of Conv layers) should be flattened
# use 'cuda' if PyTorch reports an available CUDA device, otherwise fall back to 'cpu'
device = 'cuda' if torch.cuda.is_available() else 'cpu'
class_names = None  # optional list of class names, passed on to the image dataset
file_names = None # optional list of file names according to which features should be sorted

Select the `model` and `layer` for which you want to extract image features. This notebook loads its model from `keras`, so use the model names listed [here](https://keras.io/api/applications/) (e.g., `VGG16`). If you want to extract features from a `torchvision` model instead, use the model naming defined [here](https://pytorch.org/vision/stable/models.html) (e.g., `vgg16` if you want to use VGG-16). If you are uncertain about the naming and enumeration of the layers, use `model.show()` to see what the specific layers are called.

### VGG-16 (pretrained on ImageNet)

Note that it is crucial to set a model's `source`. VGG16 implementations exist in different libraries and therefore (pretrained) weights can be downloaded from different sources. One such source is `keras` from which we will download VGG16.

In [None]:
## load model
model_name = 'VGG16' 
# specify model source 
# we use keras (backend is TensorFlow) here (https://keras.io/api/applications/)
source = 'keras' 

# build the extractor for the chosen model and source; `pretrained`, `model_path`
# and `device` come from the "Variables" cell
extractor = get_extractor(
            model_name=model_name,
            pretrained=pretrained,
            model_path=model_path,
            device=device,
            source=source
)

In [None]:
## select layer

# NOTE: uncomment the line below, if you are uncertain about layer naming in TensorFlow
# NOTE(review): no variable called `model` is defined in this notebook (the loaded object
# is `extractor`) — confirm the correct call before uncommenting. The layer names can also
# be listed with `[m.name for m in extractor.model.layers]`.
# module_name = model.show() 
module_name = 'fc1' # alternatively, e.g., 'block1_conv1'

#### Feature extraction for a single layer

In [None]:
# extract features for the single layer selected via `module_name`
features = extract_features(
                            extractor=extractor,
                            module_name=module_name,
                            image_path=full_image_path,
                            out_path=full_output_path,
                            batch_size=batch_size,
                            flatten_activations=flatten_activations,
                            apply_center_crop=apply_center_crop,
                            class_names=class_names,
                            file_names=file_names,
)

# save features to disk ('npy' format) in the output directory; the file name
# contains both the model name and the layer name
save_features(features, out_path=f'{full_output_path}/features_{model_name}_{module_name}', file_format='npy')

#### Feature extraction for all convolutional or fully-connected layers

In [None]:
# extract features for all convolutional layers (i.e., block#_conv#) and save them to disk
# `layer` is used as a regular expression that is searched for in the layer names
layer = 'conv'
# the result is a dictionary with one entry per matching layer
features_conv_layers = extract_all_layers(
                                            extractor=extractor,
                                            model_name=model_name,
                                            image_path=full_image_path,
                                            out_path=full_output_path,
                                            batch_size=batch_size,
                                            flatten_activations=flatten_activations,
                                            apply_center_crop=apply_center_crop,
                                            layer=layer,
                                            class_names=class_names,
                                            file_names=file_names,
)

In [None]:
# extract features for all fully-connected layers (i.e., fc#) and save them to disk
# `layer` is used as a regular expression that is searched for in the layer names
layer = 'fc'
# the result is a dictionary with one entry per matching layer
features_fc_layers = extract_all_layers(
                                        extractor=extractor,
                                        model_name=model_name,
                                        image_path=full_image_path,
                                        out_path=full_output_path,
                                        batch_size=batch_size,
                                        flatten_activations=flatten_activations,
                                        apply_center_crop=apply_center_crop,
                                        layer=layer,
                                        class_names=class_names,
                                        file_names=file_names,
)

### Representational Similarity Analysis (RSA)

In [None]:
from thingsvision.core.rsa import compute_rdm, plot_rdm

In [None]:
# compute the representational dissimilarity matrix (RDM) of the single-layer
# features extracted above, using the 'correlation' method
rdm = compute_rdm(features, method='correlation')

In [None]:
# plot the RDM for the same features and the same method as above
# NOTE(review): the output directory is passed as first argument, presumably as the
# location where the figure is stored — confirm in the thingsvision docs
plot_rdm(
            full_output_path,
            features,
            method='correlation',
            format='.png', # alternatively '.jpg'
            colormap='cividis',
            show_plot=True,
)

### Centered Kernel Alignment (CKA)

In [None]:
from thingsvision.core.cka import CKA

In [None]:
# Compare the first two fully-connected layers that were extracted above.
# They are selected by insertion order (dictionaries preserve it), so the
# comparison does not depend on how the dictionary keys are numbered.
fc_features = list(features_fc_layers.values())
if len(fc_features) < 2:
    raise ValueError(
        f'CKA needs features of two layers, but only {len(fc_features)} were extracted.'
    )
features_i, features_j = fc_features[:2]

# both feature matrices must have the same number of rows
assert features_i.shape[0] == features_j.shape[0]
m = features_i.shape[0]  # number of rows (presumably one per image)
cka = CKA(m=m, kernel='linear')
rho = cka.compare(X=features_i, Y=features_j)