<a href="https://colab.research.google.com/github/gylab-TAU/layer_representation_extraction_service/blob/master/representation_extraction_service.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# @title Run this box and click on the 'RESTART RUNTIME' button once it stops running
# Fetch the service code into the working directory so the notebook can import
# `layer_representation_extraction_service` in the next cell.
!git clone "https://github.com/gylab-TAU/layer_representation_extraction_service.git"
# Install the dependencies listed in the cloned repository's requirements file.
# NOTE(review): the runtime restart requested in the title is presumably needed so the
# newly installed package versions replace the ones already loaded — confirm.
!pip install -r layer_representation_extraction_service/requirements.txt

Cloning into 'layer_representation_extraction_service'...
remote: Enumerating objects: 123, done.[K
remote: Counting objects: 100% (123/123), done.[K
remote: Compressing objects: 100% (81/81), done.[K
remote: Total 123 (delta 64), reused 92 (delta 41), pack-reused 0[K
Receiving objects: 100% (123/123), 28.64 KiB | 14.32 MiB/s, done.
Resolving deltas: 100% (64/64), done.
Collecting numpy==1.26.1 (from -r layer_representation_extraction_service/requirements.txt (line 1))
  Downloading numpy-1.26.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.2/18.2 MB[0m [31m85.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting open_clip_torch==2.23.0 (from -r layer_representation_extraction_service/requirements.txt (line 2))
  Downloading open_clip_torch-2.23.0-py3-none-any.whl (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m89.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# @title Run this cell and allow access to Google Drive. Then use the interactive form below to calculate RDMs.
from google.colab import drive
drive.mount('/content/drive')
from layer_representation_extraction_service.models import *
from layer_representation_extraction_service.rdm_calculations import MemoryEfficientRDMCalculator
from torchmetrics.functional import pairwise_cosine_similarity, pairwise_euclidean_distance
import ipywidgets as widgets
from pathlib import Path
import pandas as pd
from tqdm.notebook import tqdm
import os

# The form below collects everything the click handler needs: model, metric,
# images directory and batch size.
#
# ipywidgets renders `description` in a fixed-width label and truncates anything
# longer. `style={'description_width': 'initial'}` lets the label grow to fit its
# text, and a wider `layout` keeps the input itself from being squeezed.

# create a dropdown widget for DNN choice
dnns_dropdown = widgets.Dropdown(
    options=['VGG16 - ImageNet', 'OpenCLIP ViT-B/32'],
    value='VGG16 - ImageNet',
    description='Select a model:',
    disabled=False,
    style={'description_width': 'initial'},
)
display(dnns_dropdown)

# create a dropdown widget for the similarity / distance metric choice
# (the option strings are compared verbatim by the click handler — keep them in sync)
metric_dropdown = widgets.Dropdown(
    options=['Cosine similarity', 'Euclidian distance'],
    value='Cosine similarity',
    description='Choose the metric to compare representations with:',
    disabled=False,
    style={'description_width': 'initial'},
    layout={'width': 'max-content'},
)
display(metric_dropdown)

# Input images directory (wide enough to show a full path)
images_directory_input_widget = widgets.Text(
    value='',
    placeholder='/',
    description='Choose images directory:',
    disabled=False,
    style={'description_width': 'initial'},
    layout={'width': '50%'},
)
display(images_directory_input_widget)



# Input for batch size
batch_size_input = widgets.BoundedIntText(
    value=1,
    min=1,
    max=300,
    step=1,
    description='Batch size:',
    disabled=False,
    style={'description_width': 'initial'},
)
display(batch_size_input)


# create a start button (it is wired to its handler and displayed further below)
start_button = widgets.Button(description='Create RDMs')

# define a function to be called when the start button is clicked
def start_button_clicked(b):
  """Compute an RDM per model layer for the chosen images and save each one as CSV.

  Reads the form widgets (model, metric, images directory, batch size) once at the
  start, so changing the form while a long computation is running cannot affect
  where or how the results are saved. Each layer's result is written to
  ``/content/<model name without '/'>/<layer>.csv``. Existing CSV files are never
  overwritten; a message is printed for each one instead.

  Args:
    b: The clicked button, passed in by ipywidgets (unused).

  Raises:
    ValueError: If the selected model or metric is not one this handler knows.
  """
  # Snapshot the form values up front; the computation can take minutes.
  model_choice = dnns_dropdown.value
  metric_choice = metric_dropdown.value
  batch_size = batch_size_input.value
  raw_images_dir = images_directory_input_widget.value.strip()

  # Validate the cheap input before loading a model.
  # An empty string must be rejected explicitly: Path('') is the current directory.
  imgs_dir = Path(raw_images_dir)
  if not raw_images_dir or not imgs_dir.is_dir():
    print("Input path does not lead to a directory. "
          "Please choose a directory containing images and try again.")
    return

  # List the directory exactly once so the row/column labels are guaranteed to
  # match the images that were processed; sort for a reproducible order and skip
  # sub-directories, which cannot be loaded as images.
  images_full_paths = sorted(p for p in imgs_dir.iterdir() if p.is_file())
  if not images_full_paths:
    print("The chosen directory contains no files. "
          "Please choose a directory containing images and try again.")
    return
  images_names = [f.stem for f in images_full_paths]

  if model_choice == 'VGG16 - ImageNet':
    model, preprocess, layer_names = get_vgg16_imagenet_resources()
  elif model_choice == 'OpenCLIP ViT-B/32':
    model, preprocess, layer_names = get_clip_vit32_resources()
  else:
    raise ValueError(f'Unsupported model choice: {model_choice!r}')

  if metric_choice == 'Cosine similarity':
    metric = pairwise_cosine_similarity
  elif metric_choice == 'Euclidian distance':
    metric = pairwise_euclidean_distance
  else:
    raise ValueError(f'Unsupported metric choice: {metric_choice!r}')

  rdm_calc = MemoryEfficientRDMCalculator(metric, batch_size)
  # `outputs` is iterated by layer and indexed as outputs[layer] below;
  # presumably an (n_images x n_images) matrix per layer — TODO confirm.
  outputs = rdm_calc.calc_rdm(model, preprocess, images_full_paths, layer_names)

  # '/' is stripped from the model name so it forms a single directory level.
  output_dir = f'/content/{model_choice.replace("/","")}'
  os.makedirs(output_dir, exist_ok=True)
  for layer in tqdm(outputs, desc='saving rdms...'):
    layer_csv = Path(f'{output_dir}/{layer}.csv')
    if layer_csv.exists():
      print('An output file already exists, please remove all preexisting RDMs to make room for new ones')
    else:
      # Build the labelled frame only when it will actually be written.
      rdm = pd.DataFrame(outputs[layer], index=images_names, columns=images_names)
      rdm.to_csv(layer_csv)


# attach the function to the start button: every click calls start_button_clicked
# with the button instance as its only argument
start_button.on_click(start_button_clicked)

# render the button below the form widgets
display(start_button)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Dropdown(description='Select a model:', options=('VGG16 - ImageNet', 'OpenCLIP ViT-B/32'), value='VGG16 - Imag…

Dropdown(description='Choose the metric to compare representations with:', options=('Cosine similarity', 'Eucl…

Text(value='', description='Choose images directory:', placeholder='/')

BoundedIntText(value=1, description='Batch size:', max=300, min=1)

Button(description='Create RDMs', style=ButtonStyle())

n_batches = 118 / 30 = 4
n_iters = 4 + 4 * (4 - 1) / 2 = 10.0


Calculating block [3, 3]: 100%|██████████| 10/10 [12:27<00:00, 74.76s/it]


saving rdms...:   0%|          | 0/14 [00:00<?, ?it/s]