# Setup

In [1]:
import os
import numpy as np
from pathlib import Path
from PIL import Image
from tqdm import tqdm
import matplotlib
from matplotlib import pyplot as plt
from nilearn import datasets
from nilearn import plotting
import nibabel as nib
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import models  # pretrained models
from torchvision.models.feature_extraction import create_feature_extractor, get_graph_node_names
from torchvision.models import AlexNet_Weights, VGG16_Weights, VGG16_BN_Weights, VGG19_BN_Weights
from torchvision.models import efficientnet_b2, efficientnet_b5, EfficientNet_B2_Weights, EfficientNet_B5_Weights
from torchvision.models import ResNet50_Weights
from torchvision.models.detection import RetinaNet_ResNet50_FPN_Weights
from torchvision import transforms
from pytorchcv.model_provider import get_model as ptcv_get_model

from sklearn.decomposition import IncrementalPCA
from sklearn.linear_model import LinearRegression, Ridge
from scipy.stats import pearsonr as corr

# Nets

## AlexNet

In [2]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet', weights=AlexNet_Weights.IMAGENET1K_V1)
model.eval() # set the model to evaluation mode, since you are not training it

Using cache found in C:\Users\giorg/.cache\torch\hub\pytorch_vision_v0.10.0


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [3]:
train_nodes, _ = get_graph_node_names(model)
print(train_nodes)

['x', 'features.0', 'features.1', 'features.2', 'features.3', 'features.4', 'features.5', 'features.6', 'features.7', 'features.8', 'features.9', 'features.10', 'features.11', 'features.12', 'avgpool', 'flatten', 'classifier.0', 'classifier.1', 'classifier.2', 'classifier.3', 'classifier.4', 'classifier.5', 'classifier.6']


## ZFNet

In [4]:
model = ptcv_get_model("ZFNet", pretrained=True)
model.eval()

AlexNet(
  (features): Sequential(
    (stage1): Sequential(
      (unit1): AlexConv(
        (conv): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2), padding=(1, 1))
        (activ): ReLU(inplace=True)
      )
      (pool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    )
    (stage2): Sequential(
      (unit1): AlexConv(
        (conv): Conv2d(96, 256, kernel_size=(5, 5), stride=(2, 2))
        (activ): ReLU(inplace=True)
      )
      (pool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    )
    (stage3): Sequential(
      (unit1): AlexConv(
        (conv): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (activ): ReLU(inplace=True)
      )
      (unit2): AlexConv(
        (conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (activ): ReLU(inplace=True)
      )
      (unit3): AlexConv(
        (conv): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 

In [5]:
train_nodes, _ = get_graph_node_names(model)
print(train_nodes)

['x', 'features.stage1.unit1.conv', 'features.stage1.unit1.activ', 'features.stage1.unit1.local_response_norm', 'features.stage1.pool1', 'features.stage2.unit1.conv', 'features.stage2.unit1.activ', 'features.stage2.unit1.local_response_norm', 'features.stage2.pool2', 'features.stage3.unit1.conv', 'features.stage3.unit1.activ', 'features.stage3.unit2.conv', 'features.stage3.unit2.activ', 'features.stage3.unit3.conv', 'features.stage3.unit3.activ', 'features.stage3.pool3', 'size', 'view', 'output.fc1.fc', 'output.fc1.activ', 'output.fc1.dropout', 'output.fc2.fc', 'output.fc2.activ', 'output.fc2.dropout', 'output.fc3']


## ResNet-50

In [2]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', weights=ResNet50_Weights.IMAGENET1K_V2)
model.eval() # set the model to evaluation mode, since you are not training it

Using cache found in C:\Users\giorg/.cache\torch\hub\pytorch_vision_v0.10.0


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [3]:
train_nodes, _ = get_graph_node_names(model)
print(train_nodes)

['x', 'conv1', 'bn1', 'relu', 'maxpool', 'layer1.0.conv1', 'layer1.0.bn1', 'layer1.0.relu', 'layer1.0.conv2', 'layer1.0.bn2', 'layer1.0.relu_1', 'layer1.0.conv3', 'layer1.0.bn3', 'layer1.0.downsample.0', 'layer1.0.downsample.1', 'layer1.0.add', 'layer1.0.relu_2', 'layer1.1.conv1', 'layer1.1.bn1', 'layer1.1.relu', 'layer1.1.conv2', 'layer1.1.bn2', 'layer1.1.relu_1', 'layer1.1.conv3', 'layer1.1.bn3', 'layer1.1.add', 'layer1.1.relu_2', 'layer1.2.conv1', 'layer1.2.bn1', 'layer1.2.relu', 'layer1.2.conv2', 'layer1.2.bn2', 'layer1.2.relu_1', 'layer1.2.conv3', 'layer1.2.bn3', 'layer1.2.add', 'layer1.2.relu_2', 'layer2.0.conv1', 'layer2.0.bn1', 'layer2.0.relu', 'layer2.0.conv2', 'layer2.0.bn2', 'layer2.0.relu_1', 'layer2.0.conv3', 'layer2.0.bn3', 'layer2.0.downsample.0', 'layer2.0.downsample.1', 'layer2.0.add', 'layer2.0.relu_2', 'layer2.1.conv1', 'layer2.1.bn1', 'layer2.1.relu', 'layer2.1.conv2', 'layer2.1.bn2', 'layer2.1.relu_1', 'layer2.1.conv3', 'layer2.1.bn3', 'layer2.1.add', 'layer2.1.rel

## RetinaNet (ResNet-50 Backbone)

In [None]:
model = models.detection.retinanet_resnet50_fpn(weights = RetinaNet_ResNet50_FPN_Weights.COCO_V1)
model.eval()

In questo modo sottostante isolo i primi due gruppi di retina net (backbone e feature pyramid, come nel paper di anbebe) -> **BackboneWithFPN**

In [17]:
model.backbone = model.backbone.body.layer1[:-1]

In [24]:
model

RetinaNet(
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d(256, eps=0.0)


In [None]:
# list(model.children())[:-3] -> alternativa, non ho capito ocome si usa

In [30]:
train_nodes, _ = get_graph_node_names(model.backbone)
print(train_nodes)

['x', 'body.conv1', 'body.bn1', 'body.relu', 'body.maxpool', 'body.layer1.0.conv1', 'body.layer1.0.bn1', 'body.layer1.0.relu', 'body.layer1.0.conv2', 'body.layer1.0.bn2', 'body.layer1.0.relu_1', 'body.layer1.0.conv3', 'body.layer1.0.bn3', 'body.layer1.0.downsample.0', 'body.layer1.0.downsample.1', 'body.layer1.0.add', 'body.layer1.0.relu_2', 'body.layer1.1.conv1', 'body.layer1.1.bn1', 'body.layer1.1.relu', 'body.layer1.1.conv2', 'body.layer1.1.bn2', 'body.layer1.1.relu_1', 'body.layer1.1.conv3', 'body.layer1.1.bn3', 'body.layer1.1.add', 'body.layer1.1.relu_2', 'body.layer1.2.conv1', 'body.layer1.2.bn1', 'body.layer1.2.relu', 'body.layer1.2.conv2', 'body.layer1.2.bn2', 'body.layer1.2.relu_1', 'body.layer1.2.conv3', 'body.layer1.2.bn3', 'body.layer1.2.add', 'body.layer1.2.relu_2', 'body.layer2.0.conv1', 'body.layer2.0.bn1', 'body.layer2.0.relu', 'body.layer2.0.conv2', 'body.layer2.0.bn2', 'body.layer2.0.relu_1', 'body.layer2.0.conv3', 'body.layer2.0.bn3', 'body.layer2.0.downsample.0', 'b

Osservando il codice di RetinaNet e in particolare di BackbonewithFPN emerge come il blocco `fpn`:
- Se scelto con il feature_extractor, manda in output un dizionario conenenete tutte e feature maps (5 in totale) dei layer presenti nel blocco della feature pyramid
- Ricorda che: feature_extractor genera in output già un dizionario dei layer selezionati
- Quindi in qquesto caso avremo un dizionario di dizionari (va sistemata la funzione di estrazione)

## VGG-16

In [4]:
model = torch.hub.load('pytorch/vision:v0.10.0', 
                        'vgg16_bn', 
                        weights=VGG16_BN_Weights.IMAGENET1K_V1)
model.eval() # set the model to evaluation mode, since you are not training it

Using cache found in C:\Users\giorg/.cache\torch\hub\pytorch_vision_v0.10.0


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

In [5]:
train_nodes, _ = get_graph_node_names(model)
print(train_nodes)

['x', 'features.0', 'features.1', 'features.2', 'features.3', 'features.4', 'features.5', 'features.6', 'features.7', 'features.8', 'features.9', 'features.10', 'features.11', 'features.12', 'features.13', 'features.14', 'features.15', 'features.16', 'features.17', 'features.18', 'features.19', 'features.20', 'features.21', 'features.22', 'features.23', 'features.24', 'features.25', 'features.26', 'features.27', 'features.28', 'features.29', 'features.30', 'features.31', 'features.32', 'features.33', 'features.34', 'features.35', 'features.36', 'features.37', 'features.38', 'features.39', 'features.40', 'features.41', 'features.42', 'features.43', 'avgpool', 'flatten', 'classifier.0', 'classifier.1', 'classifier.2', 'classifier.3', 'classifier.4', 'classifier.5', 'classifier.6']


## VGG-19

In [3]:
model = torch.hub.load('pytorch/vision:v0.10.0', 
                        'vgg19_bn', 
                        weights=VGG19_BN_Weights.IMAGENET1K_V1)
model.eval() # set the model to evaluation mode, since you are not training it

Using cache found in C:\Users\giorg/.cache\torch\hub\pytorch_vision_v0.10.0


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

In [4]:
train_nodes, _ = get_graph_node_names(model)
print(train_nodes)

['x', 'features.0', 'features.1', 'features.2', 'features.3', 'features.4', 'features.5', 'features.6', 'features.7', 'features.8', 'features.9', 'features.10', 'features.11', 'features.12', 'features.13', 'features.14', 'features.15', 'features.16', 'features.17', 'features.18', 'features.19', 'features.20', 'features.21', 'features.22', 'features.23', 'features.24', 'features.25', 'features.26', 'features.27', 'features.28', 'features.29', 'features.30', 'features.31', 'features.32', 'features.33', 'features.34', 'features.35', 'features.36', 'features.37', 'features.38', 'features.39', 'features.40', 'features.41', 'features.42', 'features.43', 'features.44', 'features.45', 'features.46', 'features.47', 'features.48', 'features.49', 'features.50', 'features.51', 'features.52', 'avgpool', 'flatten', 'classifier.0', 'classifier.1', 'classifier.2', 'classifier.3', 'classifier.4', 'classifier.5', 'classifier.6']


## EfficientNet B2

In [3]:
model = efficientnet_b2(weights=EfficientNet_B2_Weights.IMAGENET1K_V1)
model.eval() # set the model to evaluation mode, since you are not training it

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [4]:
train_nodes, _ = get_graph_node_names(model)
print(train_nodes)

['x', 'features.0', 'features.1.0.block.0', 'features.1.0.block.1', 'features.1.0.block.2', 'features.1.1.block.0', 'features.1.1.block.1', 'features.1.1.block.2', 'features.1.1.stochastic_depth', 'features.1.1.add', 'features.2.0.block.0', 'features.2.0.block.1', 'features.2.0.block.2', 'features.2.0.block.3', 'features.2.1.block.0', 'features.2.1.block.1', 'features.2.1.block.2', 'features.2.1.block.3', 'features.2.1.stochastic_depth', 'features.2.1.add', 'features.2.2.block.0', 'features.2.2.block.1', 'features.2.2.block.2', 'features.2.2.block.3', 'features.2.2.stochastic_depth', 'features.2.2.add', 'features.3.0.block.0', 'features.3.0.block.1', 'features.3.0.block.2', 'features.3.0.block.3', 'features.3.1.block.0', 'features.3.1.block.1', 'features.3.1.block.2', 'features.3.1.block.3', 'features.3.1.stochastic_depth', 'features.3.1.add', 'features.3.2.block.0', 'features.3.2.block.1', 'features.3.2.block.2', 'features.3.2.block.3', 'features.3.2.stochastic_depth', 'features.3.2.a

## EfficientNet-B5

In [4]:
model = efficientnet_b5(weights=EfficientNet_B5_Weights.IMAGENET1K_V1)
model.eval() # set the model to evaluation mode, since you are not training it

Downloading: "https://download.pytorch.org/models/efficientnet_b5_lukemelas-b6417697.pth" to C:\Users\giorg/.cache\torch\hub\checkpoints\efficientnet_b5_lukemelas-b6417697.pth


  0%|          | 0.00/117M [00:00<?, ?B/s]

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(48, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
            (1): BatchNorm2d(48, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormAct

In [5]:
train_nodes, _ = get_graph_node_names(model)
print(train_nodes)

['x', 'features.0', 'features.1.0.block.0', 'features.1.0.block.1', 'features.1.0.block.2', 'features.1.1.block.0', 'features.1.1.block.1', 'features.1.1.block.2', 'features.1.1.stochastic_depth', 'features.1.1.add', 'features.1.2.block.0', 'features.1.2.block.1', 'features.1.2.block.2', 'features.1.2.stochastic_depth', 'features.1.2.add', 'features.2.0.block.0', 'features.2.0.block.1', 'features.2.0.block.2', 'features.2.0.block.3', 'features.2.1.block.0', 'features.2.1.block.1', 'features.2.1.block.2', 'features.2.1.block.3', 'features.2.1.stochastic_depth', 'features.2.1.add', 'features.2.2.block.0', 'features.2.2.block.1', 'features.2.2.block.2', 'features.2.2.block.3', 'features.2.2.stochastic_depth', 'features.2.2.add', 'features.2.3.block.0', 'features.2.3.block.1', 'features.2.3.block.2', 'features.2.3.block.3', 'features.2.3.stochastic_depth', 'features.2.3.add', 'features.2.4.block.0', 'features.2.4.block.1', 'features.2.4.block.2', 'features.2.4.block.3', 'features.2.4.stoch

## DINOv2

In [2]:
model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')
model.eval()

Downloading: "https://github.com/facebookresearch/dinov2/zipball/main" to C:\Users\giorg/.cache\torch\hub\main.zip
xFormers not available
xFormers not available
Downloading: "https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth" to C:\Users\giorg/.cache\torch\hub\checkpoints\dinov2_vits14_pretrain.pth


  0%|          | 0.00/84.2M [00:00<?, ?B/s]

DinoVisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 384, kernel_size=(14, 14), stride=(14, 14))
    (norm): Identity()
  )
  (blocks): ModuleList(
    (0): NestedTensorBlock(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): MemEffAttention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): LayerScale()
      (drop_path1): Identity()
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=384, out_features=1536, bias=True)
        (act): GELU(approximate='none')
        (fc2): Linear(in_features=1536, out_features=384, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
      (ls2): LayerScale()
      (drop_path2): Identity()
    )
    (1): NestedT

The pytorch implementation of DINOv2 is itself a feature extractor, so it doesn't need to select a feature layer
- See: https://github.com/facebookresearch/dinov2/issues/89

In [None]:
for param in model.parameters():
    param.requires_grad = False

# Loss Functions

## Pearson

In [5]:
x.shape

torch.Size([10, 10])

In [12]:
def CCCLoss(x, y):
    ccc = (x.var() + y.var() + (x.mean() - y.mean())**2)
    return ccc

CCCLoss(x, y).grad_fn

<AddBackward0 at 0x1ed4329e790>

### Torchmetrics PearsonCorrCoef

In [14]:
from torchmetrics import PearsonCorrCoef
x = torch.randn(10, 11, requires_grad=True)
y = torch.randn(10, 11, requires_grad=True)
pearson = PearsonCorrCoef(num_outputs=11)
pearson_calculated = torch.mean(pearson(x, y))
print(pearson_calculated.grad_fn)

<MeanBackward0 object at 0x000001ED00E323D0>


In [18]:
# !pip install torchmetrics
from torchmetrics import PearsonCorrCoef
x = torch.randn(10, 11, requires_grad=True)
y = torch.randn(10, 11, requires_grad=True)

# Define the loss function
def PearsonMeanLoss(pred, target):
    # Create the PearsonCorrCoef object for the loss function
    pearson = PearsonCorrCoef(num_outputs=pred.shape[1])
    # Calculate the Pearson correlation coefficient between the prediction and target
    # where pred and target tensors are of shape (batch_size, num_variable)
    # Squared the coefficients and take the mean
    return torch.mean(torch.pow(pearson(pred, target),2))

def PearsonMedianLoss(pred, target):
    # Create the PearsonCorrCoef object for the loss function
    pearson = PearsonCorrCoef(num_outputs=pred.shape[1])
    # Calculate the Pearson correlation coefficient between the prediction and target
    # where pred and target tensors are of shape (batch_size, num_variable)
    # Squared the coefficients and take the median
    return torch.median(torch.pow(pearson(pred, target),2))

pearson_calculated = PearsonMedianLoss(x, y)
print(pearson_calculated.grad_fn)

<MedianBackward0 object at 0x000001ED4EB86B20>


In [20]:
import torch
from torch.autograd import Function, Variable

class PearsonCorrCoef(Function):
    @staticmethod
    def forward(ctx, input1, input2):
        # Compute the Pearson correlation coefficient between input1 and input2
        # where input1 and input2 tensors are of shape (batch_size, num_variable)
        # Return the coefficients as a tensor of shape (batch_size, num_variable)
        mean1 = input1.mean(dim=0, keepdim=True)
        mean2 = input2.mean(dim=0, keepdim=True)
        std1 = input1.std(dim=0, keepdim=True)
        std2 = input2.std(dim=0, keepdim=True)
        cov = ((input1 - mean1) * (input2 - mean2)).mean(dim=0, keepdim=True)
        corr_coef = cov / (std1 * std2)
        ctx.save_for_backward(input1, input2)
        return corr_coef

    @staticmethod
    def backward(ctx, grad_output):
        input1, input2 = ctx.saved_tensors
        # Compute the gradient of the Pearson correlation coefficient with respect to the inputs
        mean1 = input1.mean(dim=0, keepdim=True)
        mean2 = input2.mean(dim=0, keepdim=True)
        std1 = input1.std(dim=0, keepdim=True)
        std2 = input2.std(dim=0, keepdim=True)
        cov = ((input1 - mean1) * (input2 - mean2)).mean(dim=0, keepdim=True)
        corr_coef = cov / (std1 * std2)
        grad_input1 = grad_input2 = None
        if ctx.needs_input_grad[0]:
            grad_input1 = grad_output * (input2 - mean2) / (std1 * std2**2) - (corr_coef * grad_output).mean(dim=0, keepdim=True) * (input2 - mean2) / (std1 * std2**2)
        if ctx.needs_input_grad[1]:
            grad_input2 = grad_output * (input1 - mean1) / (std1**2 * std2) - (corr_coef * grad_output).mean(dim=0, keepdim=True) * (input1 - mean1) / (std1**2 * std2)
        return grad_input1, grad_input2

class CustomLoss(Function):
    @staticmethod
    def forward(ctx, pred, target):
        # Create the PearsonCorrCoef object for the loss function
        pearson = PearsonCorrCoef.apply
        # Calculate the Pearson correlation coefficient between the prediction and target
        # where pred and target tensors are of shape (batch_size, num_variable)
        # Squared the coefficients and take the median
        loss = torch.median(torch.pow(pearson(pred, target),2))
        ctx.save_for_backward(pred, target)
        return loss

    @staticmethod
    def backward(ctx, grad_output):
        pred, target = ctx.saved_tensors
        # Compute the gradient of the loss with respect to the prediction
        pearson = PearsonCorrCoef.apply
        corr_coef = pearson(pred, target)
        grad_input = torch.zeros_like(pred)
        for i in range(pred.shape[1]):
            grad_input[:, i] = 2 * torch.where(corr_coef[:, i] != 0, corr_coef[:, i], torch.ones_like(corr_coef[:, i])) * grad_output
        return grad_input, None

# Example usage
loss_fn = CustomLoss.apply
pred = Variable(torch.randn(10, 5), requires_grad=True)
target = Variable(torch.randn(10, 5))
loss = loss_fn(pred, target)
loss.backward()

In [21]:
loss

tensor(0.0169, grad_fn=<CustomLossBackward>)