# Notebook Purpose

Implement TCAV using Pytorch for CLIP

# Load Dependencies

In [6]:
#https://github.com/openai/CLIP
# authors Katherine Crowson (https://github.com/crowsonkb, https://twitter.com/RiversHaveWings), nerdyrodent
# authors vivian
# The original BigGAN+CLIP method was by https://twitter.com/advadnoun
import threading
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F

from torch.autograd import Variable

from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import math
import random
from urllib.request import urlopen
from tqdm import tqdm
import sys
import os
sys.path.append('taming-transformers')
from omegaconf import OmegaConf
from taming.models import cond_transformer, vqgan
import torch
from torch import nn, optim
from torch.nn import functional as F
from torchvision import transforms
from torchvision.transforms import functional as TF
from torch.cuda import get_device_properties
torch.backends.cudnn.benchmark = False
from torch_optimizer import DiffGrad, AdamP, RAdam
from CLIP import clip
import kornia.augmentation as K
import imageio
from PIL import ImageFile, Image, PngImagePlugin, ImageChops
ImageFile.LOAD_TRUNCATED_IMAGES = True
from subprocess import Popen, PIPE
import re

In [13]:
clip.available_models()

['RN50', 'RN101', 'RN50x4', 'RN50x16', 'ViT-B/32', 'ViT-B/16']

In [14]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load CLIP

In [15]:
model, preprocess = clip.load('ViT-B/32', device)

In [5]:
class Hook:
    """Attaches to a module and records its activations and gradients."""

    def __init__(self, module: nn.Module):
        self.data = None
        self.hook = module.register_forward_hook(self.save_grad)
        
    def save_grad(self, module, input, output):
        self.data = output
        output.requires_grad_(True)
        output.retain_grad()
        
    def __enter__(self):
        return self
    
    def __exit__(self, exc_type, exc_value, exc_traceback):
        self.hook.remove()
        
    @property
    def activation(self) -> torch.Tensor:
        return self.data
    
    @property
    def gradient(self) -> torch.Tensor:
        return self.data.grad

In [6]:
class EarlyStopping():
    """
    Early stopping to stop the training when the loss does not improve after
    certain epochs.
    """
    def __init__(self, patience=5, min_delta=0):
        """
        :param patience: how many epochs to wait before stopping when loss is
               not improving
        :param min_delta: minimum difference between new loss and old loss for
               new loss to be considered as an improvement
        """
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
    def __call__(self, val_loss):
        if self.best_loss == None:
            self.best_loss = val_loss
        elif self.best_loss - val_loss > self.min_delta:
            self.best_loss = val_loss
            # reset counter if validation loss improves
            self.counter = 0
        elif self.best_loss - val_loss < self.min_delta:
            self.counter += 1
            print(f"INFO: Early stopping counter {self.counter} of {self.patience}")
            if self.counter >= self.patience:
                print('INFO: Early stopping')
                self.early_stop = True

# Register hooks

In [22]:
# assist from https://web.stanford.edu/~nanbhas/blog/forward-hooks-pytorch/
activations = {}
gradients = {}
def getActivation(name):
    # the hook signature 
    def hook(model, input, output):
        
        output.requires_grad_(True)
        output.retain_grad()
        gradients[name] = output.grad
        activations[name] = output.detach()
    return hook

In [23]:
hooks = []
layers = np.concatenate([[model.visual.conv1], model.visual.transformer.resblocks[1::2]])
layernames = np.concatenate([['layer0'], [f'layer{i}' for i in range(1,13,2)]], dtype=str)
for l, n in zip(layers, layernames):
    hooks.append(l.register_forward_hook(getActivation(n)))

In [24]:
# possibly needed in future if using larger dataset w/ dataloader

# embedding_list = np.empty(layers.shape, dtype=object)
# for i in range(len(embedding_list)):
#     embedding_list[i] = []
# for num_layer, name  in enumerate(layernames):
#     embedding_list[num_layer].append(activations[n])

# Image Encoding

# Text Encoding

In [25]:
def get_img_tensors(img_filename, img_dir=""):
    image = preprocess(Image.open(img_dir + img_filename)).unsqueeze(0).to(device)
    return image

In [26]:
def encode_images(img_filename, img_dir=""):
    image = preprocess(Image.open(img_dir + img_filename)).unsqueeze(0).to(device)

    image_features = model.encode_image(image.cuda())
    return image_features

Load an example image

# Define Linear Classifiers

In [7]:
class LinearClassifier(torch.nn.Module):
    
    def __init__(self, num_features):
        super().__init__()
        
        self.linear1 = torch.nn.Linear(num_features, 1)
        

    def forward(self, input_x):
        x = self.linear1(input_x)

        return x



In [29]:
positive_filenames = os.listdir('tcav/concepts/striped')
negative_filenames = os.listdir('tcav/concepts/random_0')

In [30]:
positive_concept = [encode_images(filename, 'tcav/concepts/striped/') for filename in positive_filenames]
#positive_concept = torch.vstack(positive_concept)

In [31]:
negative_concept = [encode_images(filename, 'tcav/concepts/random_0/') for filename in negative_filenames]
#negative_concept = torch.vstack(negative_concept)

In [32]:
# all_concept = positive_concept + negative_concept

In [33]:

positive_concepts = torch.vstack(positive_concept)
negative_concepts = torch.vstack(negative_concept)

In [34]:
positive_img_tensors = [get_img_tensors(img,'tcav/concepts/striped/') for img in positive_filenames]
positive_img_tensors = torch.vstack(positive_img_tensors)
negative_img_tensors = [get_img_tensors(img,'tcav/concepts/random_0/') for img in negative_filenames]
negative_img_tensors = torch.vstack(negative_img_tensors)


In [35]:
all_img_tensors = torch.vstack([positive_img_tensors, negative_img_tensors])

# Collect features

In [36]:
outputs = model.encode_image(all_img_tensors)


In [37]:
text_inputs = ["zebra"] * len(positive_concept) + ["not zebra"] * len(negative_concept)


In [38]:

text_inputs = [clip.tokenize([text_input]).to(device) for text_input in text_inputs]
target = torch.vstack([model.encode_text(text_input).float() for text_input in text_inputs])

In [40]:
all_layer_gradients = {}
all_layer_activations = {}
for layer, name in zip(layers, layernames):
    layer_gradients = []
    layer_activations = []
    with Hook(layer) as hook:

        # Do a forward and backward pass.
        output = model.encode_image(all_img_tensors)
        output.backward(target)

        grad = hook.gradient.float()
        act = hook.activation.float()
        layer_gradients.append(grad)
        layer_activations.append(act)
    all_layer_gradients[name] = layer_gradients
    all_layer_activations[name] = layer_activations

In [54]:
import pickle
with open("all_layer_gradients.pkl","wb") as f:
    pickle.dump(all_layer_gradients,f)

In [55]:
with open("all_layer_activations.pkl","wb") as f:
    pickle.dump(all_layer_activations,f)

# Process features

In [41]:
training_data =  []
linear_classifier_sizes = []
for key in all_layer_gradients.keys():
    training_data.append(all_layer_gradients[key][0].view(85,-1))
    linear_classifier_sizes.append(all_layer_gradients[key][0].view(85,-1).shape[-1])
    

# Assemble training data for all layers

In [42]:
positive_labels = torch.tensor(positive_concepts.shape[0] * [1])
negative_labels = torch.tensor(negative_concepts.shape[0] * [0])
# training_data = torch.vstack([positive_concepts, negative_concepts])
class_labels = torch.cat([positive_labels, negative_labels])

In [None]:
Assemble validation data for all layers

In [59]:
len_train = int( train.shape[0] * 0.7)

In [64]:
dataset

<torch.utils.data.dataset.TensorDataset at 0x7f1b65abe670>

In [66]:
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [ len_train, train.shape[0] -len_train] )

# Create dataloaders

In [73]:
criterion = torch.nn.BCEWithLogitsLoss()
dataloaders = []
val_dataloaders = []
for train in training_data:
    dataset = TensorDataset(train, class_labels)
    train_dataset, val_dataset = torch.utils.data.random_split(dataset, [ len_train, train.shape[0] -len_train])
    loader = DataLoader(train_dataset, batch_size=2,
                    pin_memory=False, shuffle=True)
    val_loader = DataLoader(train_dataset, batch_size=2,
                    pin_memory=False, shuffle=True)
    dataloaders.append(loader)
    val_dataloaders.append(val_loader)


# Create classifiers

In [74]:
classifiers = []
for classifier_size in linear_classifier_sizes:
    
    classifiers.append(LinearClassifier(classifier_size))




In [87]:
def train_classifier(classifier, dataloader, val_dataloader):
    clf = classifier.cuda()
    early_stopping = EarlyStopping()
    optimizer = torch.optim.SGD(clf.parameters(), lr=0.001)
    for it in range(n_epochs):
        for i, data in enumerate(dataloader,0):
            inputs, labels = data
            inputs = Variable(inputs, requires_grad=True)
            optimizer.zero_grad()
            outputs = clf(inputs.cuda().float())

            loss = criterion(outputs.cuda().float(), labels.cuda().reshape(-1,1).float())
            loss.backward()
            
            
            optimizer.step()
        with torch.no_grad():
            for i, data in enumerate(val_dataloader,0):
                inputs, labels = data
                inputs = Variable(inputs)
                outputs = clf(inputs.cuda().float())

                loss = criterion(outputs.cuda().float(), labels.cuda().reshape(-1,1).float()) 
        
            early_stopping(loss)
        if it % 10 == 0:
            pass
        if early_stopping.early_stop:
            print(loss)
            break

In [88]:
dataloaders

[<torch.utils.data.dataloader.DataLoader at 0x7f1b65ade9a0>,
 <torch.utils.data.dataloader.DataLoader at 0x7f1b64662850>,
 <torch.utils.data.dataloader.DataLoader at 0x7f1b64662c40>,
 <torch.utils.data.dataloader.DataLoader at 0x7f1b64662820>,
 <torch.utils.data.dataloader.DataLoader at 0x7f1b64662610>,
 <torch.utils.data.dataloader.DataLoader at 0x7f1b65a7fcd0>,
 <torch.utils.data.dataloader.DataLoader at 0x7f1b65a854c0>]

# Training Classifiers Features

In [89]:
n_epochs = 1000
for classifier, dataloader, val_dataloader in zip(classifiers, dataloaders, val_dataloaders):
    print(classifier)
    train_classifier(classifier, dataloader,val_dataloader)
    print("trained a classifier")


LinearClassifier(
  (linear1): Linear(in_features=37632, out_features=1, bias=True)
)
INFO: Early stopping counter 1 of 5
INFO: Early stopping counter 2 of 5
INFO: Early stopping counter 3 of 5
INFO: Early stopping counter 1 of 5
INFO: Early stopping counter 2 of 5
INFO: Early stopping counter 3 of 5
INFO: Early stopping counter 1 of 5
INFO: Early stopping counter 2 of 5
INFO: Early stopping counter 3 of 5
INFO: Early stopping counter 4 of 5
INFO: Early stopping counter 5 of 5
INFO: Early stopping
tensor(0.0622, device='cuda:0')
trained a classifier
LinearClassifier(
  (linear1): Linear(in_features=38400, out_features=1, bias=True)
)
INFO: Early stopping counter 1 of 5
INFO: Early stopping counter 2 of 5
INFO: Early stopping counter 3 of 5
INFO: Early stopping counter 4 of 5
INFO: Early stopping counter 5 of 5
INFO: Early stopping
tensor(0.4219, device='cuda:0')
trained a classifier
LinearClassifier(
  (linear1): Linear(in_features=38400, out_features=1, bias=True)
)
INFO: Early stoppi

In [90]:

with open("cached_classifiers.pkl","wb") as f:
    pickle.dump(classifiers,f)

# Get orthogonal vector

In [91]:
def get_orthogonal_vector(classifier, classifier_size):
    weight, bias = [param for param in classifier.parameters()]
    cav_vector = weight.squeeze().cpu().detach().numpy()
    orthonormal_vector = np.random.randn(classifier_size)  # take a random vector
    orthonormal_vector -= orthonormal_vector.dot(cav_vector) * cav_vector / np.linalg.norm(cav_vector)**2
    orthonormal_vector /= np.linalg.norm(orthonormal_vector) 
    return orthonormal_vector, cav_vector

In [92]:
cavs = [get_orthogonal_vector(classifier, classifier_size) for classifier, classifier_size in zip(classifiers, linear_classifier_sizes)]

# Check orthogonality

In [93]:
[np.dot(orthonormal_vector,cav_vector) for orthonormal_vector, cav_vector in cavs]

[-1.1899124090566926e-10,
 5.035230956143555e-10,
 4.686241134727043e-10,
 2.495054813797526e-11,
 -3.7813513171364166e-11,
 -8.493784451821251e-12,
 5.064932414450274e-11]

In [94]:

for i in range(85):
    print(i)
    print(torch.dot(all_layer_gradients['layer0'][0].view(85,-1)[i], torch.tensor(cavs[0][1]).cuda() ) )

0
tensor(3.1576, device='cuda:0')
1
tensor(0.9237, device='cuda:0')
2
tensor(2.2916, device='cuda:0')
3
tensor(1.3362, device='cuda:0')
4
tensor(0.2581, device='cuda:0')
5
tensor(0.9651, device='cuda:0')
6
tensor(-0.2086, device='cuda:0')
7
tensor(3.5177, device='cuda:0')
8
tensor(1.9983, device='cuda:0')
9
tensor(2.2666, device='cuda:0')
10
tensor(2.7793, device='cuda:0')
11
tensor(1.3769, device='cuda:0')
12
tensor(2.6567, device='cuda:0')
13
tensor(3.3030, device='cuda:0')
14
tensor(-0.0600, device='cuda:0')
15
tensor(1.2082, device='cuda:0')
16
tensor(2.0105, device='cuda:0')
17
tensor(2.5700, device='cuda:0')
18
tensor(3.3431, device='cuda:0')
19
tensor(-0.1400, device='cuda:0')
20
tensor(1.3527, device='cuda:0')
21
tensor(1.8662, device='cuda:0')
22
tensor(-0.1866, device='cuda:0')
23
tensor(2.8525, device='cuda:0')
24
tensor(0.3161, device='cuda:0')
25
tensor(0.6625, device='cuda:0')
26
tensor(4.6133, device='cuda:0')
27
tensor(1.4815, device='cuda:0')
28
tensor(2.0673, device='

In [95]:
for i in range(85):
    print(i)
    print(torch.dot(all_layer_gradients['layer1'][0].view(85,-1)[i], torch.tensor(cavs[1][1]).cuda() ) )

0
tensor(5.6702, device='cuda:0')
1
tensor(3.9695, device='cuda:0')
2
tensor(1.4329, device='cuda:0')
3
tensor(0.9419, device='cuda:0')
4
tensor(0.0106, device='cuda:0')
5
tensor(1.2248, device='cuda:0')
6
tensor(-0.0851, device='cuda:0')
7
tensor(-0.1159, device='cuda:0')
8
tensor(0.7661, device='cuda:0')
9
tensor(0.6483, device='cuda:0')
10
tensor(0.0589, device='cuda:0')
11
tensor(0.8473, device='cuda:0')
12
tensor(0.0159, device='cuda:0')
13
tensor(0.9787, device='cuda:0')
14
tensor(1.2873, device='cuda:0')
15
tensor(0.5961, device='cuda:0')
16
tensor(0.4017, device='cuda:0')
17
tensor(-0.1369, device='cuda:0')
18
tensor(0.7163, device='cuda:0')
19
tensor(0.8413, device='cuda:0')
20
tensor(-0.0288, device='cuda:0')
21
tensor(0.6438, device='cuda:0')
22
tensor(0.8506, device='cuda:0')
23
tensor(0.8276, device='cuda:0')
24
tensor(0.8787, device='cuda:0')
25
tensor(1.0171, device='cuda:0')
26
tensor(1.0181, device='cuda:0')
27
tensor(-0.0558, device='cuda:0')
28
tensor(-0.0678, device

In [96]:
for i in range(85):
    print(i)
    print(torch.dot(all_layer_gradients['layer3'][0].view(85,-1)[i], torch.tensor(cavs[2][1]).cuda() ) )

0
tensor(7.8871, device='cuda:0')
1
tensor(4.2670, device='cuda:0')
2
tensor(1.5204, device='cuda:0')
3
tensor(0.9167, device='cuda:0')
4
tensor(0.0608, device='cuda:0')
5
tensor(-0.0854, device='cuda:0')
6
tensor(0.5544, device='cuda:0')
7
tensor(0.8682, device='cuda:0')
8
tensor(0.0067, device='cuda:0')
9
tensor(0.0300, device='cuda:0')
10
tensor(1.0211, device='cuda:0')
11
tensor(-0.0455, device='cuda:0')
12
tensor(-0.2739, device='cuda:0')
13
tensor(-0.0963, device='cuda:0')
14
tensor(1.0677, device='cuda:0')
15
tensor(0.6390, device='cuda:0')
16
tensor(-0.1569, device='cuda:0')
17
tensor(0.4333, device='cuda:0')
18
tensor(0.6834, device='cuda:0')
19
tensor(0.7324, device='cuda:0')
20
tensor(0.7751, device='cuda:0')
21
tensor(0.6463, device='cuda:0')
22
tensor(0.5030, device='cuda:0')
23
tensor(0.0405, device='cuda:0')
24
tensor(0.7992, device='cuda:0')
25
tensor(0.7743, device='cuda:0')
26
tensor(0.9330, device='cuda:0')
27
tensor(0.0239, device='cuda:0')
28
tensor(0.6674, device=

In [97]:
for i in range(85):
    print(i)
    print(torch.dot(all_layer_gradients['layer5'][0].view(85,-1)[i], torch.tensor(cavs[3][1]).cuda() ) )

0
tensor(5.0756, device='cuda:0')
1
tensor(3.6719, device='cuda:0')
2
tensor(0.8010, device='cuda:0')
3
tensor(0.4374, device='cuda:0')
4
tensor(0.3537, device='cuda:0')
5
tensor(0.5322, device='cuda:0')
6
tensor(0.0891, device='cuda:0')
7
tensor(0.4082, device='cuda:0')
8
tensor(0.0384, device='cuda:0')
9
tensor(0.0883, device='cuda:0')
10
tensor(0.0774, device='cuda:0')
11
tensor(0.0869, device='cuda:0')
12
tensor(0.0322, device='cuda:0')
13
tensor(0.4059, device='cuda:0')
14
tensor(0.4620, device='cuda:0')
15
tensor(0.1910, device='cuda:0')
16
tensor(0.2752, device='cuda:0')
17
tensor(0.2014, device='cuda:0')
18
tensor(0.2736, device='cuda:0')
19
tensor(0.3711, device='cuda:0')
20
tensor(0.2790, device='cuda:0')
21
tensor(0.3665, device='cuda:0')
22
tensor(0.3064, device='cuda:0')
23
tensor(0.4076, device='cuda:0')
24
tensor(0.3593, device='cuda:0')
25
tensor(0.3061, device='cuda:0')
26
tensor(0.4341, device='cuda:0')
27
tensor(0.1547, device='cuda:0')
28
tensor(0.3427, device='cuda

In [98]:
for i in range(85):
    print(i)
    print(torch.dot(all_layer_gradients['layer7'][0].view(85,-1)[i], torch.tensor(cavs[4][1]).cuda() ) )

0
tensor(-1.1681, device='cuda:0')
1
tensor(-0.4685, device='cuda:0')
2
tensor(0.0228, device='cuda:0')
3
tensor(0.1466, device='cuda:0')
4
tensor(0.1614, device='cuda:0')
5
tensor(0.0619, device='cuda:0')
6
tensor(0.1490, device='cuda:0')
7
tensor(0.1648, device='cuda:0')
8
tensor(0.1493, device='cuda:0')
9
tensor(0.1720, device='cuda:0')
10
tensor(0.2074, device='cuda:0')
11
tensor(0.1479, device='cuda:0')
12
tensor(0.3873, device='cuda:0')
13
tensor(0.1705, device='cuda:0')
14
tensor(0.2090, device='cuda:0')
15
tensor(0.2053, device='cuda:0')
16
tensor(0.0004, device='cuda:0')
17
tensor(0.0241, device='cuda:0')
18
tensor(0.1873, device='cuda:0')
19
tensor(0.1511, device='cuda:0')
20
tensor(0.0658, device='cuda:0')
21
tensor(0.2161, device='cuda:0')
22
tensor(0.1166, device='cuda:0')
23
tensor(0.2141, device='cuda:0')
24
tensor(0.0278, device='cuda:0')
25
tensor(0.0408, device='cuda:0')
26
tensor(0.2271, device='cuda:0')
27
tensor(0.1222, device='cuda:0')
28
tensor(0.0928, device='cu

In [99]:
for i in range(85):
    print(i)
    print(torch.dot(all_layer_gradients['layer9'][0].view(85,-1)[i], torch.tensor(cavs[5][1]).cuda() ) )

0
tensor(4.5534, device='cuda:0')
1
tensor(0.9605, device='cuda:0')
2
tensor(0.0407, device='cuda:0')
3
tensor(0.0317, device='cuda:0')
4
tensor(0.0326, device='cuda:0')
5
tensor(0.0408, device='cuda:0')
6
tensor(0.0415, device='cuda:0')
7
tensor(0.0314, device='cuda:0')
8
tensor(0.0179, device='cuda:0')
9
tensor(0.0268, device='cuda:0')
10
tensor(0.0683, device='cuda:0')
11
tensor(0.0242, device='cuda:0')
12
tensor(0.0112, device='cuda:0')
13
tensor(0.0377, device='cuda:0')
14
tensor(0.0097, device='cuda:0')
15
tensor(0.0306, device='cuda:0')
16
tensor(0.0244, device='cuda:0')
17
tensor(0.0053, device='cuda:0')
18
tensor(0.0475, device='cuda:0')
19
tensor(0.0257, device='cuda:0')
20
tensor(0.0352, device='cuda:0')
21
tensor(0.0253, device='cuda:0')
22
tensor(0.0428, device='cuda:0')
23
tensor(0.0569, device='cuda:0')
24
tensor(0.0069, device='cuda:0')
25
tensor(0.0200, device='cuda:0')
26
tensor(0.0146, device='cuda:0')
27
tensor(0.0383, device='cuda:0')
28
tensor(0.0244, device='cuda

In [100]:
for i in range(85):
    print(i)
    print(torch.dot(all_layer_gradients['layer11'][0].view(85,-1)[i], torch.tensor(cavs[6][1]).cuda() ) )

0
tensor(4.5974, device='cuda:0')
1
tensor(5.0867, device='cuda:0')
2
tensor(0., device='cuda:0')
3
tensor(0., device='cuda:0')
4
tensor(0., device='cuda:0')
5
tensor(0., device='cuda:0')
6
tensor(0., device='cuda:0')
7
tensor(0., device='cuda:0')
8
tensor(0., device='cuda:0')
9
tensor(0., device='cuda:0')
10
tensor(0., device='cuda:0')
11
tensor(0., device='cuda:0')
12
tensor(0., device='cuda:0')
13
tensor(0., device='cuda:0')
14
tensor(0., device='cuda:0')
15
tensor(0., device='cuda:0')
16
tensor(0., device='cuda:0')
17
tensor(0., device='cuda:0')
18
tensor(0., device='cuda:0')
19
tensor(0., device='cuda:0')
20
tensor(0., device='cuda:0')
21
tensor(0., device='cuda:0')
22
tensor(0., device='cuda:0')
23
tensor(0., device='cuda:0')
24
tensor(0., device='cuda:0')
25
tensor(0., device='cuda:0')
26
tensor(0., device='cuda:0')
27
tensor(0., device='cuda:0')
28
tensor(0., device='cuda:0')
29
tensor(0., device='cuda:0')
30
tensor(0., device='cuda:0')
31
tensor(0., device='cuda:0')
32
tensor(

You need labels to get gradients, to get a loss to backpropagate with

# Calculate TCAV score

In [16]:
image = preprocess(Image.open('tcav/concepts/striped/striped_0086.jpg')).unsqueeze(0).to(device)
image_features = model.encode_image(image.cuda())

In [17]:
x = Variable(image.cuda(), requires_grad=True)

In [18]:
y = model.encode_image(x)

In [184]:
y.backward(torch.ones(1,1024).cuda())

In [195]:
model.visual.proj

torch.Size([768, 512])

In [26]:
import numpy as np

np.dot(image_features.squeeze().cpu().detach().numpy(), np.ones(512))

-1.0596237182617188

In [25]:
image_features.squeeze().cpu().detach().numpy()

array([-1.1542e-01, -2.0203e-02, -3.9154e-02, -1.3452e-01, -4.5815e-03,
        2.4207e-01,  3.3594e-01,  2.3926e-01,  5.3711e-01,  1.2854e-01,
        1.4709e-01,  5.0873e-02,  3.2080e-01, -6.7871e-01, -8.2886e-02,
       -1.5857e-01,  9.5947e-01, -2.1716e-01,  2.1716e-01,  1.6797e-01,
       -6.3818e-01,  9.3262e-02, -1.2964e-01, -5.0732e-01, -2.5024e-03,
        3.2349e-01,  2.5314e-02,  1.9495e-01,  1.7224e-01, -4.6436e-01,
       -2.4796e-02,  5.9814e-02,  5.2551e-02,  4.0649e-01, -5.7959e-01,
        8.2642e-02,  3.6475e-01,  3.4351e-01,  1.1102e-01,  9.0186e-01,
       -3.9398e-02, -2.1167e-01,  2.1082e-01,  1.8823e-01, -4.5776e-01,
        5.9570e-01,  3.8477e-01,  5.1025e-01, -1.8396e-01, -4.0723e-01,
       -2.1130e-01,  2.7026e-01,  2.0471e-01, -5.2295e-01, -3.7012e-01,
       -3.8391e-02,  5.6934e-01,  2.6337e-02, -6.5613e-02,  5.0720e-02,
        9.2822e-01,  2.8516e-01, -2.4707e-01, -7.3120e-02, -3.8647e-01,
        2.2559e-01,  5.2719e-03,  2.5122e-01, -1.3269e-01, -8.05

In [1]:
import numpy as np

In [9]:
linear_classifiers = np.load("classifiers_perclass_perlayer_smeared_dotted_knitted_spiralled_chequered.npy", allow_pickle=True)

In [13]:
linear_classifiers[0][-1]

LinearClassifier(
  (linear1): Linear(in_features=512, out_features=1, bias=True)
)