# PCA on Task Vectors

In [1]:
from tvp.task_vectors.task_vectors import TaskVector
from tvp.utils.io_utils import load_model_from_artifact

import torch
from sklearn.decomposition import PCA

# --- Experiment configuration ---
# RUN and SEED_INDEX are not read by any cell visible in this notebook; they are
# kept so that anything still referring to them keeps working.
RUN = None
SEED_INDEX = 0

# Artifact path prefix shared by every checkpoint loaded below.
MODEL_NAME = "gladia/task-vectors-playground/ViT-B-16"
# Zero-shot checkpoint: the same prefix plus the "_pt" suffix. Derived from
# MODEL_NAME so the two paths cannot drift apart when the model is changed.
zeroshot_identifier = f"{MODEL_NAME}_pt"

# Dataset name and seeds that are interpolated into the fine-tuned artifact names.
DATASET = "CIFAR100"
SEEDS = list(range(10))

import wandb

# Open a W&B run; it is passed to every artifact load below.
run = wandb.init()

# Zero-shot checkpoint that every fine-tuned model is paired with.
zeroshot_model = load_model_from_artifact(
    artifact_path=f"{zeroshot_identifier}:latest",
    run=run,
)


def finetuned_id_fn(seed):
    """Return the ``:latest`` artifact path of the checkpoint fine-tuned with ``seed``."""
    return f"{MODEL_NAME}_{DATASET}_{seed}:latest"


# seed -> fine-tuned model, loaded in SEEDS order.
finetuned_models = {
    seed: load_model_from_artifact(
        artifact_path=finetuned_id_fn(seed),
        run=run,
    )
    for seed in SEEDS
}

# One TaskVector per seed, built from the (zero-shot, fine-tuned) pair.
# NOTE(review): presumably the fine-tuned-minus-zero-shot weight delta —
# confirm in tvp.task_vectors.
task_vectors = [
    TaskVector(zeroshot_model, finetuned_models[seed])
    for seed in SEEDS
]

  from .autonotebook import tqdm as notebook_tqdm


[34m[1mwandb[0m: Currently logged in as: [33madrianrob[0m ([33msapienza-ml[0m). Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Downloading large artifact ViT-B-16_pt:latest, 426.51MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.5


Loading ViT-B-16 pre-trained weights.


[34m[1mwandb[0m: Downloading large artifact ViT-B-16_CIFAR100_0:latest, 426.51MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.5


Loading ViT-B-16 pre-trained weights.


[34m[1mwandb[0m: Downloading large artifact ViT-B-16_CIFAR100_1:latest, 426.51MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.5


Loading ViT-B-16 pre-trained weights.


[34m[1mwandb[0m: Downloading large artifact ViT-B-16_CIFAR100_2:latest, 426.51MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.5


Loading ViT-B-16 pre-trained weights.


[34m[1mwandb[0m: Downloading large artifact ViT-B-16_CIFAR100_3:latest, 426.51MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.5


Loading ViT-B-16 pre-trained weights.


[34m[1mwandb[0m: Downloading large artifact ViT-B-16_CIFAR100_4:latest, 426.51MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.5


Loading ViT-B-16 pre-trained weights.


[34m[1mwandb[0m: Downloading large artifact ViT-B-16_CIFAR100_5:latest, 426.51MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:55.3


Loading ViT-B-16 pre-trained weights.


[34m[1mwandb[0m: Downloading large artifact ViT-B-16_CIFAR100_6:latest, 426.51MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:55.6


Loading ViT-B-16 pre-trained weights.


[34m[1mwandb[0m: Downloading large artifact ViT-B-16_CIFAR100_7:latest, 426.51MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:56.4


Loading ViT-B-16 pre-trained weights.


[34m[1mwandb[0m: Downloading large artifact ViT-B-16_CIFAR100_8:latest, 426.51MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:55.7


Loading ViT-B-16 pre-trained weights.


[34m[1mwandb[0m: Downloading large artifact ViT-B-16_CIFAR100_9:latest, 426.51MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:55.5


Loading ViT-B-16 pre-trained weights.


## Example: PCA on a single layer

In [2]:
# Layer whose task-vector entries are analysed in this example.
LAYER = "model.visual.conv1.weight"

# Stack the LAYER entry of every task vector along a new leading axis,
# giving one slice per element of `task_vectors`.
weights = torch.stack(
    [task_vector.vector[LAYER] for task_vector in task_vectors],
    dim=0,
)

In [3]:
# Collapse everything after the leading axis: one flat row per task vector.
weights = torch.reshape(weights, (len(task_vectors), -1))
weights.size()

torch.Size([10, 589824])

In [4]:
# Fit a PCA with as many components as there are task vectors; every row of
# `weights` (moved to CPU and converted to NumPy) is one observation.
pca = PCA(n_components=len(task_vectors))
pca.fit(X=weights.cpu().numpy())

## Explained variance

In [11]:
# For every layer, fit a PCA over the per-seed task-vector entries and record
# [total explained variance, number of parameters in the layer].
#
# The loop deliberately uses its own `layer_*` names: reusing `weights` / `pca`
# here would overwrite the single-layer example objects, so a later cell that
# reads `pca` would silently get the fit of the last layer on a fresh
# top-to-bottom run.
explained_variance = {}
for layer in task_vectors[0].vector.keys():
    layer_weights = torch.stack([tv.vector[layer] for tv in task_vectors])
    layer_weights = layer_weights.reshape(len(task_vectors), -1)
    n_params = layer_weights.shape[1]

    # All rows identical: there is no variance to explain. Fitting PCA anyway
    # makes scikit-learn divide by a zero total variance and emit a
    # RuntimeWarning for each such layer, so record 0.0 directly.
    if bool(torch.all(layer_weights == layer_weights[0])):
        explained_variance[layer] = [0.0, n_params]
        continue

    # PCA cannot have more components than observations or features.
    layer_pca = PCA(n_components=min(len(task_vectors), n_params))
    layer_pca.fit(layer_weights.cpu().numpy())
    explained_variance[layer] = [layer_pca.explained_variance_.sum(), n_params]

explained_variance

  explained_variance_ratio_ = explained_variance_ / total_var
  explained_variance_ratio_ = explained_variance_ / total_var
  explained_variance_ratio_ = explained_variance_ / total_var
  explained_variance_ratio_ = explained_variance_ / total_var
  explained_variance_ratio_ = explained_variance_ / total_var
  explained_variance_ratio_ = explained_variance_ / total_var


{'model.positional_embedding': [0.0, 39424],
 'model.text_projection': [0.0, 262144],
 'model.logit_scale': [0.0, 1],
 'model.visual.class_embedding': [1.6649434e-05, 768],
 'model.visual.positional_embedding': [0.028091326, 151296],
 'model.visual.proj': [0.015204906, 393216],
 'model.visual.conv1.weight': [0.0126811145, 589824],
 'model.visual.ln_pre.weight': [5.897691e-05, 768],
 'model.visual.ln_pre.bias': [1.3800074e-05, 768],
 'model.visual.transformer.resblocks.0.ln_1.weight': [0.00026777905, 768],
 'model.visual.transformer.resblocks.0.ln_1.bias': [0.00024356975, 768],
 'model.visual.transformer.resblocks.0.attn.in_proj_weight': [0.24871704,
  1769472],
 'model.visual.transformer.resblocks.0.attn.in_proj_bias': [6.90188e-05, 2304],
 'model.visual.transformer.resblocks.0.attn.out_proj.weight': [0.07606186,
  589824],
 'model.visual.transformer.resblocks.0.attn.out_proj.bias': [1.3387755e-05,
  768],
 'model.visual.transformer.resblocks.0.ln_2.weight': [6.020749e-05, 768],
 'mode

## Weights Sampling

In [6]:
# Generate synthetic flattened weights by mapping random principal-component
# scores back to weight space with the fitted `pca`.
from numpy.random import randn

n_samples = 1
# Read from the fitted PCA instead of hard-coding it: `inverse_transform`
# needs exactly one score column per fitted component, so a literal 10 breaks
# as soon as `pca` was fitted with a different number of components.
n_components = pca.n_components_

# NOTE(review): the scores are standard-normal and unseeded — confirm whether
# they should be scaled by sqrt(pca.explained_variance_) and drawn from a
# seeded generator for reproducibility.
samples = pca.inverse_transform(randn(n_samples, n_components))

In [7]:
samples.shape

(1, 589824)