In [None]:
"""
While the activations were being computed, they were saved in chunks, one file per batch.
This script combines these batch-wise saved activations into single tensors per block for easier analysis.
"""

'\ntest memorization in VAR\n'

In [None]:
import os
import sys
PROJECT_ROOT = os.path.abspath("..")
sys.path.insert(0, PROJECT_ROOT)

print("Project root:", PROJECT_ROOT)

import torch
from models import VQVAE, build_vae_var
from data_prep.subset_imagenet import get_balanced_imagenet_dataset
from pathlib import Path
import shutil

Project root: /BS/scene_repre/work/VAR


  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# "/scratch/inf0/user/hpetekka/var_mem/output_activations/0/attn_proj"
# has block_0, block_1, ..., block_15 folders with attention projection activations saved as batch0.pt, batch1.pt, ..., batch159.pt.
# The code below combines these into a single tensor per block.

# Combine the per-batch activation files of every block into one tensor file per block.
# Runs are different augmented versions of the same data points; batches are always
# concatenated in index order (batch0, batch1, ...).
ACTIVATIONS_ROOT = "/scratch/inf0/user/hpetekka/var_mem/output_activations"
ACTIVATION_TYPES = ("fc1", "fc1_act", "fc2", "q", "k", "v", "attn_proj")
RUN_IDS = range(1, 10)
NUM_BLOCKS = 16    # block_0 ... block_15 folders per activation type
NUM_BATCHES = 160  # batch0.pt ... batch159.pt files per block folder


def combine_block_batches(block_folder: str, combined_file: str, num_batches: int = NUM_BATCHES) -> torch.Tensor:
    """Concatenate ``batch0.pt`` ... ``batch{num_batches - 1}.pt`` from one block folder.

    Args:
        block_folder: Directory holding the per-batch ``batch<i>.pt`` files.
        combined_file: Destination path of the combined tensor file.
        num_batches: Number of consecutive batch files to combine, starting at index 0.

    Returns:
        The combined tensor (batches concatenated along dim 0), which is also
        written to ``combined_file``.

    Raises:
        ValueError: If ``num_batches`` is smaller than 1.
        FileNotFoundError: If any expected batch file is missing. This is checked
            before anything is loaded, so no partial result is ever written.
    """
    if num_batches < 1:
        raise ValueError(f"num_batches must be >= 1, got {num_batches}")

    batch_files = [os.path.join(block_folder, f"batch{batch_i}.pt") for batch_i in range(num_batches)]
    missing = [path for path in batch_files if not os.path.isfile(path)]
    if missing:
        raise FileNotFoundError(
            f"{len(missing)} of {num_batches} batch files missing in {block_folder} (first: {missing[0]})"
        )

    # map_location="cpu": merging needs no GPU, and tensors that were saved from a CUDA
    # device would otherwise be restored onto that device (or fail without one).
    # Each batch is expected to be (batch_size, num_tokens, hidden_dim) per the original notes.
    all_batches = [torch.load(path, map_location="cpu") for path in batch_files]
    block_tensor = torch.cat(all_batches, dim=0)  # (num_data_points, num_tokens, hidden_dim)

    # Write to a temporary file and rename, so an interrupted run never leaves a
    # truncated file under the final name.
    tmp_file = f"{combined_file}.tmp"
    try:
        torch.save(block_tensor, tmp_file)
        os.replace(tmp_file, combined_file)
    finally:
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
    return block_tensor


# loop over runs, which are for different augmented versions of the same data points.
for run_id in RUN_IDS:
    # for each version, loop over activation types
    for activation_type in ACTIVATION_TYPES:
        activation_dir = os.path.join(ACTIVATIONS_ROOT, str(run_id), activation_type)
        if not os.path.isdir(activation_dir):
            print(f"Skipping {activation_dir}: directory not found")
            continue
        # for each activation type, loop over the block_<n> folders.
        # NOTE(review): the original comment said each block corresponds to a scale -- confirm.
        for block_i in range(NUM_BLOCKS):
            block_folder = os.path.join(activation_dir, f"block_{block_i}")
            combined_file = os.path.join(activation_dir, f"block_{block_i}_combined.pt")
            # Keeps the cell re-runnable after the per-batch folders have been cleaned up.
            if not os.path.isdir(block_folder) and os.path.isfile(combined_file):
                print(f"Already combined, skipping: {combined_file}")
                continue
            combine_block_batches(block_folder, combined_file)
            print(f"Saved combined activations for {activation_dir}, block {block_i} to {combined_file}")
            

Saved combined activations for /scratch/inf0/user/hpetekka/var_mem/output_activations/1/fc1, block 0 to /scratch/inf0/user/hpetekka/var_mem/output_activations/1/fc1/block_0_combined.pt
Saved combined activations for /scratch/inf0/user/hpetekka/var_mem/output_activations/1/fc1, block 1 to /scratch/inf0/user/hpetekka/var_mem/output_activations/1/fc1/block_1_combined.pt
Saved combined activations for /scratch/inf0/user/hpetekka/var_mem/output_activations/1/fc1, block 2 to /scratch/inf0/user/hpetekka/var_mem/output_activations/1/fc1/block_2_combined.pt
Saved combined activations for /scratch/inf0/user/hpetekka/var_mem/output_activations/1/fc1, block 3 to /scratch/inf0/user/hpetekka/var_mem/output_activations/1/fc1/block_3_combined.pt
Saved combined activations for /scratch/inf0/user/hpetekka/var_mem/output_activations/1/fc1, block 4 to /scratch/inf0/user/hpetekka/var_mem/output_activations/1/fc1/block_4_combined.pt
Saved combined activations for /scratch/inf0/user/hpetekka/var_mem/output_a

In [13]:
# Free disk space by deleting the per-batch block_<n> folders -- but only those whose
# combined tensor file (block_<n>_combined.pt) already exists next to them.
base_dirs = [
    "fc1", "fc1_act", "fc2",
    "q", "k", "v", "attn_proj"
]


def remove_combined_block_dirs(activation_dir):
    """Delete ``block_<n>`` folders in ``activation_dir`` that have a combined file.

    A folder ``block_<n>`` is removed only when ``block_<n>_combined.pt`` exists in
    ``activation_dir``. Deletion is irreversible, so folders without a combined file
    and directories not named ``block_*`` are left untouched.

    Args:
        activation_dir: Directory of one activation type (str or Path).

    Returns:
        ``(removed, kept)``: lists of Paths of the ``block_*`` folders that were
        deleted and of those left in place because no combined file was found.
        Both lists are empty if ``activation_dir`` does not exist.
    """
    activation_dir = Path(activation_dir)
    removed, kept = [], []
    if not activation_dir.is_dir():
        return removed, kept

    # Snapshot and sort the entries first: the directory is modified while looping.
    for p in sorted(activation_dir.iterdir()):
        if not (p.is_dir() and p.name.startswith("block_")):
            continue
        combined_file = activation_dir / f"{p.name}_combined.pt"
        if combined_file.is_file():
            shutil.rmtree(p)
            removed.append(p)
        else:
            kept.append(p)
    return removed, kept


for i in range(1, 10):
    for subdir in base_dirs:
        dir_path = Path(f"/scratch/inf0/user/hpetekka/var_mem/output_activations/{i}/{subdir}")

        _, kept_dirs = remove_combined_block_dirs(dir_path)
        for p in kept_dirs:
            print(f"Kept {p}: no combined file found, raw batches not deleted")