In [1]:
import numpy as np
import wandb
from google.colab import drive
drive.mount('/content/drive', force_remount=True)  # mount Google Drive under /content/drive (force_remount=True requests a fresh mount)
import shutil
import os                              # needed below to change the working directory
os.chdir('/content/drive/MyDrive/FL')  # work from the FL folder on Drive; NOTE(review): presumably this is what makes the FederatedLearningProject imports below resolve — confirm
import datetime as datetime
import copy
import json
import torch
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
import torch.nn as nn

import torchvision
from torchvision import transforms
from torchvision.datasets import CIFAR100
from torch.utils.data import Subset, DataLoader, random_split

from FederatedLearningProject.data.cifar100_loader import get_cifar100
from FederatedLearningProject.checkpoints.checkpointing import save_checkpoint, load_checkpoint
from FederatedLearningProject.training.centralized_training import train_and_validate, train_epoch, validate_epoch, log_to_wandb, generate_configs
from FederatedLearningProject.training.model_editing import compute_mask




import FederatedLearningProject.experiments.models as models
# Select the compute device once for the whole notebook: the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


Mounted at /content/drive
Mounted at /content/drive


In [2]:
# Load the CIFAR-100 splits: train_set, val_set, test_set.
# The transforms are applied inside the loader module before the datasets are returned.
valid_split_perc = 0.2    # fraction of the 50000 training images held out for validation
train_set, val_set, test_set = get_cifar100(valid_split_perc)




Number of images in Training Set:   40000
Number of images in Validation Set: 10000
Number of images in Test Set:       10000
✅ Datasets loaded successfully


In [3]:
# Create DataLoaders for the training, validation, and test sets.
# Batch size and worker count are hyperparameters, so they are declared once here
# instead of being repeated in every DataLoader call.
BATCH_SIZE = 128   # typical values: 64, 128, ...
NUM_WORKERS = 2    # 2 or 4 is the recommended range on Colab

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

In [4]:
# Build the base ("original") model; later cells work on a deep copy of it.
o_model = models.LinearFlexibleDino()     # original model
o_model.freeze(12)   # NOTE(review): presumably freezes the 12 backbone blocks — confirm against models.py
o_model.to_cuda()    # in the recorded run this printed "cuda not available" and the model stayed on the CPU
o_model.debug()      # prints, per parameter: device, requires_grad, inferred block and module mode

Downloading: "https://github.com/facebookresearch/dino/zipball/main" to /root/.cache/torch/hub/main.zip
Downloading: "https://dl.fbaipublicfiles.com/dino/dino_deitsmall16_pretrain/dino_deitsmall16_pretrain.pth" to /root/.cache/torch/hub/checkpoints/dino_deitsmall16_pretrain.pth
100%|██████████| 82.7M/82.7M [00:00<00:00, 112MB/s]


cuda not available

--- Debugging Model ---
Model is primarily on device: cpu
Model overall mode: Train

Parameter Details (Name | Device | Requires Grad? | Inferred Block | Module Mode):
- backbone.cls_token                                 | cpu        | False           | N/A             | Train
- backbone.pos_embed                                 | cpu        | False           | N/A             | Train
- backbone.patch_embed.proj.weight                   | cpu        | False           | N/A             | Train
- backbone.patch_embed.proj.bias                     | cpu        | False           | N/A             | Train
- backbone.blocks.0.norm1.weight                     | cpu        | False           | Block 0         | Eval
- backbone.blocks.0.norm1.bias                       | cpu        | False           | Block 0         | Eval
- backbone.blocks.0.attn.qkv.weight                  | cpu        | False           | Block 0         | Eval
- backbone.blocks.0.attn.qkv.bias            

In [5]:
# Build the path to the state_dict of the best model saved on Drive, so the model does not have to be retrained every time.
checkpoint_dir = "/content/drive/MyDrive/FL"
os.makedirs(checkpoint_dir, exist_ok=True)  # create the folder if missing; no error if it already exists
best_model_path = os.path.join(checkpoint_dir, "best_model_locale.pth")

In [7]:
# Work on a deep copy of the base model so that o_model itself is left unmodified.
model = copy.deepcopy(o_model)

# Fail early with an actionable message when the checkpoint is not on Drive.
if not os.path.isfile(best_model_path):
    raise FileNotFoundError(
        f"Checkpoint not found: '{best_model_path}'. "
        "Train the model and save its state_dict there before running this cell."
    )

# Overwrite the copy's weights with the trained ones.
# map_location remaps the stored tensors onto the device selected for this session,
# so a checkpoint saved on a GPU still loads on a CPU-only runtime.
model.load_state_dict(torch.load(best_model_path, map_location=device))

model.debug()

FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/FL/best_model_locale.pth'

In [None]:
# NOTE(review): freeze(0) presumably leaves no backbone block frozen (the base model used freeze(12)) — confirm against models.py
model.freeze(0)
model.debug()  # print the per-parameter status table again to inspect the result

In [None]:
# Total number of parameters in the model.
total_params = sum(map(lambda weight: weight.numel(), model.parameters()))
print(f"Numero totale di parametri: {total_params}")

# Number of parameters that are currently trainable (requires_grad is set).
trainable_params = sum(
    weight.numel()
    for weight in filter(lambda weight: weight.requires_grad, model.parameters())
)
print(f"Parametri attualmente allenabili (trainable): {trainable_params}")

In [None]:
# Show the parameters whose name contains one of the selected substrings
# (embeddings, class token, final backbone norm, head), together with their shapes.
for name, param in model.named_parameters():
    if not any(tag in name for tag in ('embed', 'cls_token', 'backbone.norm', 'head')):
        continue
    print(f"{name} - shape: {param.shape}")

In [None]:
# Freeze the parameters whose name contains one of the selected substrings,
# reporting each parameter as it is frozen.
print("param.requires_grad = False: ")
for name, param in model.named_parameters():
    matches = any(tag in name for tag in ('embed', 'cls_token', 'backbone.norm', 'head'))
    if matches:
        param.requires_grad = False
        print(f"FROZEN: {name}")



In [None]:
# Total number of parameters in the model.
total_params = sum(map(lambda weight: weight.numel(), model.parameters()))
print(f"Numero totale di parametri: {total_params}")

# Number of parameters that are currently trainable (requires_grad is set).
trainable_params = sum(
    weight.numel()
    for weight in filter(lambda weight: weight.requires_grad, model.parameters())
)
print(f"Parametri attualmente allenabili (trainable): {trainable_params}")

# Blank separator, then the full per-parameter status table.
print("\n")
model.debug()

Adesso il modello è pronto per creare la maschera con la matrice di Fisher
Ricorda: la maschera va creata in modalità evaluation (no processi stocastici)

In [None]:
# Switch the model to evaluation mode before building the mask, so no stochastic behaviour is active.
model.eval()
model.debug()  # print the per-parameter status table to verify the module modes

In [None]:
# Display the torch.device chosen at the top of the notebook (cuda when available, otherwise cpu).
device

In [None]:
# Compute the mask for the model from the training data.
# The device follows the one selected at the top of the notebook instead of a hard-coded 'cuda',
# so the cell also runs on a CPU-only runtime; str(device) is still 'cuda' when a GPU is available.
# NOTE(review): sparsity_target, R and num_examples are interpreted by compute_mask — confirm their meaning in model_editing.py.
mascherina = compute_mask(model, train_loader, sparsity_target=0.9, R=5, num_examples=200, device=str(device))