In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torchvision
from torch import device
from tqdm.notebook import tqdm
import sys
sys.path.append("/content/drive/MyDrive/Cours/ensta cours/CSC_5IA23_TA_Project-main/")
from ResNet import ResNet18
torch.manual_seed(47)
np.random.seed(47)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
tr = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
data = torchvision.datasets.CIFAR100(
    root="./data", train=False, download=True, transform=tr
)
batch_size = 32
datal = torch.utils.data.DataLoader(data, batch_size=batch_size*2, shuffle=False, num_workers=2, )

100%|██████████| 169M/169M [00:01<00:00, 104MB/s]


# Neural Collapse

### separating the data per class

In [5]:
def load_model(path):
    d = torch.load(path)
    d["cl.weight"] = d["model.13.weight"]
    d["cl.bias"] = d["model.13.bias"]
    d.pop("model.13.weight")
    d.pop("model.13.bias")
    resnet = ResNet18(64,2,100).to(device)
    resnet.load_state_dict(d)
    return resnet
resnet = load_model("/content/drive/MyDrive/Cours/ensta cours/model/resnet_360_epoch.pth")

RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.

In [7]:
x= {
    "as" : [6,5,6,1],
    "is" : [6,5,68,4,6351,]
}
list(x.values())

[[6, 5, 6, 1], [6, 5, 68, 4, 6351]]

In [None]:
def compute_features(path,dataloader):
  resnet = load_model(path)
  resnet.eval()
  features = torch.tensor([],requires_grad=False)
  with torch.no_grad():
    #start with an empty tensor
    for inputs, labels in tqdm(dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        out = resnet.model(inputs)
        features = torch.cat((features,out))
  del resnet
  return features


def get_mu_G(features):
  return torch.mean(features,dim=0)

def get_sigma_T(features,mu_G):
  return (features @ features.T) / (features.shape[0])

def get_sigma_B(mu_C_list,mu_G)
  sigma_B = torch.zeros((mu_G.shape[0],mu_G.shape[0]))
  for mu_C in mu_C_list:
    sigma_B += (mu_C - mu_G) @ (mu_C - mu_G).T
  sigma_B /= len(mu_C_list)
  return sigma_B

def get_mu_C_list(features,labels):
  mu_C_list = []
  for i in range(100):
    mu_C_list.append(torch.mean(features[labels == i],dim=0))
  return mu_C_list



## NC 1

variability collapse : In class variations converges towards 0.



In [None]:
def nc1(sigma_T,sigma_B):
  return torch.linalg.norm(sigma_T - sigma_B)


def train_class_means_equinorm(mu_C_list,mu_G):
  centered_norms = []
  for key in range(len(mu_C_list)):
    centered_norms.append(torch.linalg.vector_norm(mu_C_list[key] - mu_G))
  #Fig 2
  return torch.std(centered_norms) / torch.mean(centered_norms)

def train_class_weights_equinorm(weights):
  norm = torch.linalg.norm(weights)
  #Fig 2
  return torch.std(norm) / torch.mean(norm)

def scale_invariante_nc1(Sigma_W, Sigma_B):
    """
    Computes the NC1 metric: 1/C * Trace(Sigma_W * pinv(Sigma_B))
    """
    C = Sigma_B.shape[0] # Number of classes
    Sigma_B_pinv = torch.linalg.pinv(Sigma_B, rcond=1e-6)
    # NC1 = 1/C * Trace(Sigma_W @ Sigma_B_pinv)
    nc1_value = torch.trace(Sigma_W @ Sigma_B_pinv) / C

    #Fig 6
    return nc1_value.item()


## NC 2

As training progresses, the standard deviations of the cosines approach zero
indicating equiangularity

\begin{aligned}
    \left| \|\mu_c - \mu_G\|_2 - \|\mu_{c'} - \mu_G\|_2 \right| &\to 0 \quad \forall c, c' \newline
    \langle \tilde{\mu}_c, \tilde{\mu}_{c'} \rangle &\to \frac{C}{C-1} \delta_{c,c'} - \frac{1}{C-1} \quad \forall c, c'
\end{aligned}

In [None]:
def nc2(mu_C_list,mu_G):
  class_cos_sim = []
  for key in range(len(mu_C_list)):
    for key2 in range(len(mu_C_list)):
      if key < key2: #maybe we dont have to this
        continue
      norm=  (mu_C_list[key] -mu_G )@ (mu_C_list[key2] - mu_G)
      norm = norm/(torch.linalg.vector_norm(mu_C_list[key] - mu_G)*torch.linalg.vector_norm(mu_C_list[key2] - mu_G)   )
      class_cos_sim.append(norm)

  class_cos_sim = torch.tensor(class_cos_sim)
  vals_2 = torch.std(class_cos_sim)
  vals_3 = class_cos_sim + 1/(class_cos_sim.shape[0] - 1)
  vals_3 = torch.mean(vals_3)
  #Fig 3, Fig 4
  return vals_2,vals_3

## NC 3

 Convergence to self-duality:

$$ \left\| \frac{W^T}{\|W|_F} - \frac{\dot M}{\|\dot M|_F}\right\|_F \to 0$$


In [None]:
def nc3(mu_C_list,mu_G,W):
  mu_C_list = torch.stack(mu_C_list) - mu_G
  mu_C_list = mu_C_list / torch.linalg.norm(mu_C_list,dim=1,keepdim=True)

  W = W / torch.linalg.norm(W,dim=0,keepdim=True)

  #Fig 5
  return torch.linalg.norm(mu_C_list.T - W.T)**2

## NC 4
Simplification to NCC
$$ arg\max_{c'} \left< w_{c'}, h \right> + b_{c'} \to \arg\min_{c'} \|h - \mu_{c'}\|_2 $$


In [None]:
def compute_nc4_disagreement(features, logits, class_means):
    """
    Computes the disagreement between the model's logits and the NCC rule.

    Args:
        features (N, d): Penultimate layer features.
        logits (N, C): Model output (before softmax).
        class_means (C, d): The computed mu_c for each class.

    Returns:
        disagreement_rate (float): Percentage of samples where Model != NCC.
    """
    mpred = torch.argmax(logits, dim=1)

    # 2. Compute NCC predictions using vectorized L2 distance:
    # ||h - mu||^2 = ||h||^2 + ||mu||^2 - 2<h, mu>
    h_squared = torch.sum(features**2, dim=1, keepdim=True)      # (N, 1)
    mu_squared = torch.sum(class_means**2, dim=1, keepdim=True).T # (1, C)
    distances = h_squared + mu_squared - 2 * (features @ class_means.T)

    ncc_preds = torch.argmin(distances, dim=1)
    disagreement = (mpred != ncc_preds).float().mean()
    # Fig 7
    return disagreement.item()


## NC 5

As training progresses, the clusters of OOD become increasingly orthgonal to the ETF subspace of the ID data.

In [None]:
#load the model
tab = []
for epoch in range(360,361,10):# CHange this
  path = f"/content/drive/MyDrive/resnet_360_epoch.pth"
  # resnet = ResNet18(64,2,100).to(device)
  # resnet.load_state_dict(torch.load(path))
  resnet.eval()
  features ={i:[] for i in range(100)}
  with torch.no_grad():
    for inputs, labels in tqdm(datal):
        inputs = inputs.to(device)
        labels = labels.to(device)
        out = resnet.model(inputs)
        for i in range(len(labels)):
            features[labels[i].item()].append(out[i])
  for key in features.keys():
    features[key] = torch.stack(features[key])
  vars = {i:[] for i in range(100)}
  tmp = []


In [None]:
mu_G = torch.mean(torch.cat(list(features.values())),dim=0)
print(global_mean_vector.shape)
# global_var_vector = torch.var(torch.cat(list(features.values())),dim=0)
# print(global_var_vector.shape)

#W the last layer weights
weights = resnet.cl.weight.data
print(weights.shape)

In [None]:
vars = {i:[] for i in range(100)}
class_means = {i:[] for i in range(100)} # globally centered class mean
# tmp_T = []
# tmp_B = []
# tmp_W = []
centered_norms = []
weights
for key in features.keys():
  norm = features[key]  # - global_mean_vector
  class_means[key] = torch.mean(norm,dim=0)
  # vars[key] = torch.var(norm,dim=0)
  centered_norms.append(torch.linalg.vector_norm(class_means[key] - global_mean_vector))

centered_norms = torch.tensor(centered_norms)
#Fig 2.
vals_1 = torch.std(centered_norms) / torch.mean(centered_norms)

class_cos_sim = []
for key in class_means.keys():
  for key2 in class_means.keys():
    if key < key2:
      continue
    norm=  (class_means[key] -global_mean_vector )@ (class_means[key2] - global_mean_vector)
    norm = norm/(torch.linalg.vector_norm(class_means[key] - global_mean_vector)*torch.linalg.vector_norm(class_means[key2] - global_mean_vector)   )

    class_cos_sim.append(norm)
#Fig 3
class_cos_sim = torch.tensor(class_cos_sim)
vals_2 = torch.std(class_cos_sim)
#Fig 4
vals_3 = class_cos_sim + 1/(class_cos_sim.shape[0] - 1)
vals_3 = torch.mean(vals_3)


In [None]:
print(vals_1)
print(vals_2)
print(vals_3)

In [None]:
#Fig 5



#Fig 6.




#Fig 7.


# sigma_T =
# sigma_B
# sigma_W

In [None]:
print(vals_4)