In [12]:
import json
from collections import defaultdict
from pathlib import Path

import numpy as np

import torch
from transformers import AutoTokenizer, AutoModel


# Directory that receives every artefact written by this notebook.
SAVE_DIR = Path("./results")
SAVE_DIR.mkdir(parents=True, exist_ok=True)


# Hugging Face checkpoint whose parameters are inspected in the cells below.
name = "roberta-base"
tokenizer = AutoTokenizer.from_pretrained(name)
# Load the pretrained model and place it explicitly on the CPU.
model = AutoModel.from_pretrained(name)
model = model.to("cpu")

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [34]:
# Gather the weight and bias of the output LayerNorm of every encoder layer.
# The parameters are detached before stacking: only their values are analysed,
# so the stacked tensors (and everything derived from them) should not carry
# autograd history.
output_layer_norms = [roberta_layer.output.LayerNorm for roberta_layer in model.encoder.layer]
values = {
    # One row per encoder layer, stacked along dim 0.
    "weights": torch.stack([layer_norm.weight.detach() for layer_norm in output_layer_norms]),
    "biases": torch.stack([layer_norm.bias.detach() for layer_norm in output_layer_norms]),
}

In [35]:
# For each parameter group, compute a single mean and a single standard
# deviation taken over all elements of its stacked tensor.
statistics = dict(
    (param_name, dict(mean=stacked.mean(), std=stacked.std()))
    for param_name, stacked in values.items()
)

In [36]:
# Find outlier dimensions and compute each dimension's distance from the mean.

# Hyperparameters of the outlier criterion:
# - an entry counts as "far" when its absolute distance from the group mean
#   exceeds `sigma_factor` standard deviations;
# - a dimension is an outlier when it is "far" in at least `out_count_thres`
#   rows along dim 0 of the stacked tensor.
sigma_factor = 2
out_count_thres = 4

# Per parameter group: the outlier indices and the mean absolute distance
# from the group mean for every dimension.
param_outliers = {}
param_distance_from_mean = {}
for param in values:
    distance_from_mean = (values[param] - statistics[param]["mean"]).abs()
    std = statistics[param]["std"]
    # Count, for every dimension, in how many rows the entry is "far".
    out_count = (distance_from_mean > (sigma_factor * std)).int().sum(dim=0)
    outliers_mask = out_count >= out_count_thres

    # `nonzero` returns the positions where the mask is True; `tolist` turns
    # them into plain Python ints (hashable, JSON-serialisable, and printed
    # without NumPy scalar wrappers).
    outliers_idcs = outliers_mask.nonzero(as_tuple=True)[0]
    param_outliers[param] = outliers_idcs.tolist()
    param_distance_from_mean[param] = distance_from_mean.mean(dim=0)

In [37]:
# Report how many outlier dimensions were found for each parameter group.
for param, found_idcs in param_outliers.items():
    print(param, len(found_idcs))

weights 45
biases 19


In [38]:
# Keep only the dimensions flagged as outliers for both the weights and the
# biases; these are the outlier dimensions used from here on.
intersection = set(param_outliers["weights"]) & set(param_outliers["biases"])
print(f"{len(intersection) = }")
print(f"{intersection = }")

len(intersection) = 17
intersection = {97, 453, 551, 361, 330, 588, 77, 749, 494, 240, 496, 82, 498, 217, 219, 61, 731}


In [39]:
# Outlier dimensions as a list in ascending order.
outlier_dimensions = sorted(intersection)
print(f"{outlier_dimensions = }")

outlier_dimensions = [61, 77, 82, 97, 217, 219, 240, 330, 361, 453, 494, 496, 498, 551, 588, 731, 749]


In [40]:
# Write the outlier dimensions to disk as a NumPy array.
outlier_path = SAVE_DIR / "outlier_dimensions.npy"
with outlier_path.open("wb") as file:
    np.save(file, np.asarray(outlier_dimensions))


# Write one file per parameter group with the per-dimension mean distance
# from the group mean.
for param, distance in param_distance_from_mean.items():
    distance_path = SAVE_DIR / f"dist_from_mean_{param}.npy"
    with distance_path.open("wb") as file:
        np.save(file, distance.detach().numpy())

In [41]:
# Read the saved outlier dimensions back from disk.
saved_outliers_path = SAVE_DIR / "outlier_dimensions.npy"
with saved_outliers_path.open("rb") as file:
    r = np.load(file)