In [1]:
import os
import sys

# Put the repository root on the import path so the `src.*` imports used by the
# following cells can resolve. The path is relative, so it depends on the
# directory the kernel was started in.
# Guarded so that re-running this cell does not append the same entry again.
_repo_root = "../../../../../"
if _repo_root not in sys.path:
    sys.path.append(_repo_root)

# Must be set before any tokenizer is created.
# NOTE(review): presumably silences the HF tokenizers fork/parallelism warning
# when DataLoader workers are used — confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import copy
import torch
from datetime import datetime
from src.utils.helper import Config, color_print
from src.utils.load import load_model, load_data, save_checkpoint
from src.models.evaluate import (
    evaluate_model,
    get_sparsity,
    get_similarity,
    get_perplexity,
)
from src.utils.sampling import SamplingDataset
from src.pruning.prune_head import head_importance_prunning
from src.pruning.prune import prune_concern_identification
from src.utils.helper import report_to_df, append_nth_row

In [3]:
# --- Experiment configuration: everything a reader might want to tune -------

# Identifier handed to Config(...) to select the model configuration.
name = "bert-6-128-yahoo"

# Prefer the first CUDA device, but fall back to the CPU so the notebook can
# still be executed on a machine without a usable GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# NOTE(review): not referenced by any cell visible in this notebook.
checkpoint = None

# Passed to load_data(...) when the data loaders are built.
batch_size = 16
num_workers = 4

# Passed to every SamplingDataset(...) and to get_similarity(...).
num_samples = 16

# Used both as the head-pruning ratio and as `sparsity_ratio` for the
# concern-identification pruning step.
ratio = 0.3

# NOTE(review): never passed to anything visible here (config.init_seed() is
# called without arguments) — confirm which seed the run actually uses.
seed = 44

# Layer-name filters forwarded to prune_concern_identification(...).
include_layers = ["intermediate", "output"]
exclude_layers = [
    "attention",
]

In [4]:
# Capture the wall-clock start of the run and echo it in a readable form.
script_start_time = datetime.now()
print(f"Script started at: {script_start_time:%Y-%m-%d %H:%M:%S}")

Script started at: 2024-10-19 15:39:33


In [5]:
# Build the experiment configuration from the model name and target device.
config = Config(name, device)
# Label count read from the configuration mapping.
# NOTE(review): the pruning loop iterates over `config.num_labels` instead of
# this variable — confirm both always agree, or drop one of them.
num_labels = config.config["num_labels"]
# Load the model described by `config`.
model = load_model(config)

Loading the model.




{

'architectures'

: 

'bert'

,
 

'dataset_name'

: 

'YahooAnswersTopics'

,
 

'model_name'

: 

'models/bert-6-128-yahoo'

,
 

'num_labels'

: 

10

,
 

'tokenizer_name'

: 

'fabriceyhc/bert-base-uncased-yahoo_answers_topics'

}




The model models/bert-6-128-yahoo is loaded.




In [6]:
# Build the three data loaders in one call, then unpack them by split.
# NOTE(review): do_cache=True presumably reuses previously pickled splits
# (the recorded output mentions train/valid/test .pkl cache hits) — confirm.
data_loaders = load_data(
    config, batch_size=batch_size, num_workers=num_workers, do_cache=True
)
train_dataloader, valid_dataloader, test_dataloader = data_loaders

Loading cached dataset YahooAnswersTopics.




train.pkl is loaded from cache.




valid.pkl is loaded from cache.




test.pkl is loaded from cache.




The dataset YahooAnswersTopics is loaded




{

'config_name'

: 

'yahoo_answers_topics'

,
 

'features'

: 

{'first_column': 'question_title', 'second_column': 'topic'}

,
 

'path'

: 

'yahoo_answers_topics'

}




In [7]:
# Baseline evaluation of the unpruned model, currently disabled.
# NOTE(review): uncomment to get reference metrics for comparison with the
# pruned modules, or delete this cell if the baseline is reported elsewhere.
# print("Evaluate the original model")
# result = evaluate_model(model, config, test_dataloader)

In [8]:
# Keyword arguments shared by every concern-identification pruning call.
prune_options = dict(
    include_layers=include_layers,
    exclude_layers=exclude_layers,
    sparsity_ratio=ratio,
    keep_dim=False,
    method="structed",
)


def _draw_samples(target, flag):
    """Build a SamplingDataset over the training loader with this notebook's fixed settings.

    `target` and `flag` are forwarded positionally as the third and fifth
    arguments of SamplingDataset.
    NOTE(review): `flag` looks like a positive/negative-sample switch and the
    literal 4 like a batch size — confirm against SamplingDataset.
    """
    return SamplingDataset(
        train_dataloader, config, target, num_samples, flag, 4, resample=False
    )


# One evaluation report per concern, consumed by the results cell.
result_list = []

for concern in range(config.num_labels):
    # Re-seed at the start of every iteration, before any sampling happens.
    config.init_seed()

    positive_samples = _draw_samples(concern, True)
    negative_samples = _draw_samples(concern, False)
    # 200 is outside the label range of this run (10 labels).
    # NOTE(review): presumably this makes the sampler draw from every class —
    # confirm against SamplingDataset.
    all_samples = _draw_samples(200, False)

    # Prune a private copy so the original model stays intact for the
    # similarity and perplexity comparisons below.
    module = copy.deepcopy(model)
    head_importance_prunning(module, config, all_samples, ratio)
    prune_concern_identification(
        module, config, positive_samples, negative_samples, **prune_options
    )

    print(f"Evaluate the pruned model {concern}")
    result = evaluate_model(module, config, test_dataloader, verbose=True)
    result_list.append(result)
    get_sparsity(module)

    get_similarity(model, module, valid_dataloader, concern, num_samples, config)

    # NOTE(review): the recorded output shows the same original-model
    # perplexity on every iteration; it could be computed once outside the
    # loop after confirming get_perplexity is deterministic.
    for banner, candidate in (
        ("original model's perplexity", model),
        ("pruned model's perplexity", module),
    ):
        print(banner)
        get_perplexity(candidate, valid_dataloader, config)

Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 0




Evaluating the model:   0%|                                                                               | 0/…

0.20301599692700395




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9801587974366425




CCA coefficients mean non-concern: 0.9816629591561571




Linear CKA concern: 0.8776923764916175




Linear CKA non-concern: 0.8781783985778098




Kernel CKA concern: 0.8129042749806379




Kernel CKA non-concern: 0.7856641006925765




original model's perplexity




3.187649726867676




pruned model's perplexity




3.429781198501587




Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 1




Evaluating the model:   0%|                                                                               | 0/…

0.20301599692700395




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9880830290633866




CCA coefficients mean non-concern: 0.9806475488049632




Linear CKA concern: 0.8787339429186876




Linear CKA non-concern: 0.8801179807722904




Kernel CKA concern: 0.767683604081533




Kernel CKA non-concern: 0.7866342307978386




original model's perplexity




3.187649726867676




pruned model's perplexity




3.4383881092071533




Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 2




Evaluating the model:   0%|                                                                               | 0/…

0.20301599692700395




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9792390793008988




CCA coefficients mean non-concern: 0.9807406983266624




Linear CKA concern: 0.8814525603338756




Linear CKA non-concern: 0.8717533766199025




Kernel CKA concern: 0.8206142396494243




Kernel CKA non-concern: 0.7766636791614056




original model's perplexity




3.187649726867676




pruned model's perplexity




3.430420398712158




Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 3




Evaluating the model:   0%|                                                                               | 0/…

0.20301599692700395




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9814449191198704




CCA coefficients mean non-concern: 0.9821883095858




Linear CKA concern: 0.8759939166191941




Linear CKA non-concern: 0.8660647658931265




Kernel CKA concern: 0.7838217031180896




Kernel CKA non-concern: 0.7774010825900114




original model's perplexity




3.187649726867676




pruned model's perplexity




3.434014081954956




Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 4




Evaluating the model:   0%|                                                                               | 0/…

0.20301599692700395




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9803654160623674




CCA coefficients mean non-concern: 0.9829031795181625




Linear CKA concern: 0.8809374916122568




Linear CKA non-concern: 0.8684767443539163




Kernel CKA concern: 0.8210687238521199




Kernel CKA non-concern: 0.771831036405614




original model's perplexity




3.187649726867676




pruned model's perplexity




3.428401231765747




Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 5




Evaluating the model:   0%|                                                                               | 0/…

0.20301599692700395




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9832685284061313




CCA coefficients mean non-concern: 0.9834047457619559




Linear CKA concern: 0.8584624667538139




Linear CKA non-concern: 0.8777563926334185




Kernel CKA concern: 0.8244165343919787




Kernel CKA non-concern: 0.7867026345453545




original model's perplexity




3.187649726867676




pruned model's perplexity




3.436887741088867




Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 6




Evaluating the model:   0%|                                                                               | 0/…

0.20301599692700395




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9838997048908018




CCA coefficients mean non-concern: 0.9803761091801431




Linear CKA concern: 0.8815513977171322




Linear CKA non-concern: 0.8692397330946489




Kernel CKA concern: 0.7839930822178349




Kernel CKA non-concern: 0.7862050266703852




original model's perplexity




3.187649726867676




pruned model's perplexity




3.4258947372436523




Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 7




Evaluating the model:   0%|                                                                               | 0/…

0.20301599692700395




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9823729901560087




CCA coefficients mean non-concern: 0.9832819590747289




Linear CKA concern: 0.8882208851237277




Linear CKA non-concern: 0.8661985844744186




Kernel CKA concern: 0.807779416514015




Kernel CKA non-concern: 0.7798471230184298




original model's perplexity




3.187649726867676




pruned model's perplexity




3.4356350898742676




Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 8




Evaluating the model:   0%|                                                                               | 0/…

0.20301599692700395




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9866390006732567




CCA coefficients mean non-concern: 0.9824378948080403




Linear CKA concern: 0.8989201455474862




Linear CKA non-concern: 0.8624760219785549




Kernel CKA concern: 0.7891179107207515




Kernel CKA non-concern: 0.7810253814892514




original model's perplexity




3.187649726867676




pruned model's perplexity




3.422149419784546




Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 9




Evaluating the model:   0%|                                                                               | 0/…

0.20301599692700395




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9840851108347407




CCA coefficients mean non-concern: 0.9819684041656744




Linear CKA concern: 0.8166886934189519




Linear CKA non-concern: 0.8649232511685492




Kernel CKA concern: 0.7100332940547909




Kernel CKA non-concern: 0.7841849166923747




original model's perplexity




3.187649726867676




pruned model's perplexity




3.4358980655670166




In [9]:
# Convert every per-concern evaluation report into a DataFrame and combine them.
# NOTE(review): append_nth_row presumably keeps the n-th row of the n-th
# report (one row per concern) — confirm against src.utils.helper.
df_list = [report_to_df(report) for report in result_list]
new_df = append_nth_row(df_list)

# Timestamped file name so that separate runs write to separate files.
csv_name = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}"

# Writing into a missing directory fails, so make sure the output directory
# exists before saving (a no-op when it is already there).
os.makedirs("results", exist_ok=True)
new_df.to_csv(f"results/{csv_name}.csv", index=False)
print(csv_name)

# Last expression of the cell: rendered as the cell's rich output.
new_df

2024-10-19_16-01-48




Unnamed: 0,class,precision,recall,f1-score,support
0,0,0.42,0.6059,0.4961,2992
1,1,0.7054,0.5107,0.5925,2992
2,2,0.6608,0.6398,0.6501,3012
3,3,0.3438,0.5894,0.4343,2998
4,4,0.7916,0.6771,0.7299,2973
5,5,0.8222,0.7705,0.7955,3054
6,6,0.707,0.352,0.47,3003
7,7,0.6411,0.5807,0.6094,3012
8,8,0.6799,0.5812,0.6266,2982
9,9,0.6372,0.6985,0.6665,2982
