In [1]:
import os
import sys

sys.path.append("../../../../../")
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import copy
import torch
from datetime import datetime
from src.utils.helper import Config, color_print
from src.utils.load import load_model, load_data, save_checkpoint
from src.models.evaluate import (
    evaluate_model,
    get_sparsity,
    get_similarity,
    get_perplexity,
)
from src.utils.sampling import SamplingDataset
from src.pruning.prune_head import head_importance_prunning
from src.pruning.prune import prune_concern_identification
from src.utils.helper import report_to_df, append_nth_row

In [3]:
name = "bert-6-128-yahoo"
device = torch.device("cuda:0")
checkpoint = None
batch_size = 16
num_workers = 4
num_samples = 16
ratio = 0.5
seed = 44
include_layers = ["intermediate", "output"]
exclude_layers = [
    "attention",
]

In [4]:
script_start_time = datetime.now()
print(f"Script started at: {script_start_time.strftime('%Y-%m-%d %H:%M:%S')}")

Script started at: 2024-10-21 11:30:20


In [5]:
config = Config(name, device)
num_labels = config.config["num_labels"]
model = load_model(config)

Loading the model.




{

'architectures'

: 

'bert'

,
 

'dataset_name'

: 

'YahooAnswersTopics'

,
 

'model_name'

: 

'models/bert-6-128-yahoo'

,
 

'num_labels'

: 

10

,
 

'tokenizer_name'

: 

'fabriceyhc/bert-base-uncased-yahoo_answers_topics'

}




The model models/bert-6-128-yahoo is loaded.




In [6]:
train_dataloader, valid_dataloader, test_dataloader = load_data(
    config,
    batch_size=batch_size,
    num_workers=num_workers,
    do_cache=True,
)

Loading cached dataset YahooAnswersTopics.




train.pkl is loaded from cache.




valid.pkl is loaded from cache.




test.pkl is loaded from cache.




The dataset YahooAnswersTopics is loaded




{

'config_name'

: 

'yahoo_answers_topics'

,
 

'features'

: 

{'first_column': 'question_title', 'second_column': 'topic'}

,
 

'path'

: 

'yahoo_answers_topics'

}




In [7]:
# print("Evaluate the original model")
# result = evaluate_model(model, config, test_dataloader)

In [8]:
result_list = []

for concern in range(config.num_labels):
    config.init_seed()
    positive_samples = SamplingDataset(
        train_dataloader,
        config,
        concern,
        num_samples,
        True,
        4,
        resample=False,
    )
    negative_samples = SamplingDataset(
        train_dataloader,
        config,
        concern,
        num_samples,
        False,
        4,
        resample=False,
    )
    all_samples = SamplingDataset(
        train_dataloader,
        config,
        200,
        num_samples,
        False,
        4,
        resample=False,
    )

    module = copy.deepcopy(model)

    head_importance_prunning(module, config, all_samples, ratio)

    prune_concern_identification(
        module,
        config,
        positive_samples,
        negative_samples,
        include_layers=include_layers,
        exclude_layers=exclude_layers,
        sparsity_ratio=ratio,
        keep_dim=False,
        method="structed",
    )

    print(f"Evaluate the pruned model {concern}")
    result = evaluate_model(module, config, test_dataloader, verbose=True)
    result_list.append(result)
    get_sparsity(module)

    get_similarity(model, module, valid_dataloader, concern, num_samples, config)
    print("original model's perplexity")
    get_perplexity(model, valid_dataloader, config)
    print("pruned model's perplexity")
    get_perplexity(module, valid_dataloader, config)

Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 0




Evaluating the model:   0%|                                                                               | 0/…

0.2428428502612369




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9801925310062329




CCA coefficients mean non-concern: 0.9805428346880312




Linear CKA concern: 0.8810084314575372




Linear CKA non-concern: 0.8807277130787704




Kernel CKA concern: 0.7946129473771623




Kernel CKA non-concern: 0.7736096215559377




original model's perplexity




3.187649726867676




pruned model's perplexity




3.445162773132324




Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 1




Evaluating the model:   0%|                                                                               | 0/…

0.2428428502612369




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9866936385806047




CCA coefficients mean non-concern: 0.979687900670279




Linear CKA concern: 0.8771888301483075




Linear CKA non-concern: 0.881336362691054




Kernel CKA concern: 0.7427473187320011




Kernel CKA non-concern: 0.7722194580431717




original model's perplexity




3.187649726867676




pruned model's perplexity




3.4394149780273438




Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 2




Evaluating the model:   0%|                                                                               | 0/…

0.2428428502612369




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.977950658961917




CCA coefficients mean non-concern: 0.9794881626971458




Linear CKA concern: 0.871670100519814




Linear CKA non-concern: 0.8780519018635217




Kernel CKA concern: 0.7834474522937062




Kernel CKA non-concern: 0.7676557679420412




original model's perplexity




3.187649726867676




pruned model's perplexity




3.446967363357544




Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 3




Evaluating the model:   0%|                                                                               | 0/…

0.2428428502612369




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9806499487849677




CCA coefficients mean non-concern: 0.9810987815934943




Linear CKA concern: 0.8715227547236944




Linear CKA non-concern: 0.8718498305480331




Kernel CKA concern: 0.7567141322838459




Kernel CKA non-concern: 0.7657437594283243




original model's perplexity




3.187649726867676




pruned model's perplexity




3.455232858657837




Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 4




Evaluating the model:   0%|                                                                               | 0/…

0.2428428502612369




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9777009621191923




CCA coefficients mean non-concern: 0.9818008294066054




Linear CKA concern: 0.8593755455052048




Linear CKA non-concern: 0.8762243452015883




Kernel CKA concern: 0.77332931803555




Kernel CKA non-concern: 0.7633135795398952




original model's perplexity




3.187649726867676




pruned model's perplexity




3.44673490524292




Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 5




Evaluating the model:   0%|                                                                               | 0/…

0.2428428502612369




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9817732292116148




CCA coefficients mean non-concern: 0.9819620500766936




Linear CKA concern: 0.8547261811142113




Linear CKA non-concern: 0.8808795934788254




Kernel CKA concern: 0.8106219982480272




Kernel CKA non-concern: 0.7717211112502073




original model's perplexity




3.187649726867676




pruned model's perplexity




3.459461212158203




Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 6




Evaluating the model:   0%|                                                                               | 0/…

0.2428428502612369




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.98306684935381




CCA coefficients mean non-concern: 0.9790609247024675




Linear CKA concern: 0.8898683520005476




Linear CKA non-concern: 0.8763171675289761




Kernel CKA concern: 0.7787466209417608




Kernel CKA non-concern: 0.7747103946699181




original model's perplexity




3.187649726867676




pruned model's perplexity




3.443016767501831




Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 7




Evaluating the model:   0%|                                                                               | 0/…

0.2428428502612369




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9807237334379093




CCA coefficients mean non-concern: 0.981914378151924




Linear CKA concern: 0.8772550984284679




Linear CKA non-concern: 0.8694615437748007




Kernel CKA concern: 0.7792230541715408




Kernel CKA non-concern: 0.7678043443619484




original model's perplexity




3.187649726867676




pruned model's perplexity




3.4441514015197754




Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 8




Evaluating the model:   0%|                                                                               | 0/…

0.2428428502612369




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9850604644311648




CCA coefficients mean non-concern: 0.9813804452857277




Linear CKA concern: 0.8888834296834226




Linear CKA non-concern: 0.8708426267949525




Kernel CKA concern: 0.7594109547792158




Kernel CKA non-concern: 0.7705295161219758




original model's perplexity




3.187649726867676




pruned model's perplexity




3.4390525817871094




Total heads to prune: 6




tensor([[0.4751, 0.5249],
        [0.5086, 0.4914],
        [0.4925, 0.5075],
        [0.4997, 0.5003],
        [0.4614, 0.5386],
        [0.5188, 0.4812]])




{(4, 0), (0, 0), (1, 1), (2, 0), (5, 1), (3, 0)}




Evaluate the pruned model 9




Evaluating the model:   0%|                                                                               | 0/…

0.2428428502612369




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.5, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.5, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.5, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encoder.l




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9833342374100423




CCA coefficients mean non-concern: 0.9807563180301249




Linear CKA concern: 0.8168079812239684




Linear CKA non-concern: 0.8679523075102278




Kernel CKA concern: 0.6903985915663363




Kernel CKA non-concern: 0.7691568797719118




original model's perplexity




3.187649726867676




pruned model's perplexity




3.4421026706695557




In [9]:
df_list = [report_to_df(df) for df in result_list]
new_df = append_nth_row(df_list)
csv_name = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}"
new_df.to_csv(f"results/{csv_name}.csv", index=False)
print(csv_name)
new_df

2024-10-21_11-43-40




Unnamed: 0,class,precision,recall,f1-score,support
0,0,0.4184,0.6103,0.4965,2992
1,1,0.6969,0.5241,0.5982,2992
2,2,0.6691,0.6391,0.6538,3012
3,3,0.355,0.5791,0.4402,2998
4,4,0.7917,0.6774,0.7301,2973
5,5,0.8213,0.7734,0.7966,3054
6,6,0.7082,0.3443,0.4634,3003
7,7,0.6371,0.5787,0.6065,3012
8,8,0.6778,0.5828,0.6268,2982
9,9,0.6273,0.7072,0.6649,2982
