In [1]:
import os
import sys

# Make the directory five levels up importable (the later `from src...`
# imports rely on it — presumably the repository root; confirm if the
# notebook is moved). The path is resolved to an absolute one right away so
# imports keep working if the working directory changes later, and it is only
# appended once so re-running this cell does not pile up duplicate entries.
_project_root = os.path.abspath("../../../../../")
if _project_root not in sys.path:
    sys.path.append(_project_root)

# Set before the tokenizer libraries are imported/used; "false" turns the
# TOKENIZERS_PARALLELISM switch off for this process and its children.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import copy
import torch
from datetime import datetime
from src.utils.helper import Config, color_print
from src.utils.load import load_model, load_data, save_checkpoint
from src.models.evaluate import (
    evaluate_model,
    get_sparsity,
    get_similarity,
    get_perplexity,
)
from src.utils.sampling import SamplingDataset
from src.pruning.prune_head import head_importance_prunning
from src.pruning.prune import prune_concern_identification
from src.utils.helper import report_to_df, append_nth_row

In [3]:
# --- Experiment configuration -------------------------------------------------
# Everything a reader might want to tune lives in this cell.

# Model/config identifier handed to `Config(name, device)`.
name = "bert-small-yahoo"

# Prefer the first GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing when the model is loaded.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# NOTE(review): `checkpoint` and `seed` are not referenced by any cell visible
# in this notebook (seeding goes through `config.init_seed()`); kept as-is.
checkpoint = None
seed = 44

# Data-loading options forwarded to `load_data`.
batch_size = 16
num_workers = 4

# Number of samples passed to `SamplingDataset` and `get_similarity`.
num_samples = 16

# Passed both as the head-pruning ratio and as `sparsity_ratio` for
# concern-identification pruning.
ratio = 0.4

# Layer-name filters forwarded to `prune_concern_identification`.
include_layers = ["intermediate", "output"]
exclude_layers = ["attention"]

In [4]:
# Capture the wall-clock start of the run and echo it so the saved notebook
# records when this execution began.
script_start_time = datetime.now()
print(f"Script started at: {script_start_time:%Y-%m-%d %H:%M:%S}")

Script started at: 2024-10-22 02:50:36


In [5]:
# Build the run configuration from the model name and target device.
config = Config(name, device)
# Number of class labels, read from the config mapping (10 in the recorded
# output). NOTE(review): the visible cells below iterate over
# `config.num_labels` and never read this variable.
num_labels = config.config["num_labels"]
# Load the model described by `config`; the helper logs the config and a
# "... is loaded" message (see cell output).
model = load_model(config)

Loading the model.




{

'architectures'

: 

'bert'

,
 

'dataset_name'

: 

'YahooAnswersTopics'

,
 

'model_name'

: 

'models/bert-small-yahoo'

,
 

'num_labels'

: 

10

,
 

'tokenizer_name'

: 

'fabriceyhc/bert-base-uncased-yahoo_answers_topics'

}




The model models/bert-small-yahoo is loaded.




In [6]:
# Options forwarded to `load_data`. With do_cache=True the recorded output
# shows the train/valid/test splits being read from cached .pkl files.
loader_options = {
    "batch_size": batch_size,
    "num_workers": num_workers,
    "do_cache": True,
}

# One loader per split, in the order returned by `load_data`.
train_dataloader, valid_dataloader, test_dataloader = load_data(
    config, **loader_options
)

Loading cached dataset YahooAnswersTopics.




train.pkl is loaded from cache.




valid.pkl is loaded from cache.




test.pkl is loaded from cache.




The dataset YahooAnswersTopics is loaded




{

'config_name'

: 

'yahoo_answers_topics'

,
 

'features'

: 

{'first_column': 'question_title', 'second_column': 'topic'}

,
 

'path'

: 

'yahoo_answers_topics'

}




In [7]:
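# Optional baseline: uncomment the two lines below to evaluate the original
# (unpruned) model on the test set before running the pruning loop.
# print("Evaluate the original model")
# result = evaluate_model(model, config, test_dataloader)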

In [8]:
# Per-concern pruning experiment.
# For every class label ("concern") a fresh copy of the loaded model is
# head-pruned, then pruned with concern identification, and finally evaluated
# and compared against the untouched original model.
result_list = []

# Keyword options shared by every `prune_concern_identification` call.
prune_options = dict(
    include_layers=include_layers,
    exclude_layers=exclude_layers,
    sparsity_ratio=ratio,
    keep_dim=True,
    method="structed",
)


def _draw_samples(target, flag):
    """Build a `SamplingDataset` over the training loader.

    `target` and `flag` are forwarded positionally as the third and fifth
    arguments; the remaining arguments are the same for every call in this
    cell. NOTE(review): the meaning of the positional `4` is defined by
    `SamplingDataset` and is not visible here — confirm before changing it.
    """
    return SamplingDataset(
        train_dataloader, config, target, num_samples, flag, 4, resample=False
    )


for concern in range(config.num_labels):
    # Re-initialise the seed through the config at the start of each concern
    # (presumably so every concern samples reproducibly — verify in Config).
    config.init_seed()

    positive_samples = _draw_samples(concern, True)
    negative_samples = _draw_samples(concern, False)
    # 200 lies outside the label ids iterated by this loop; presumably it makes
    # the sampler draw from every class — TODO confirm against SamplingDataset.
    all_samples = _draw_samples(200, False)

    # Prune a deep copy so `model` stays intact for the comparisons below.
    module = copy.deepcopy(model)
    head_importance_prunning(module, config, all_samples, ratio)
    prune_concern_identification(
        module, config, positive_samples, negative_samples, **prune_options
    )

    print(f"Evaluate the pruned model {concern}")
    result = evaluate_model(module, config, test_dataloader, verbose=True)
    result_list.append(result)
    get_sparsity(module)

    # Representation similarity between the original and the pruned model.
    get_similarity(model, module, valid_dataloader, concern, num_samples, config)

    # Perplexity of the original model first, then of the pruned copy.
    for banner, network in (
        ("original model's perplexity", model),
        ("pruned model's perplexity", module),
    ):
        print(banner)
        get_perplexity(network, valid_dataloader, config)

Total heads to prune: 6




tensor([[0.4593, 0.4443, 0.4726, 0.5557],
        [0.4624, 0.4484, 0.5516, 0.4916],
        [0.4027, 0.5485, 0.5973, 0.4291],
        [0.4493, 0.5507, 0.4512, 0.4729]])




{(0, 1), (1, 1), (2, 0), (3, 0), (2, 3), (3, 2)}




Evaluate the pruned model 0




Evaluating the model:   0%|                                                                               | 0/…

0.38289208361404387




{'bert.encoder.layer.0.attention.self.query.weight': 0.25, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.25, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.25, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.25, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.39990234375, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.39990234375, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.25, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.25, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.25, 'bert.encoder.layer.1.attention.self.value.




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9736809787757545




CCA coefficients mean non-concern: 0.9692683411201528




Linear CKA concern: 0.9689319703831564




Linear CKA non-concern: 0.9662063039516342




Kernel CKA concern: 0.9219770707020479




Kernel CKA non-concern: 0.9116728572225152




original model's perplexity




3.168053388595581




pruned model's perplexity




3.1982669830322266




Total heads to prune: 6




tensor([[0.4593, 0.4443, 0.4726, 0.5557],
        [0.4624, 0.4484, 0.5516, 0.4916],
        [0.4027, 0.5485, 0.5973, 0.4291],
        [0.4493, 0.5507, 0.4512, 0.4729]])




{(0, 1), (1, 1), (2, 0), (3, 0), (2, 3), (3, 2)}




Evaluate the pruned model 1




Evaluating the model:   0%|                                                                               | 0/…

0.38289208361404387




{'bert.encoder.layer.0.attention.self.query.weight': 0.25, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.25, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.25, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.25, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.39990234375, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.39990234375, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.25, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.25, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.25, 'bert.encoder.layer.1.attention.self.value.




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9725022720258272




CCA coefficients mean non-concern: 0.9685574944242177




Linear CKA concern: 0.9626209807727718




Linear CKA non-concern: 0.9656428718873923




Kernel CKA concern: 0.9051425193754267




Kernel CKA non-concern: 0.9095677624379398




original model's perplexity




3.168053388595581




pruned model's perplexity




3.2191548347473145




Total heads to prune: 6




tensor([[0.4593, 0.4443, 0.4726, 0.5557],
        [0.4624, 0.4484, 0.5516, 0.4916],
        [0.4027, 0.5485, 0.5973, 0.4291],
        [0.4493, 0.5507, 0.4512, 0.4729]])




{(0, 1), (1, 1), (2, 0), (3, 0), (2, 3), (3, 2)}




Evaluate the pruned model 2




Evaluating the model:   0%|                                                                               | 0/…

0.38289208361404387




{'bert.encoder.layer.0.attention.self.query.weight': 0.25, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.25, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.25, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.25, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.39990234375, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.39990234375, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.25, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.25, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.25, 'bert.encoder.layer.1.attention.self.value.




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9713846157491134




CCA coefficients mean non-concern: 0.9696807192074407




Linear CKA concern: 0.9579139697996142




Linear CKA non-concern: 0.9627220073632284




Kernel CKA concern: 0.893773097190949




Kernel CKA non-concern: 0.9046589275724924




original model's perplexity




3.168053388595581




pruned model's perplexity




3.199793577194214




Total heads to prune: 6




tensor([[0.4593, 0.4443, 0.4726, 0.5557],
        [0.4624, 0.4484, 0.5516, 0.4916],
        [0.4027, 0.5485, 0.5973, 0.4291],
        [0.4493, 0.5507, 0.4512, 0.4729]])




{(0, 1), (1, 1), (2, 0), (3, 0), (2, 3), (3, 2)}




Evaluate the pruned model 3




Evaluating the model:   0%|                                                                               | 0/…

0.38289208361404387




{'bert.encoder.layer.0.attention.self.query.weight': 0.25, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.25, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.25, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.25, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.39990234375, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.39990234375, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.25, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.25, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.25, 'bert.encoder.layer.1.attention.self.value.




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9748130344947606




CCA coefficients mean non-concern: 0.9686207701476479




Linear CKA concern: 0.9618096182642545




Linear CKA non-concern: 0.9649048605031862




Kernel CKA concern: 0.9032573154210743




Kernel CKA non-concern: 0.908623589029919




original model's perplexity




3.168053388595581




pruned model's perplexity




3.204104423522949




Total heads to prune: 6




tensor([[0.4593, 0.4443, 0.4726, 0.5557],
        [0.4624, 0.4484, 0.5516, 0.4916],
        [0.4027, 0.5485, 0.5973, 0.4291],
        [0.4493, 0.5507, 0.4512, 0.4729]])




{(0, 1), (1, 1), (2, 0), (3, 0), (2, 3), (3, 2)}




Evaluate the pruned model 4




Evaluating the model:   0%|                                                                               | 0/…

0.38289208361404387




{'bert.encoder.layer.0.attention.self.query.weight': 0.25, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.25, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.25, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.25, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.39990234375, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.39990234375, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.25, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.25, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.25, 'bert.encoder.layer.1.attention.self.value.




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9719770624909498




CCA coefficients mean non-concern: 0.9708686426063002




Linear CKA concern: 0.9392158237161896




Linear CKA non-concern: 0.9647399338128605




Kernel CKA concern: 0.892635421166937




Kernel CKA non-concern: 0.9070445711385192




original model's perplexity




3.168053388595581




pruned model's perplexity




3.202085256576538




Total heads to prune: 6




tensor([[0.4593, 0.4443, 0.4726, 0.5557],
        [0.4624, 0.4484, 0.5516, 0.4916],
        [0.4027, 0.5485, 0.5973, 0.4291],
        [0.4493, 0.5507, 0.4512, 0.4729]])




{(0, 1), (1, 1), (2, 0), (3, 0), (2, 3), (3, 2)}




Evaluate the pruned model 5




Evaluating the model:   0%|                                                                               | 0/…

0.38289208361404387




{'bert.encoder.layer.0.attention.self.query.weight': 0.25, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.25, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.25, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.25, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.39990234375, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.39990234375, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.25, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.25, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.25, 'bert.encoder.layer.1.attention.self.value.




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9538988371317402




CCA coefficients mean non-concern: 0.9742236988811451




Linear CKA concern: 0.9032988922471122




Linear CKA non-concern: 0.9695272807295217




Kernel CKA concern: 0.8169054115352358




Kernel CKA non-concern: 0.9199801769424659




original model's perplexity




3.168053388595581




pruned model's perplexity




3.1982600688934326




Total heads to prune: 6




tensor([[0.4593, 0.4443, 0.4726, 0.5557],
        [0.4624, 0.4484, 0.5516, 0.4916],
        [0.4027, 0.5485, 0.5973, 0.4291],
        [0.4493, 0.5507, 0.4512, 0.4729]])




{(0, 1), (1, 1), (2, 0), (3, 0), (2, 3), (3, 2)}




Evaluate the pruned model 6




Evaluating the model:   0%|                                                                               | 0/…

0.38289208361404387




{'bert.encoder.layer.0.attention.self.query.weight': 0.25, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.25, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.25, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.25, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.39990234375, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.39990234375, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.25, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.25, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.25, 'bert.encoder.layer.1.attention.self.value.




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9747209846506564




CCA coefficients mean non-concern: 0.9683284322885429




Linear CKA concern: 0.9584838344463122




Linear CKA non-concern: 0.9644578054694996




Kernel CKA concern: 0.8900365613394172




Kernel CKA non-concern: 0.9104417781814153




original model's perplexity




3.168053388595581




pruned model's perplexity




3.196445941925049




Total heads to prune: 6




tensor([[0.4593, 0.4443, 0.4726, 0.5557],
        [0.4624, 0.4484, 0.5516, 0.4916],
        [0.4027, 0.5485, 0.5973, 0.4291],
        [0.4493, 0.5507, 0.4512, 0.4729]])




{(0, 1), (1, 1), (2, 0), (3, 0), (2, 3), (3, 2)}




Evaluate the pruned model 7




Evaluating the model:   0%|                                                                               | 0/…

0.38289208361404387




{'bert.encoder.layer.0.attention.self.query.weight': 0.25, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.25, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.25, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.25, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.39990234375, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.39990234375, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.25, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.25, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.25, 'bert.encoder.layer.1.attention.self.value.




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9669452017975703




CCA coefficients mean non-concern: 0.9693910536344974




Linear CKA concern: 0.954917300704434




Linear CKA non-concern: 0.9667416938780246




Kernel CKA concern: 0.8925112443030572




Kernel CKA non-concern: 0.913925895418786




original model's perplexity




3.168053388595581




pruned model's perplexity




3.206775188446045




Total heads to prune: 6




tensor([[0.4593, 0.4443, 0.4726, 0.5557],
        [0.4624, 0.4484, 0.5516, 0.4916],
        [0.4027, 0.5485, 0.5973, 0.4291],
        [0.4493, 0.5507, 0.4512, 0.4729]])




{(0, 1), (1, 1), (2, 0), (3, 0), (2, 3), (3, 2)}




Evaluate the pruned model 8




Evaluating the model:   0%|                                                                               | 0/…

0.38289208361404387




{'bert.encoder.layer.0.attention.self.query.weight': 0.25, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.25, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.25, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.25, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.39990234375, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.39990234375, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.25, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.25, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.25, 'bert.encoder.layer.1.attention.self.value.




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.966572390139504




CCA coefficients mean non-concern: 0.969323779194078




Linear CKA concern: 0.9635506651787453




Linear CKA non-concern: 0.963329433459536




Kernel CKA concern: 0.9033209150265296




Kernel CKA non-concern: 0.9078216751617447




original model's perplexity




3.168053388595581




pruned model's perplexity




3.1976025104522705




Total heads to prune: 6




tensor([[0.4593, 0.4443, 0.4726, 0.5557],
        [0.4624, 0.4484, 0.5516, 0.4916],
        [0.4027, 0.5485, 0.5973, 0.4291],
        [0.4493, 0.5507, 0.4512, 0.4729]])




{(0, 1), (1, 1), (2, 0), (3, 0), (2, 3), (3, 2)}




Evaluate the pruned model 9




Evaluating the model:   0%|                                                                               | 0/…

0.38289208361404387




{'bert.encoder.layer.0.attention.self.query.weight': 0.25, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.25, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.25, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.25, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.39990234375, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.39990234375, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.25, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.25, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.25, 'bert.encoder.layer.1.attention.self.value.




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9698322259908162




CCA coefficients mean non-concern: 0.9679492013953431




Linear CKA concern: 0.9524814085150038




Linear CKA non-concern: 0.9648641515819248




Kernel CKA concern: 0.8858032998232896




Kernel CKA non-concern: 0.9097922472915969




original model's perplexity




3.168053388595581




pruned model's perplexity




3.20920991897583




In [9]:
# Turn each per-concern evaluation report into a DataFrame and combine them
# with `append_nth_row` (the recorded output shows one row per class).
df_list = [report_to_df(report) for report in result_list]
new_df = append_nth_row(df_list)

# Timestamped file name so repeated runs never overwrite each other.
csv_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# `to_csv` does not create missing parent directories; make sure the output
# folder exists so a long pruning run is not lost at the very last step.
os.makedirs("results", exist_ok=True)
new_df.to_csv(f"results/{csv_name}.csv", index=False)

print(csv_name)
# Last expression: rich display of the combined table.
new_df

2024-10-22_03-26-58




Unnamed: 0,class,precision,recall,f1-score,support
0,0,0.55,0.4833,0.5145,2992
1,1,0.6985,0.4886,0.575,2992
2,2,0.6825,0.6172,0.6482,3012
3,3,0.3585,0.5727,0.441,2998
4,4,0.7423,0.7703,0.756,2973
5,5,0.7725,0.7773,0.7749,3054
6,6,0.6753,0.3836,0.4893,3003
7,7,0.6329,0.6165,0.6246,3012
8,8,0.5523,0.7069,0.6201,2982
9,9,0.7234,0.6657,0.6933,2982
