In [1]:
import os
import sys

# Make the project package importable from this notebook's location.
# NOTE(review): the relative path is resolved against the kernel's working
# directory and presumably points at the repository root that holds `src/` — confirm.
repo_root = "../../../../../"
# Guard the append so re-running this cell does not pile up duplicate entries.
if repo_root not in sys.path:
    sys.path.append(repo_root)
# Set before any tokenizer is created (value is read from the environment by the tokenizer library).
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import copy
import torch
from datetime import datetime
from src.utils.helper import Config, color_print
from src.utils.load import load_model, load_data, save_checkpoint
from src.models.evaluate import (
    evaluate_model,
    get_sparsity,
    get_similarity,
    get_perplexity,
)
from src.utils.sampling import SamplingDataset
from src.pruning.prune import (
    prune_concern_identification,
)
from src.utils.helper import report_to_df, append_nth_row

In [3]:
# Experiment configuration: the values below are the knobs used by the cells that follow.
name = "YahooAnswersTopics"  # dataset/model key handed to Config
# Prefer the first GPU, but fall back to the CPU so the notebook still runs
# on a machine where CUDA is not available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
checkpoint = None  # NOTE(review): not referenced by the visible cells
batch_size = 16  # forwarded to load_data
num_workers = 4  # forwarded to load_data
num_samples = 16  # passed to SamplingDataset and get_similarity
ratio = 0.4  # passed as sparsity_ratio to prune_concern_identification
# NOTE(review): `seed` is not referenced by the visible cells; seeding goes
# through config.init_seed() — confirm this value is actually applied.
seed = 44
include_layers = ["attention", "intermediate", "output"]  # layer groups handed to the pruning call
exclude_layers = None  # handed to the pruning call unchanged

In [4]:
# Record when the run began and echo it in a human-readable form.
script_start_time = datetime.now()
start_stamp = script_start_time.strftime("%Y-%m-%d %H:%M:%S")
print(f"Script started at: {start_stamp}")

Script started at: 2024-10-19 16:50:18


In [5]:
# Build the experiment configuration from the dataset/model key and the target device.
config = Config(name, device)
# Number of classes, read from the underlying config mapping.
# NOTE(review): this variable is not referenced by the other visible cells;
# the pruning loop iterates over config.num_labels instead.
num_labels = config.config["num_labels"]
# Load the model described by the configuration; this is the unpruned
# reference that later cells copy and compare against.
model = load_model(config)

Loading the model.




{

'architectures'

: 

'bert'

,
 

'dataset_name'

: 

'YahooAnswersTopics'

,
 

'model_name'

: 

'fabriceyhc/bert-base-uncased-yahoo_answers_topics'

,
 

'num_labels'

: 

10

,
 

'tokenizer_name'

: 

'fabriceyhc/bert-base-uncased-yahoo_answers_topics'

}




The model fabriceyhc/bert-base-uncased-yahoo_answers_topics is loaded.




In [6]:
# Fetch the train / validation / test dataloaders in one call. The keyword
# options are gathered first so they are easy to scan and adjust.
loader_options = {
    "batch_size": batch_size,
    "num_workers": num_workers,
    "do_cache": True,
}
train_dataloader, valid_dataloader, test_dataloader = load_data(config, **loader_options)

Loading cached dataset YahooAnswersTopics.




train.pkl is loaded from cache.




valid.pkl is loaded from cache.




test.pkl is loaded from cache.




The dataset YahooAnswersTopics is loaded




{

'config_name'

: 

'yahoo_answers_topics'

,
 

'features'

: 

{'first_column': 'question_title', 'second_column': 'topic'}

,
 

'path'

: 

'yahoo_answers_topics'

}




In [7]:
# print("Evaluate the original model")
# result = evaluate_model(model, config, test_dataloader)

In [8]:
result_list = []


def draw_samples(target_label, positive):
    """Build a SamplingDataset over the training loader for one label.

    Only the label and the positive/negative flag vary between the datasets
    drawn in this cell; every other argument is fixed here.
    NOTE(review): the meaning of the positional literal 4 is defined by
    SamplingDataset and is not visible from this notebook — confirm.
    """
    return SamplingDataset(
        train_dataloader,
        config,
        target_label,
        num_samples,
        positive,
        4,
        resample=False,
    )


# Keyword options shared by every pruning call in the loop.
pruning_options = dict(
    include_layers=include_layers,
    exclude_layers=exclude_layers,
    sparsity_ratio=ratio,
    keep_dim=True,
    method="unstructed",
)

# One pruned copy of the model is produced and evaluated per concern (class index).
for concern in range(config.num_labels):
    # Reset the seed state through the project config before drawing samples.
    config.init_seed()
    positive_samples = draw_samples(concern, True)
    negative_samples = draw_samples(concern, False)
    # NOTE(review): `all_samples` is not used later in this cell; it is kept
    # so the sequence of sampling calls stays exactly as before.
    all_samples = draw_samples(200, False)

    # Prune a copy so that `model` stays untouched as the reference for the
    # similarity and perplexity comparisons below.
    module = copy.deepcopy(model)

    prune_concern_identification(
        module,
        config,
        positive_samples,
        negative_samples,
        **pruning_options,
    )

    print(f"Evaluate the pruned model {concern}")
    result = evaluate_model(module, config, test_dataloader, verbose=True)
    result_list.append(result)
    # Return value is discarded here; the function appears to report on its own.
    get_sparsity(module)

    # Compare the original and the pruned model on the validation data.
    get_similarity(model, module, valid_dataloader, concern, num_samples, config)
    for banner, candidate in (
        ("original model's perplexity", model),
        ("pruned model's perplexity", module),
    ):
        print(banner)
        get_perplexity(candidate, valid_dataloader, config)

Evaluate the pruned model 0




Evaluating the model:   0%|                                                                               | 0/…

0.3965589468552108




{'bert.encoder.layer.0.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.e




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.8310031148645234




CCA coefficients mean non-concern: 0.8333759343797323




Linear CKA concern: 0.9688555550468485




Linear CKA non-concern: 0.9527069859332188




Kernel CKA concern: 0.9465223279124458




Kernel CKA non-concern: 0.9294435622138361




original model's perplexity




2.6398401260375977




pruned model's perplexity




2.697387933731079




Evaluate the pruned model 1




Evaluating the model:   0%|                                                                               | 0/…

0.3965589468552108




{'bert.encoder.layer.0.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.e




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.8314154541255181




CCA coefficients mean non-concern: 0.8324423117644407




Linear CKA concern: 0.9751007765075973




Linear CKA non-concern: 0.9547911527302234




Kernel CKA concern: 0.9570617784907369




Kernel CKA non-concern: 0.9324814601015088




original model's perplexity




2.6398401260375977




pruned model's perplexity




2.674736499786377




Evaluate the pruned model 2




Evaluating the model:   0%|                                                                               | 0/…

0.3965589468552108




{'bert.encoder.layer.0.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.e




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.8237697257602182




CCA coefficients mean non-concern: 0.8334287893325756




Linear CKA concern: 0.9815467610031439




Linear CKA non-concern: 0.955821760040277




Kernel CKA concern: 0.9689991794873469




Kernel CKA non-concern: 0.9283334539066398




original model's perplexity




2.6398401260375977




pruned model's perplexity




2.686201572418213




Evaluate the pruned model 3




Evaluating the model:   0%|                                                                               | 0/…

0.3965589468552108




{'bert.encoder.layer.0.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.e




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.8325563125464461




CCA coefficients mean non-concern: 0.8320151104518005




Linear CKA concern: 0.9618402529316955




Linear CKA non-concern: 0.9570941039653799




Kernel CKA concern: 0.9415566181261045




Kernel CKA non-concern: 0.9410571539537179




original model's perplexity




2.6398401260375977




pruned model's perplexity




2.6762406826019287




Evaluate the pruned model 4




Evaluating the model:   0%|                                                                               | 0/…

0.3965589468552108




{'bert.encoder.layer.0.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.e




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.8354743368073442




CCA coefficients mean non-concern: 0.8350497901777483




Linear CKA concern: 0.9818735422297817




Linear CKA non-concern: 0.9515107146063256




Kernel CKA concern: 0.9689601371954493




Kernel CKA non-concern: 0.9265390319631919




original model's perplexity




2.6398401260375977




pruned model's perplexity




2.6811110973358154




Evaluate the pruned model 5




Evaluating the model:   0%|                                                                               | 0/…

0.3965589468552108




{'bert.encoder.layer.0.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.e




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.8269633176264921




CCA coefficients mean non-concern: 0.830027519311459




Linear CKA concern: 0.9832031115903946




Linear CKA non-concern: 0.9516819576638846




Kernel CKA concern: 0.9706704385079717




Kernel CKA non-concern: 0.9257992964451334




original model's perplexity




2.6398401260375977




pruned model's perplexity




2.697254180908203




Evaluate the pruned model 6




Evaluating the model:   0%|                                                                               | 0/…

0.3965589468552108




{'bert.encoder.layer.0.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.e




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.8330444428137765




CCA coefficients mean non-concern: 0.8366505340471554




Linear CKA concern: 0.9590006195723341




Linear CKA non-concern: 0.9589071306945219




Kernel CKA concern: 0.9210219074377393




Kernel CKA non-concern: 0.940441641324625




original model's perplexity




2.6398401260375977




pruned model's perplexity




2.6795148849487305




Evaluate the pruned model 7




Evaluating the model:   0%|                                                                               | 0/…

0.3965589468552108




{'bert.encoder.layer.0.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.e




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.8303114601536649




CCA coefficients mean non-concern: 0.8327693348310653




Linear CKA concern: 0.9766831666049942




Linear CKA non-concern: 0.9514951737892012




Kernel CKA concern: 0.9628762085119398




Kernel CKA non-concern: 0.9303344573380404




original model's perplexity




2.6398401260375977




pruned model's perplexity




2.703550100326538




Evaluate the pruned model 8




Evaluating the model:   0%|                                                                               | 0/…

0.3965589468552108




{'bert.encoder.layer.0.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.e




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.8276262160350449




CCA coefficients mean non-concern: 0.828623881354216




Linear CKA concern: 0.9757514017079159




Linear CKA non-concern: 0.9446544045993059




Kernel CKA concern: 0.9627646905965468




Kernel CKA non-concern: 0.9179093736548352




original model's perplexity




2.6398401260375977




pruned model's perplexity




2.7152228355407715




Evaluate the pruned model 9




Evaluating the model:   0%|                                                                               | 0/…

0.3965589468552108




{'bert.encoder.layer.0.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.3997395833333333, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.3997395833333333, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.e




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.8307722224571927




CCA coefficients mean non-concern: 0.8381976070478161




Linear CKA concern: 0.9793330966867118




Linear CKA non-concern: 0.9582602406418871




Kernel CKA concern: 0.9661998129902108




Kernel CKA non-concern: 0.9382057188199399




original model's perplexity




2.6398401260375977




pruned model's perplexity




2.669808864593506




In [9]:
# Convert every per-concern evaluation report into a DataFrame.
df_list = [report_to_df(report) for report in result_list]
# Combine the per-concern frames into a single summary table.
# NOTE(review): presumably row n is taken from the n-th report — confirm against append_nth_row.
new_df = append_nth_row(df_list)
# Timestamped file name (one-second resolution) so successive runs write to different files.
csv_name = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}"
# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before writing.
os.makedirs("results", exist_ok=True)
new_df.to_csv(f"results/{csv_name}.csv", index=False)
print(csv_name)
new_df

2024-10-19_18-06-02




Unnamed: 0,class,precision,recall,f1-score,support
0,0,0.5534,0.5685,0.5608,2992
1,1,0.7236,0.6467,0.683,2992
2,2,0.7026,0.7679,0.7338,3012
3,3,0.5414,0.5077,0.524,2998
4,4,0.7827,0.818,0.8,2973
5,5,0.9031,0.815,0.8568,3054
6,6,0.5674,0.4053,0.4728,3003
7,7,0.5596,0.751,0.6413,3012
8,8,0.6159,0.7616,0.6811,2982
9,9,0.7262,0.7374,0.7318,2982
