In [1]:
import os
import sys

# Make the directory five levels up importable (presumably the repository root
# that holds the `src` package used by the imports in the next cell).
# The path is resolved to an absolute one when this cell runs, so a later
# change of working directory cannot break imports, and it is added only once
# so that re-running the cell does not pile up duplicate entries.
project_root = os.path.abspath("../../../../../")
if project_root not in sys.path:
    sys.path.append(project_root)

# Turn off the Hugging Face tokenizers' internal parallelism (commonly done to
# avoid its fork warning when DataLoader worker processes are used).
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import copy
import torch
from datetime import datetime
from src.utils.helper import Config, color_print
from src.utils.load import load_model, load_data, save_checkpoint
from src.models.evaluate import (
    evaluate_model,
    get_sparsity,
    get_similarity,
    get_perplexity,
)
from src.utils.sampling import SamplingDataset
from src.pruning.prune_head import head_importance_prunning
from src.utils.helper import report_to_df, append_nth_row

In [3]:
# Experiment configuration: the values a reader is most likely to tune.
name = "YahooAnswersTopics"  # dataset / experiment key handed to Config
# Prefer the first GPU, but fall back to CPU so the notebook still runs on
# machines without CUDA instead of failing once tensors are moved to the device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
checkpoint = None  # NOTE(review): not referenced by any visible cell — confirm it is still needed
batch_size = 16  # forwarded to load_data
num_workers = 4  # forwarded to load_data
num_samples = 16  # forwarded to SamplingDataset and get_similarity
ratio = 0.5  # forwarded to head_importance_prunning (recorded run pruned 72 heads)
# NOTE(review): `seed` is not passed to anything visible; config.init_seed() is
# called without arguments, so confirm which seed is actually in effect.
seed = 44

In [4]:
# Capture the moment this run began and echo it in "YYYY-MM-DD HH:MM:SS" form.
script_start_time = datetime.now()
print(f"Script started at: {script_start_time:%Y-%m-%d %H:%M:%S}")

Script started at: 2024-10-23 01:35:54


In [5]:
# Build the experiment configuration for the chosen dataset and device, read
# the label count from it, and load the model that the configuration names.
config = Config(name, device)
# The recorded output shows 'num_labels': 10 for YahooAnswersTopics.
num_labels = config.config["num_labels"]
# load_model is a project helper; the recorded output shows it loading
# 'fabriceyhc/bert-base-uncased-yahoo_answers_topics'.
model = load_model(config)

Loading the model.




{

'architectures'

: 

'bert'

,
 

'dataset_name'

: 

'YahooAnswersTopics'

,
 

'model_name'

: 

'fabriceyhc/bert-base-uncased-yahoo_answers_topics'

,
 

'num_labels'

: 

10

,
 

'tokenizer_name'

: 

'fabriceyhc/bert-base-uncased-yahoo_answers_topics'

}




The model fabriceyhc/bert-base-uncased-yahoo_answers_topics is loaded.




In [6]:
# Fetch the train / validation / test loaders for the configured dataset,
# using the batch size and worker count from the config cell and with
# do_cache=True (the recorded run read all three splits from cache).
(train_dataloader, valid_dataloader, test_dataloader) = load_data(
    config, batch_size=batch_size, num_workers=num_workers, do_cache=True
)

Loading cached dataset YahooAnswersTopics.




train.pkl is loaded from cache.




valid.pkl is loaded from cache.




test.pkl is loaded from cache.




The dataset YahooAnswersTopics is loaded




{

'config_name'

: 

'yahoo_answers_topics'

,
 

'features'

: 

{'first_column': 'question_title', 'second_column': 'topic'}

,
 

'path'

: 

'yahoo_answers_topics'

}




In [7]:
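# Optional baseline: evaluate the unpruned model on the test set.
# Left disabled in this run; uncomment both lines below to execute it.
# print("Evaluate the original model")
# result = evaluate_model(model, config, test_dataloader)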

In [8]:
# Re-seed immediately before sampling; presumably this makes the drawn subset
# repeatable across runs — TODO confirm that config.init_seed() seeds the RNGs
# that SamplingDataset relies on.
config.init_seed()
# Draw the samples later used by head_importance_prunning.
# NOTE(review): 200, False and 4 are passed positionally and their meaning is
# not visible from this notebook; once checked against SamplingDataset's
# signature, consider passing them by keyword or naming them in the config cell.
all_samples = SamplingDataset(
    train_dataloader,
    config,
    200,
    num_samples,
    False,
    4,
    resample=False,
)

In [9]:
# Collects one evaluation report per pruned variant; consumed by the
# report-export cell at the end of the notebook.
result_list = []

# Work on a deep copy so the original `model` stays available for the
# similarity and perplexity comparisons in later cells.
module = copy.deepcopy(model)

# Prune attention heads by importance using the sampled data and `ratio`.
# The return value is unused, so the helper appears to modify `module` in
# place — TODO confirm against src.pruning.prune_head.
head_importance_prunning(module, config, all_samples, ratio)

# Plain string literal: the message has no placeholders, so no f-prefix is needed.
print("Evaluate the pruned model")
result = evaluate_model(module, config, test_dataloader, verbose=True)
result_list.append(result)

Total heads to prune: 72




tensor([[0.6076, 0.4772, 0.3568, 0.3846, 0.4560, 0.3766, 0.5436, 0.4334, 0.6432,
         0.5125, 0.4895, 0.4496],
        [0.5521, 0.6671, 0.3830, 0.3551, 0.5890, 0.4487, 0.3329, 0.3896, 0.4603,
         0.4626, 0.4191, 0.5129],
        [0.7132, 0.4412, 0.3058, 0.3228, 0.2868, 0.3049, 0.4221, 0.3026, 0.3443,
         0.6358, 0.3968, 0.3584],
        [0.3185, 0.2584, 0.2544, 0.3329, 0.4143, 0.7929, 0.2071, 0.3755, 0.3678,
         0.5597, 0.2876, 0.3968],
        [0.5160, 0.3828, 0.3549, 0.6017, 0.6927, 0.4596, 0.3273, 0.3173, 0.3073,
         0.3367, 0.4392, 0.6130],
        [0.4848, 0.3186, 0.5481, 0.2955, 0.5394, 0.2806, 0.2502, 0.4210, 0.2485,
         0.7515, 0.5052, 0.2697],
        [0.6288, 0.4500, 0.3530, 0.3226, 0.4115, 0.5442, 0.6367, 0.3214, 0.4035,
         0.4223, 0.3378, 0.6786],
        [0.5168, 0.3592, 0.4791, 0.3492, 0.6916, 0.3457, 0.3678, 0.4930, 0.3084,
         0.3526, 0.4262, 0.6160],
        [0.2873, 0.2877, 0.7601, 0.3070, 0.4546, 0.4570, 0.4629, 0.2402, 0.2399,




{(3, 1), (4, 9), (3, 7), (4, 6), (3, 10), (8, 0), (5, 1), (8, 3), (10, 0), (8, 9), (9, 8), (11, 5), (10, 3), (10, 9), (9, 11), (2, 2), (2, 5), (11, 8), (1, 6), (1, 3), (2, 8), (2, 11), (6, 2), (7, 1), (4, 2), (3, 0), (3, 3), (5, 6), (4, 8), (3, 6), (5, 3), (9, 7), (10, 2), (9, 4), (9, 10), (8, 8), (2, 4), (10, 5), (11, 1), (11, 10), (10, 8), (2, 7), (11, 2), (7, 3), (7, 9), (6, 7), (7, 6), (6, 10), (8, 11), (3, 2), (4, 1), (4, 7), (9, 0), (5, 5), (5, 11), (3, 8), (9, 3), (5, 8), (8, 7), (8, 1), (9, 9), (11, 3), (10, 1), (10, 7), (8, 10), (0, 5), (2, 3), (11, 9), (7, 5), (6, 3), (7, 8), (0, 2)}




Evaluate the pruned model




Evaluating the model:   0%|                                                                                   …

In [10]:
# For every class index ("concern"), compare the original model with the pruned
# copy on validation data. The return value is discarded; the recorded output
# shows CCA, linear CKA and kernel CKA scores for concern and non-concern per
# iteration.
for concern in range(num_labels):
    # Re-seed on each iteration; presumably this keeps any sampling inside
    # get_similarity identical across concerns — TODO confirm.
    config.init_seed()
    get_similarity(model, module, valid_dataloader, concern, num_samples, config)

adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.795681572857824




CCA coefficients mean non-concern: 0.8012170291828862




Linear CKA concern: 0.9008306556434963




Linear CKA non-concern: 0.8759836505011528




Kernel CKA concern: 0.8406028448368863




Kernel CKA non-concern: 0.8327563895164225




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.7959036287049119




CCA coefficients mean non-concern: 0.8015771281103511




Linear CKA concern: 0.9114765760483011




Linear CKA non-concern: 0.8721065137186595




Kernel CKA concern: 0.8637066707635284




Kernel CKA non-concern: 0.8262734281890449




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.7846435394380095




CCA coefficients mean non-concern: 0.800870523835138




Linear CKA concern: 0.8932335426815198




Linear CKA non-concern: 0.8806310866438168




Kernel CKA concern: 0.8579901637060104




Kernel CKA non-concern: 0.8226000827895175




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.7940821716808124




CCA coefficients mean non-concern: 0.801520063153457




Linear CKA concern: 0.8659020179814308




Linear CKA non-concern: 0.8728752669140929




Kernel CKA concern: 0.8075817213951483




Kernel CKA non-concern: 0.836000224190068




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.7957702473672732




CCA coefficients mean non-concern: 0.7994063285917841




Linear CKA concern: 0.8717768850227967




Linear CKA non-concern: 0.8775623688785628




Kernel CKA concern: 0.8130679917078368




Kernel CKA non-concern: 0.8336711745257592




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.7835230546822707




CCA coefficients mean non-concern: 0.7995381230335455




Linear CKA concern: 0.8848304618172962




Linear CKA non-concern: 0.8845617845068445




Kernel CKA concern: 0.8314599282629703




Kernel CKA non-concern: 0.8346904092549287




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.7989427609835023




CCA coefficients mean non-concern: 0.8004063625761915




Linear CKA concern: 0.9032626885954304




Linear CKA non-concern: 0.8748047073434604




Kernel CKA concern: 0.8204100236313889




Kernel CKA non-concern: 0.8360437725516076




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.7931939438029876




CCA coefficients mean non-concern: 0.8003303503673472




Linear CKA concern: 0.8815457724187624




Linear CKA non-concern: 0.8733371148905894




Kernel CKA concern: 0.8327272820535279




Kernel CKA non-concern: 0.8362691072787165




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.7897031404759809




CCA coefficients mean non-concern: 0.7992466396511637




Linear CKA concern: 0.8976259560722721




Linear CKA non-concern: 0.8698604722160286




Kernel CKA concern: 0.8482922360987684




Kernel CKA non-concern: 0.8306701396609342




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.7785736058338873




CCA coefficients mean non-concern: 0.801238916291067




Linear CKA concern: 0.8613109002403266




Linear CKA non-concern: 0.8710873203174176




Kernel CKA concern: 0.812543442320929




Kernel CKA non-concern: 0.8329167013605703




In [11]:
# Report the sparsity of the pruned copy, then compare perplexity before and
# after pruning on the validation loader.
# The recorded output shows get_sparsity emitting one overall ratio followed by
# a per-parameter breakdown.
get_sparsity(module)
print("original model's perplexity")
get_perplexity(model, valid_dataloader, config)
print("pruned model's perplexity")
# As the cell's last expression, this call's return value is also echoed by the
# notebook, which is presumably why the pruned perplexity appears twice in the
# recorded output.
get_perplexity(module, valid_dataloader, config)

0.16534053810249832




{'bert.encoder.layer.0.attention.self.query.weight': 0.16666666666666666, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.16666666666666666, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.16666666666666666, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.16666666666666666, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.16666666666666666, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.16666666666666666, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention




original model's perplexity




2.6398401260375977




pruned model's perplexity




2.7314178943634033




2.7314178943634033

In [12]:
# Convert every collected evaluation report into a DataFrame and save the
# first one under a timestamped file name, so repeated runs do not overwrite
# each other's results.
df_list = [report_to_df(report) for report in result_list]
csv_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
new_df = df_list[0]
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing (a no-op when it already does).
os.makedirs("results", exist_ok=True)
new_df.to_csv(f"results/{csv_name}.csv", index=False)
print(csv_name)
# Last expression: rendered by the notebook as the per-class metrics table.
new_df

2024-10-23_01-48-42




Unnamed: 0,class,precision,recall,f1-score,support
0,0,0.5901,0.5441,0.5662,2992
1,1,0.7145,0.6601,0.6862,2992
2,2,0.7141,0.7371,0.7254,3012
3,3,0.5658,0.459,0.5068,2998
4,4,0.7728,0.8022,0.7873,2973
5,5,0.8921,0.8094,0.8488,3054
6,6,0.5816,0.4262,0.4919,3003
7,7,0.5263,0.7669,0.6242,3012
8,8,0.6328,0.7448,0.6842,2982
9,9,0.7362,0.7354,0.7358,2982
