In [1]:
import os
import sys

# Put the directory five levels above the working directory on the import path
# (presumably the repository root, so the `src.*` imports in the next cell resolve).
sys.path.append("../" * 5)
# Set before any tokenizer library is loaded; presumably silences the
# Hugging Face tokenizers parallelism/fork warning — TODO confirm.
os.environ.update(TOKENIZERS_PARALLELISM="false")

In [2]:
import copy
import torch
from datetime import datetime
from src.utils.helper import Config, color_print
from src.utils.load import load_model, load_data, save_checkpoint
from src.models.evaluate import (
    evaluate_model,
    get_sparsity,
    get_similarity,
    get_perplexity,
)
from src.utils.sampling import SamplingDataset
from src.pruning.prune_head import head_importance_prunning
from src.utils.helper import report_to_df, append_nth_row

In [3]:
# Experiment parameters used by the cells below.
# Dataset/config name handed to Config.
name = "OSDG"
# Prefer the first CUDA GPU, but fall back to the CPU so the notebook still
# runs (slowly) on machines without CUDA instead of failing at model load.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Not referenced by any cell visible in this notebook.
checkpoint = None
# Forwarded to load_data for the train/valid/test loaders.
batch_size = 16
num_workers = 4
# Forwarded to SamplingDataset and get_similarity.
num_samples = 16
# Forwarded to head_importance_prunning; the recorded run reports
# "Total heads to prune: 86" for this value.
ratio = 0.6
# Not referenced by any cell visible in this notebook (seeding goes through
# config.init_seed()) — NOTE(review): confirm whether Config picks this up.
seed = 44

In [4]:
# Record when the run began and echo it in a human-readable form.
script_start_time = datetime.now()
start_stamp = script_start_time.strftime("%Y-%m-%d %H:%M:%S")
print(f"Script started at: {start_stamp}")

Script started at: 2024-10-23 21:44:43


In [5]:
# Build the experiment configuration from the dataset name and target device.
config = Config(name, device)
# Number of labels read from the configuration; the similarity loop further
# down iterates over range(num_labels).
num_labels = config.config["num_labels"]
# Load the model described by the configuration (the helper logs its progress).
model = load_model(config)

Loading the model.




{

'architectures'

: 

'bert'

,
 

'dataset_name'

: 

'OSDG'

,
 

'model_name'

: 

'sadickam/sdg-classification-bert'

,
 

'num_labels'

: 

16

,
 

'tokenizer_name'

: 

'sadickam/sdg-classification-bert'

}




The model sadickam/sdg-classification-bert is loaded.




In [6]:
# Keyword options forwarded to load_data: loader batch size, worker count, and
# do_cache=True (the recorded run shows the splits being read from cache).
loader_options = {
    "batch_size": batch_size,
    "num_workers": num_workers,
    "do_cache": True,
}
train_dataloader, valid_dataloader, test_dataloader = load_data(config, **loader_options)

Loading cached dataset OSDG.




train.pkl is loaded from cache.




valid.pkl is loaded from cache.




test.pkl is loaded from cache.




The dataset OSDG is loaded




{

'config_name'

: 

'2024-01-01'

,
 

'features'

: 

{'first_column': 'text', 'second_column': 'labels'}

,
 

'path'

: 

'albertmartinez/OSDG'

}




In [7]:
# Baseline evaluation of the unpruned model is disabled; uncomment to run it.
# print("Evaluate the original model")
# result = evaluate_model(model, config, test_dataloader)

In [8]:
# Presumably re-seeds the random generators so the sampling below is
# reproducible — TODO confirm what Config.init_seed() actually seeds.
config.init_seed()
# Build the sample set that is handed to the head-pruning step.
# NOTE(review): the positional arguments 200, False and 4 are magic values whose
# meaning is not visible in this notebook — check the SamplingDataset signature
# and consider passing them by keyword.
all_samples = SamplingDataset(
    train_dataloader,
    config,
    200,
    num_samples,
    False,
    4,
    resample=False,
)

In [9]:
# Collects the evaluation report(s); the last cell converts them to DataFrames
# and writes the first one to CSV.
result_list = []

# Work on a deep copy so the original `model` stays available for the
# similarity and perplexity comparisons in later cells.
module = copy.deepcopy(model)

# Prune attention heads on the copy using the sampled data and pruning ratio.
# NOTE(review): assumed to modify `module` in place, since the return value is
# not used — confirm against head_importance_prunning.
head_importance_prunning(module, config, all_samples, ratio)

# Plain string literal: the message has no placeholders, so no f-string is needed.
print("Evaluate the pruned model")
result = evaluate_model(module, config, test_dataloader, verbose=True)
result_list.append(result)

Total heads to prune: 86




tensor([[0.4747, 0.4317, 0.4891, 0.5093, 0.4567, 0.4666, 0.5067, 0.4785, 0.5582,
         0.4236, 0.5825, 0.4175],
        [0.4234, 0.6757, 0.3818, 0.3243, 0.6439, 0.4396, 0.6521, 0.3851, 0.3816,
         0.3963, 0.4775, 0.5047],
        [0.6747, 0.4075, 0.2934, 0.2980, 0.3167, 0.3683, 0.3579, 0.3163, 0.2893,
         0.7107, 0.3948, 0.3906],
        [0.4759, 0.4354, 0.3277, 0.4589, 0.3294, 0.6723, 0.3433, 0.3908, 0.4428,
         0.6593, 0.3811, 0.6148],
        [0.7708, 0.3074, 0.3783, 0.5660, 0.4790, 0.4956, 0.3807, 0.3298, 0.2292,
         0.4202, 0.5358, 0.5003],
        [0.4186, 0.3720, 0.3554, 0.5116, 0.4853, 0.2781, 0.3575, 0.5138, 0.3230,
         0.7219, 0.6544, 0.2794],
        [0.4631, 0.4400, 0.4843, 0.4607, 0.4504, 0.6346, 0.6040, 0.4176, 0.4031,
         0.6338, 0.3654, 0.5141],
        [0.6227, 0.5131, 0.4976, 0.3439, 0.6072, 0.5023, 0.3705, 0.5096, 0.4806,
         0.6561, 0.5942, 0.4583],
        [0.5007, 0.4401, 0.5599, 0.5067, 0.4545, 0.5377, 0.5388, 0.3886, 0.3313,




{(4, 9), (5, 1), (8, 9), (11, 5), (2, 2), (0, 5), (2, 11), (4, 2), (3, 6), (9, 10), (0, 7), (2, 4), (1, 8), (6, 4), (7, 3), (3, 8), (5, 5), (8, 4), (9, 3), (0, 0), (11, 9), (0, 9), (1, 10), (3, 1), (3, 10), (11, 2), (1, 3), (3, 3), (5, 0), (11, 4), (10, 8), (1, 5), (6, 1), (5, 2), (4, 4), (5, 11), (1, 7), (2, 6), (7, 11), (3, 7), (4, 6), (10, 3), (1, 0), (1, 9), (0, 11), (2, 8), (6, 8), (3, 0), (5, 6), (4, 8), (8, 8), (10, 5), (1, 2), (0, 4), (2, 1), (2, 10), (6, 10), (3, 2), (4, 1), (8, 1), (10, 7), (11, 6), (2, 3), (6, 3), (3, 4), (10, 0), (9, 11), (11, 8), (2, 5), (10, 2), (9, 4), (11, 1), (0, 1), (10, 11), (11, 10), (2, 7), (6, 7), (7, 6), (4, 7), (5, 8), (8, 7), (10, 4), (9, 6), (11, 3), (6, 0), (7, 8)}




Evaluate the pruned model




Evaluating the model:   0%|                                                                                   …

In [10]:
# Compare the original model with the pruned copy once per label index
# ("concern"); each call logs CCA and CKA scores for concern vs. non-concern.
for concern in range(num_labels):
    # Called before every comparison, presumably so each one starts from the
    # same random state — TODO confirm.
    config.init_seed()
    get_similarity(model, module, valid_dataloader, concern, num_samples, config)

adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.6313967818595098




CCA coefficients mean non-concern: 0.6338743279130721




Linear CKA concern: 0.8569264450291059




Linear CKA non-concern: 0.7660257921309509




Kernel CKA concern: 0.8480779616168491




Kernel CKA non-concern: 0.7603234759575188




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.6311438709272263




CCA coefficients mean non-concern: 0.6368667587298126




Linear CKA concern: 0.7932539033450703




Linear CKA non-concern: 0.7606934330373517




Kernel CKA concern: 0.7943226148487448




Kernel CKA non-concern: 0.7566350708252113




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.6311854227193168




CCA coefficients mean non-concern: 0.6338763574497075




Linear CKA concern: 0.8434250091838275




Linear CKA non-concern: 0.7623295607837308




Kernel CKA concern: 0.821650327428592




Kernel CKA non-concern: 0.7485051360938402




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.6254578751024302




CCA coefficients mean non-concern: 0.6375498245693213




Linear CKA concern: 0.6206271271721148




Linear CKA non-concern: 0.7798169894768925




Kernel CKA concern: 0.6111563612135221




Kernel CKA non-concern: 0.7676564506987431




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.6384919983149312




CCA coefficients mean non-concern: 0.637355127916691




Linear CKA concern: 0.8427512990172338




Linear CKA non-concern: 0.7569123002848033




Kernel CKA concern: 0.8091454791165761




Kernel CKA non-concern: 0.7526288396948022




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.6291272670524112




CCA coefficients mean non-concern: 0.6367875120989579




Linear CKA concern: 0.8259324576208642




Linear CKA non-concern: 0.7618085400639106




Kernel CKA concern: 0.7895914892434099




Kernel CKA non-concern: 0.7602356126299074




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.6250891365317083




CCA coefficients mean non-concern: 0.6379925990638822




Linear CKA concern: 0.7192161892553364




Linear CKA non-concern: 0.7689770554509082




Kernel CKA concern: 0.7088829959758378




Kernel CKA non-concern: 0.7606909172573344




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.635330907295624




CCA coefficients mean non-concern: 0.6340702059631288




Linear CKA concern: 0.755035402462105




Linear CKA non-concern: 0.7658890510963287




Kernel CKA concern: 0.7679489734236458




Kernel CKA non-concern: 0.7586659418897788




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.6389148244624515




CCA coefficients mean non-concern: 0.634551379816282




Linear CKA concern: 0.8016894384438684




Linear CKA non-concern: 0.7561259132672338




Kernel CKA concern: 0.8136353473126156




Kernel CKA non-concern: 0.745929807113194




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.6251639719971799




CCA coefficients mean non-concern: 0.6373562538972091




Linear CKA concern: 0.840195068680649




Linear CKA non-concern: 0.7550650290590424




Kernel CKA concern: 0.8418627035696076




Kernel CKA non-concern: 0.7506574627974287




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.6313159672595258




CCA coefficients mean non-concern: 0.6380468449175939




Linear CKA concern: 0.85949732176995




Linear CKA non-concern: 0.7646143490046359




Kernel CKA concern: 0.8439748072571105




Kernel CKA non-concern: 0.7590356789410106




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.6502838752101479




CCA coefficients mean non-concern: 0.6340526099508932




Linear CKA concern: 0.8280352721843806




Linear CKA non-concern: 0.7613309248400421




Kernel CKA concern: 0.809954534352872




Kernel CKA non-concern: 0.7564364301726711




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.6323035082280707




CCA coefficients mean non-concern: 0.6382076064498098




Linear CKA concern: 0.8202179818763575




Linear CKA non-concern: 0.7633300615943477




Kernel CKA concern: 0.8165208192725699




Kernel CKA non-concern: 0.7525346058552839




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.6302120634830115




CCA coefficients mean non-concern: 0.6359073056429116




Linear CKA concern: 0.8317281160641713




Linear CKA non-concern: 0.7653066555708127




Kernel CKA concern: 0.7993183745253521




Kernel CKA non-concern: 0.7519534201739122




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.6367732478575947




CCA coefficients mean non-concern: 0.6362602458489942




Linear CKA concern: 0.8929050329294518




Linear CKA non-concern: 0.7610438449925775




Kernel CKA concern: 0.8668101825482849




Kernel CKA non-concern: 0.7535484154450266




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.6162659335925741




CCA coefficients mean non-concern: 0.6392616455298007




Linear CKA concern: 0.49874786027545776




Linear CKA non-concern: 0.7811651278736644




Kernel CKA concern: 0.4698942414900115




Kernel CKA non-concern: 0.7780586893938082




In [11]:
# Report the sparsity of the pruned copy.
get_sparsity(module)

# In the recorded run get_perplexity both prints the value and returns it.
# Keeping the return values makes the two numbers available for comparison and
# stops the notebook from echoing the last one a second time as cell output.
print("original model's perplexity")
original_perplexity = get_perplexity(model, valid_dataloader, config)
print("pruned model's perplexity")
pruned_perplexity = get_perplexity(module, valid_dataloader, config)

0.19747944464138587




{'bert.encoder.layer.0.attention.self.query.weight': 0.5833333333333334, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5833333333333334, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5833333333333334, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5833333333333334, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.6666666666666666, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.6666666666666666, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.




original model's perplexity




2.445301055908203




pruned model's perplexity




2.6478118896484375




2.6478118896484375

In [12]:
# Convert every collected evaluation report into a DataFrame.
df_list = [report_to_df(report) for report in result_list]
# Timestamp used as the CSV file stem, so repeated runs do not overwrite each other.
csv_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
# Only the first (and, in this notebook, only) report is exported.
new_df = df_list[0]
# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing.
os.makedirs("results", exist_ok=True)
new_df.to_csv(f"results/{csv_name}.csv", index=False)
print(csv_name)
# Display the exported table as the cell's output.
new_df

2024-10-23_21-50-41




Unnamed: 0,class,precision,recall,f1-score,support
0,0,0.6733,0.6336,0.6529,797
1,1,0.8039,0.6929,0.7443,775
2,2,0.8315,0.8566,0.8439,795
3,3,0.856,0.782,0.8173,1110
4,4,0.7485,0.8103,0.7782,1260
5,5,0.875,0.6746,0.7618,882
6,6,0.8539,0.7277,0.7858,940
7,7,0.4549,0.5011,0.4769,473
8,8,0.5933,0.8097,0.6848,746
9,9,0.5158,0.6168,0.5618,689
