In [1]:
import os
import sys

# Make the project's `src` package importable. The path is relative to the
# notebook's working directory (presumably five levels below the repository
# root - confirm if the notebook is moved).
# The membership guard keeps re-runs of this cell from stacking duplicate
# entries on sys.path.
project_root = "../../../../../"
if project_root not in sys.path:
    sys.path.append(project_root)

# Disable parallelism in the Hugging Face `tokenizers` library; set here so it
# is in place before any tokenizer is created (commonly done to avoid the
# library's fork warning when DataLoader worker processes are used).
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import copy
from datetime import datetime

import torch
from torch.utils.data import DataLoader

from src.utils.helper import Config, append_nth_row, color_print, report_to_df
from src.utils.load import load_cache, load_data, load_model, save_checkpoint
from src.models.evaluate import (
    evaluate_model,
    get_sparsity,
    get_similarity,
    get_perplexity,
)
from src.utils.sampling import SamplingDataset
from src.pruning.prune_head import head_importance_prunning
from src.utils.data_class import CustomEmbeddingDataset

In [3]:
# Run parameters for this experiment. Everything a reader might want to tune
# lives in this one cell.
name = "bert-4-128-yahoo"  # model/config identifier handed to Config
# Prefer the first GPU, but fall back to the CPU so the notebook still runs
# on machines without CUDA instead of failing when the model is loaded.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
checkpoint = None
batch_size = 16  # batch size passed to load_data
num_workers = 4  # worker count passed to load_data
num_samples = 16  # passed to SamplingDataset and get_similarity
ratio = 0.3  # passed to head_importance_prunning
seed = 44

In [4]:
# Record when this run began and echo it into the cell output.
script_start_time = datetime.now()
print("Script started at: {}".format(script_start_time.strftime("%Y-%m-%d %H:%M:%S")))

Script started at: 2024-10-22 18:20:14


In [5]:
# Build the run configuration for `name` on `device`, read the number of
# class labels from it, and load the model for that configuration.
config = Config(name, device)
# Number of target classes; drives the per-class similarity loop later on.
num_labels = config.config["num_labels"]
model = load_model(config)

Loading the model.




{

'architectures'

: 

'bert'

,
 

'dataset_name'

: 

'YahooAnswersTopics'

,
 

'model_name'

: 

'models/bert-4-128-yahoo'

,
 

'num_labels'

: 

10

,
 

'tokenizer_name'

: 

'fabriceyhc/bert-base-uncased-yahoo_answers_topics'

}




The model models/bert-4-128-yahoo is loaded.




In [6]:
# Fetch the train / validation / test dataloaders for the configured dataset,
# with caching switched on.
data_loading_options = dict(
    batch_size=batch_size,
    num_workers=num_workers,
    do_cache=True,
)
train_dataloader, valid_dataloader, test_dataloader = load_data(
    config, **data_loading_options
)

Loading cached dataset YahooAnswersTopics.




train.pkl is loaded from cache.




valid.pkl is loaded from cache.




test.pkl is loaded from cache.




The dataset YahooAnswersTopics is loaded




{

'config_name'

: 

'yahoo_answers_topics'

,
 

'features'

: 

{'first_column': 'question_title', 'second_column': 'topic'}

,
 

'path'

: 

'yahoo_answers_topics'

}




In [7]:
# Load the pre-generated, embedding-based dataset for this model from the
# project cache. `load_cache`, `CustomEmbeddingDataset` and `DataLoader` are
# imported in the notebook's main import cell rather than here, so every
# dependency is declared in one place.
# NOTE(review): the cache is a .pkl file, so load_cache presumably unpickles
# it - only load cache files you generated yourself.
generated = load_cache(
    "datasets/generated_dataset/embedding_based/4_128-yahoo",
    "4_128-yahoo_top1.pkl",
)

4_128-yahoo_top1.pkl is loaded from cache.




In [8]:
# Rename the cached dataset's fields (presumably to the names that
# CustomEmbeddingDataset expects - confirm against that class).
# The rename is guarded so the cell is idempotent: an unconditional
# `generated.pop(...)` raises KeyError the second time the cell is run,
# because the old key is already gone.
for cached_key, dataset_key in (
    ("example_list", "embeddings"),
    ("example_label", "labels"),
    ("attn_list", "attention_mask"),
):
    if cached_key in generated:
        generated[dataset_key] = generated.pop(cached_key)
    elif dataset_key not in generated:
        # Neither the old nor the new name is present: the cache is malformed,
        # so fail right here with the same exception type as a plain pop.
        raise KeyError(cached_key)

In [9]:
# Wrap the renamed dictionary in the project's dataset class and serve it in
# batches of four. The variable name `generated_dataloder` (sic) is kept
# because the sampling cell below refers to it by that spelling.
generated_data = CustomEmbeddingDataset(generated)
generated_dataloder = DataLoader(dataset=generated_data, batch_size=4)

In [10]:
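# Optional baseline (disabled): uncomment the two lines below to also
# evaluate the original, unpruned model on the test set.
# print("Evaluate the original model")
# result = evaluate_model(model, config, test_dataloader)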

In [11]:
# Re-seed through the config before sampling (presumably so that the drawn
# samples are reproducible - confirm against Config.init_seed).
config.init_seed()
# Build the sample set that the head-pruning step consumes, drawing from the
# generated-embedding dataloader.
# NOTE(review): 200, False and 4 are positional arguments whose meaning is
# fixed by SamplingDataset's signature, which is not visible in this
# notebook; consider passing them by keyword once the names are confirmed.
all_samples = SamplingDataset(
    generated_dataloder,
    config,
    200,
    num_samples,
    False,
    4,
    resample=False,
)

In [12]:
# Evaluation reports collected here are turned into a CSV by the final cell.
result_list = []

# Work on a deep copy so that `model` stays available, unmodified, for the
# similarity and perplexity comparisons further down.
module = copy.deepcopy(model)

# The return value is not used and `module` is evaluated right afterwards, so
# the pruning is presumably applied to `module` in place - confirm against
# head_importance_prunning.
head_importance_prunning(module, config, all_samples, ratio)

# Plain string literal: the message has no placeholders, so no f-prefix.
print("Evaluate the pruned model")
result = evaluate_model(module, config, test_dataloader, verbose=True)
result_list.append(result)

Total heads to prune: 4




tensor([[0.3952, 0.6048, 0.4106, 0.4775],
        [0.4851, 0.5362, 0.4935, 0.4638],
        [0.5171, 0.5280, 0.4731, 0.4720],
        [0.4691, 0.5217, 0.5309, 0.4839]])




{(0, 2), (1, 3), (3, 0), (0, 0)}




Evaluate the pruned model




Evaluating the model:   0%|                                                                                   …

In [13]:
# Compare the original model with the pruned copy once per class label
# ("concern"), re-seeding through the config before each comparison.
# The return value of get_similarity is discarded; the CCA / CKA figures that
# appear in the cell output are emitted by the call itself.
for concern in range(num_labels):
    config.init_seed()
    get_similarity(model, module, valid_dataloader, concern, num_samples, config)

adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.99595780566942




CCA coefficients mean non-concern: 0.995920967172565




Linear CKA concern: 0.9898693473660769




Linear CKA non-concern: 0.9814797739881691




Kernel CKA concern: 0.9732359698077038




Kernel CKA non-concern: 0.9528283163241735




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.995423687114284




CCA coefficients mean non-concern: 0.9959923289179319




Linear CKA concern: 0.9777523536241712




Linear CKA non-concern: 0.9834397090198245




Kernel CKA concern: 0.9442894319822815




Kernel CKA non-concern: 0.9577636981793787




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9954230161535836




CCA coefficients mean non-concern: 0.9957964952078003




Linear CKA concern: 0.9823158718363971




Linear CKA non-concern: 0.9829018171520549




Kernel CKA concern: 0.9566663552098535




Kernel CKA non-concern: 0.9555391442789738




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9950945173049782




CCA coefficients mean non-concern: 0.9960397690053634




Linear CKA concern: 0.9785953964112769




Linear CKA non-concern: 0.9827980788287269




Kernel CKA concern: 0.9499054562104257




Kernel CKA non-concern: 0.9567917013644719




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.993021637220061




CCA coefficients mean non-concern: 0.9962926138649876




Linear CKA concern: 0.9790275271159297




Linear CKA non-concern: 0.983395270078057




Kernel CKA concern: 0.9591989438946065




Kernel CKA non-concern: 0.9577034032980474




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9942467251404323




CCA coefficients mean non-concern: 0.9959635566313542




Linear CKA concern: 0.964105828612179




Linear CKA non-concern: 0.9862014542255202




Kernel CKA concern: 0.9431673056967699




Kernel CKA non-concern: 0.9629229159549022




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9956505386650635




CCA coefficients mean non-concern: 0.9956905109782234




Linear CKA concern: 0.983467175232604




Linear CKA non-concern: 0.9837247087704227




Kernel CKA concern: 0.9543020906810448




Kernel CKA non-concern: 0.9588552137018207




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9952167433422232




CCA coefficients mean non-concern: 0.995727096854403




Linear CKA concern: 0.9915065879807645




Linear CKA non-concern: 0.9818406603646814




Kernel CKA concern: 0.9769285213513943




Kernel CKA non-concern: 0.9549696124330633




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9928561064363356




CCA coefficients mean non-concern: 0.9957058617328388




Linear CKA concern: 0.9732866454212116




Linear CKA non-concern: 0.9804925502350171




Kernel CKA concern: 0.9237233337207797




Kernel CKA non-concern: 0.9527833931121644




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.9962051825487243




CCA coefficients mean non-concern: 0.9958103416543899




Linear CKA concern: 0.9825162669388302




Linear CKA non-concern: 0.9816199906140864




Kernel CKA concern: 0.9582330671292507




Kernel CKA non-concern: 0.9551207165588267




In [14]:
# Report the pruned model's sparsity, then the perplexity of the original and
# the pruned model on the validation set.
get_sparsity(module)

# get_perplexity already writes its value to the cell output, so leaving the
# second call as the cell's last bare expression showed the pruned perplexity
# twice. Both results are captured in named variables instead, which removes
# the duplicate echo and keeps the numbers available to later cells.
print("original model's perplexity")
original_perplexity = get_perplexity(model, valid_dataloader, config)
print("pruned model's perplexity")
pruned_perplexity = get_perplexity(module, valid_dataloader, config)

0.04908409215849872




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.25, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.25, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.25, 'bert.encoder.layer.1.attention.self.value.bias': 0.0, 'bert.encode




original model's perplexity




3.2110652923583984




pruned model's perplexity




3.415860891342163




3.415860891342163

In [15]:
# Convert every collected evaluation report into a DataFrame and save the
# pruned-model report (the only entry) under a timestamped file name.
df_list = [report_to_df(report) for report in result_list]
csv_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
new_df = df_list[0]

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists; on a fresh checkout the write would otherwise
# fail only after the whole pruning and evaluation run has completed.
os.makedirs("results", exist_ok=True)
new_df.to_csv(f"results/{csv_name}.csv", index=False)

# Echo the file stem so the saved report can be located, then display the table.
print(csv_name)
new_df

2024-10-22_18-22-32




Unnamed: 0,class,precision,recall,f1-score,support
0,0,0.5257,0.5053,0.5153,2992
1,1,0.6988,0.4094,0.5163,2992
2,2,0.6716,0.6139,0.6415,3012
3,3,0.3255,0.6614,0.4363,2998
4,4,0.7574,0.7373,0.7472,2973
5,5,0.8064,0.7678,0.7866,3054
6,6,0.7127,0.38,0.4957,3003
7,7,0.5375,0.6813,0.6009,3012
8,8,0.6348,0.5939,0.6137,2982
9,9,0.7498,0.6321,0.686,2982
