In [1]:
import os
import sys

# Make the directory five levels above the working directory importable
# (presumably the repository root that holds the `src` package - TODO confirm).
# The path is resolved to an absolute one so later working-directory changes
# cannot break imports, and the membership check keeps re-running this cell
# from stacking duplicate entries onto sys.path.
_extra_import_root = os.path.abspath("../../../../../")
if _extra_import_root not in sys.path:
    sys.path.append(_extra_import_root)

# Must be set before any tokenizer is used; the value is the literal string "false".
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import copy
import torch
from datetime import datetime
from src.utils.helper import Config, color_print
from src.utils.load import load_model, load_data, save_checkpoint
from src.models.evaluate import (
    evaluate_model,
    get_sparsity,
    get_similarity,
    get_perplexity,
)
from src.utils.sampling import SamplingDataset
from src.pruning.prune_head import head_importance_prunning
from src.pruning.prune import prune_concern_identification
from src.utils.helper import report_to_df, append_nth_row

In [3]:
# Experiment configuration: everything a reader might want to tune lives here.

# Dataset/config key handed to Config(...).
name = "IMDB"
# Prefer the first CUDA device, but fall back to CPU so the notebook still
# runs (slowly) on machines without a usable GPU instead of failing later.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# NOTE(review): `checkpoint` is not referenced by any cell visible in this notebook.
checkpoint = None
# Passed to load_data(...) when building the dataloaders.
batch_size = 16
num_workers = 4
# Number of samples requested from each SamplingDataset (also passed to get_similarity).
num_samples = 16
# Passed to head_importance_prunning(...) and as `sparsity_ratio` to
# prune_concern_identification(...).
ratio = 0.6
# NOTE(review): `seed` is not referenced by any visible cell; seeding goes through
# config.init_seed() - confirm whether this value is actually consumed anywhere.
seed = 44
# Layer-name filters forwarded to prune_concern_identification(...).
include_layers = ["intermediate", "output"]
exclude_layers = [
    "attention",
]

In [4]:
# Capture the wall-clock start of the run and echo it in a fixed
# "YYYY-MM-DD HH:MM:SS" layout.
script_start_time = datetime.now()
_start_stamp = script_start_time.strftime("%Y-%m-%d %H:%M:%S")
print("Script started at: " + _start_stamp)

Script started at: 2024-10-20 00:02:19


In [5]:
# Build the project Config for the chosen dataset name and device.
config = Config(name, device)
# Number of target classes, read from the config's underlying dict.
# NOTE(review): this variable is not used by the visible cells; the pruning loop
# iterates over `config.num_labels` instead - confirm the two always agree.
num_labels = config.config["num_labels"]
# Load the model described by the config (project helper).
model = load_model(config)

Loading the model.




{

'architectures'

: 

'bert'

,
 

'dataset_name'

: 

'IMDB'

,
 

'model_name'

: 

'textattack/bert-base-uncased-imdb'

,
 

'num_labels'

: 

2

,
 

'tokenizer_name'

: 

'textattack/bert-base-uncased-imdb'

}




The model textattack/bert-base-uncased-imdb is loaded.




In [6]:
# Build the train / validation / test dataloaders for the configured dataset.
# The keyword options are gathered first so the call itself stays on one line.
_loader_options = {
    "batch_size": batch_size,
    "num_workers": num_workers,
    "do_cache": True,
}
train_dataloader, valid_dataloader, test_dataloader = load_data(config, **_loader_options)

Loading cached dataset IMDB.




train.pkl is loaded from cache.




valid.pkl is loaded from cache.




test.pkl is loaded from cache.




The dataset IMDB is loaded




{

'config_name'

: 

'plain_text'

,
 

'features'

: 

{'first_column': 'text', 'second_column': 'label'}

,
 

'path'

: 

'imdb'

}




In [7]:
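# Optional baseline (currently disabled): uncomment the two lines below to
# evaluate the unpruned model on the test split for comparison.
# print("Evaluate the original model")
# result = evaluate_model(model, config, test_dataloader)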

In [8]:
# Per-concern pruning experiment.
# For each label index ("concern") a fresh copy of the model is head-pruned,
# then pruned again using concern-specific positive/negative samples, and
# finally evaluated and compared against the untouched original model.
result_list = []


def _draw_samples(target, positive):
    """Build a SamplingDataset over the training loader with this notebook's fixed settings.

    `target` and `positive` are forwarded as the third and fifth positional
    arguments. NOTE(review): the meaning of the literal 4 (sixth positional
    argument) is not visible from this notebook - TODO confirm and name it.
    """
    return SamplingDataset(
        train_dataloader,
        config,
        target,
        num_samples,
        positive,
        4,
        resample=False,
    )


for concern in range(config.num_labels):
    # Re-seed at the start of every iteration, before any sampling happens.
    config.init_seed()

    # Sampling order is kept as positive -> negative -> all.
    positive_samples = _draw_samples(concern, True)
    negative_samples = _draw_samples(concern, False)
    # NOTE(review): 200 is passed where a concern index is expected; presumably a
    # sentinel meaning "no specific concern" - verify against SamplingDataset.
    all_samples = _draw_samples(200, False)

    # Work on a deep copy so `model` stays unpruned for the comparisons below.
    module = copy.deepcopy(model)

    # Stage 1: attention-head pruning driven by the concern-agnostic samples.
    head_importance_prunning(module, config, all_samples, ratio)

    # Stage 2: concern-aware pruning of the selected layers.
    _concern_pruning_options = dict(
        include_layers=include_layers,
        exclude_layers=exclude_layers,
        sparsity_ratio=ratio,
        keep_dim=False,
        method="structed",
    )
    prune_concern_identification(
        module, config, positive_samples, negative_samples, **_concern_pruning_options
    )

    # Evaluate the pruned copy and keep its report for the export cell.
    print(f"Evaluate the pruned model {concern}")
    result = evaluate_model(module, config, test_dataloader, verbose=True)
    result_list.append(result)
    get_sparsity(module)

    # Compare the pruned copy with the original on the validation split.
    get_similarity(model, module, valid_dataloader, concern, num_samples, config)
    print("original model's perplexity")
    get_perplexity(model, valid_dataloader, config)
    print("pruned model's perplexity")
    get_perplexity(module, valid_dataloader, config)

Total heads to prune: 86




tensor([[0.5385, 0.4080, 0.4691, 0.5637, 0.5390, 0.4439, 0.5190, 0.4272, 0.4888,
         0.4545, 0.6183, 0.3817],
        [0.3811, 0.7038, 0.2962, 0.2981, 0.6218, 0.3951, 0.5404, 0.3963, 0.4056,
         0.3963, 0.4869, 0.5001],
        [0.7018, 0.3968, 0.3042, 0.2946, 0.2830, 0.3394, 0.3822, 0.2859, 0.2958,
         0.7170, 0.3353, 0.3565],
        [0.5079, 0.3977, 0.3184, 0.5424, 0.3104, 0.6144, 0.3219, 0.4152, 0.3848,
         0.6896, 0.4527, 0.5941],
        [0.4527, 0.4084, 0.4214, 0.5190, 0.4054, 0.6017, 0.4455, 0.4555, 0.3966,
         0.4975, 0.6034, 0.5755],
        [0.4534, 0.3214, 0.2436, 0.4015, 0.5182, 0.3032, 0.3531, 0.5374, 0.4019,
         0.7564, 0.5319, 0.3514],
        [0.4782, 0.3763, 0.4695, 0.4171, 0.4990, 0.6237, 0.5380, 0.4421, 0.5104,
         0.5605, 0.4333, 0.6132],
        [0.5826, 0.5346, 0.5279, 0.4711, 0.5681, 0.4406, 0.4656, 0.3921, 0.4873,
         0.4912, 0.6079, 0.4727],
        [0.4072, 0.5928, 0.4339, 0.4284, 0.5270, 0.4948, 0.4331, 0.4388, 0.4204,




{(4, 0), (5, 1), (8, 0), (8, 9), (10, 6), (0, 5), (2, 2), (2, 11), (6, 2), (4, 2), (3, 6), (5, 3), (8, 2), (9, 10), (11, 7), (2, 4), (0, 7), (1, 8), (7, 3), (3, 8), (5, 5), (9, 3), (11, 0), (0, 9), (11, 9), (1, 10), (7, 5), (3, 1), (3, 10), (11, 2), (0, 2), (1, 3), (7, 7), (5, 0), (10, 8), (1, 5), (6, 1), (5, 2), (4, 4), (5, 11), (9, 9), (10, 1), (10, 10), (1, 7), (2, 6), (7, 11), (3, 7), (4, 6), (8, 6), (10, 3), (1, 0), (1, 9), (0, 11), (2, 8), (5, 6), (4, 8), (8, 8), (10, 5), (1, 2), (2, 1), (2, 10), (6, 10), (3, 2), (4, 1), (8, 10), (10, 7), (2, 3), (6, 3), (3, 4), (8, 3), (10, 9), (11, 8), (2, 5), (10, 2), (9, 4), (11, 1), (0, 1), (2, 7), (6, 7), (7, 6), (4, 7), (5, 8), (8, 7), (9, 6), (11, 3), (6, 0)}




Evaluate the pruned model 0




Evaluating the model:   0%|                                                                               | 0/…

0.3275692993518973




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.6666666666666666, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.6666666666666666, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.6666666666666666, 'bert.encoder.layer.1.att




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.48309621996906355




CCA coefficients mean non-concern: 0.4770790493392562




Linear CKA concern: 0.20383617710124047




Linear CKA non-concern: 0.17129765874715963




Kernel CKA concern: 0.06881655119466627




Kernel CKA non-concern: 0.05071487187173964




original model's perplexity




1.0879530906677246




pruned model's perplexity




2.006338119506836




Total heads to prune: 86




tensor([[0.5385, 0.4080, 0.4691, 0.5637, 0.5390, 0.4439, 0.5190, 0.4272, 0.4888,
         0.4545, 0.6183, 0.3817],
        [0.3811, 0.7038, 0.2962, 0.2981, 0.6218, 0.3951, 0.5404, 0.3963, 0.4056,
         0.3963, 0.4869, 0.5001],
        [0.7018, 0.3968, 0.3042, 0.2946, 0.2830, 0.3394, 0.3822, 0.2859, 0.2958,
         0.7170, 0.3353, 0.3565],
        [0.5079, 0.3977, 0.3184, 0.5424, 0.3104, 0.6144, 0.3219, 0.4152, 0.3848,
         0.6896, 0.4527, 0.5941],
        [0.4527, 0.4084, 0.4214, 0.5190, 0.4054, 0.6017, 0.4455, 0.4555, 0.3966,
         0.4975, 0.6034, 0.5755],
        [0.4534, 0.3214, 0.2436, 0.4015, 0.5182, 0.3032, 0.3531, 0.5374, 0.4019,
         0.7564, 0.5319, 0.3514],
        [0.4782, 0.3763, 0.4695, 0.4171, 0.4990, 0.6237, 0.5380, 0.4421, 0.5104,
         0.5605, 0.4333, 0.6132],
        [0.5826, 0.5346, 0.5279, 0.4711, 0.5681, 0.4406, 0.4656, 0.3921, 0.4873,
         0.4912, 0.6079, 0.4727],
        [0.4072, 0.5928, 0.4339, 0.4284, 0.5270, 0.4948, 0.4331, 0.4388, 0.4204,




{(4, 0), (5, 1), (8, 0), (8, 9), (10, 6), (0, 5), (2, 2), (2, 11), (6, 2), (4, 2), (3, 6), (5, 3), (8, 2), (9, 10), (11, 7), (2, 4), (0, 7), (1, 8), (7, 3), (3, 8), (5, 5), (9, 3), (11, 0), (0, 9), (11, 9), (1, 10), (7, 5), (3, 1), (3, 10), (11, 2), (0, 2), (1, 3), (7, 7), (5, 0), (10, 8), (1, 5), (6, 1), (5, 2), (4, 4), (5, 11), (9, 9), (10, 1), (10, 10), (1, 7), (2, 6), (7, 11), (3, 7), (4, 6), (8, 6), (10, 3), (1, 0), (1, 9), (0, 11), (2, 8), (5, 6), (4, 8), (8, 8), (10, 5), (1, 2), (2, 1), (2, 10), (6, 10), (3, 2), (4, 1), (8, 10), (10, 7), (2, 3), (6, 3), (3, 4), (8, 3), (10, 9), (11, 8), (2, 5), (10, 2), (9, 4), (11, 1), (0, 1), (2, 7), (6, 7), (7, 6), (4, 7), (5, 8), (8, 7), (9, 6), (11, 3), (6, 0)}




Evaluate the pruned model 1




Evaluating the model:   0%|                                                                               | 0/…

0.3275692993518973




{'bert.encoder.layer.0.attention.self.query.weight': 0.5, 'bert.encoder.layer.0.attention.self.query.bias': 0.0, 'bert.encoder.layer.0.attention.self.key.weight': 0.5, 'bert.encoder.layer.0.attention.self.key.bias': 0.0, 'bert.encoder.layer.0.attention.self.value.weight': 0.5, 'bert.encoder.layer.0.attention.self.value.bias': 0.0, 'bert.encoder.layer.0.attention.output.dense.weight': 0.5, 'bert.encoder.layer.0.attention.output.dense.bias': 0.0, 'bert.encoder.layer.0.intermediate.dense.weight': 0.0, 'bert.encoder.layer.0.intermediate.dense.bias': 0.0, 'bert.encoder.layer.0.output.dense.weight': 0.0, 'bert.encoder.layer.0.output.dense.bias': 0.0, 'bert.encoder.layer.1.attention.self.query.weight': 0.6666666666666666, 'bert.encoder.layer.1.attention.self.query.bias': 0.0, 'bert.encoder.layer.1.attention.self.key.weight': 0.6666666666666666, 'bert.encoder.layer.1.attention.self.key.bias': 0.0, 'bert.encoder.layer.1.attention.self.value.weight': 0.6666666666666666, 'bert.encoder.layer.1.att




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: 0.4770228068030268




CCA coefficients mean non-concern: 0.4830501236733592




Linear CKA concern: 0.17223686783413647




Linear CKA non-concern: 0.20281352771365405




Kernel CKA concern: 0.05195953988190124




Kernel CKA non-concern: 0.06695865134512466




original model's perplexity




1.0879530906677246




pruned model's perplexity




2.008406162261963




In [9]:
# Convert every per-concern evaluation result with report_to_df and combine
# them with append_nth_row (both project helpers).
df_list = [report_to_df(report) for report in result_list]
new_df = append_nth_row(df_list)

# Timestamped file name so repeated runs never overwrite each other.
csv_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# to_csv does not create missing directories; make sure the output folder
# exists so a fresh checkout does not lose a long run at the very last step.
os.makedirs("results", exist_ok=True)
new_df.to_csv(f"results/{csv_name}.csv", index=False)
print(csv_name)

# Last expression: show the combined table as the cell's rich output.
new_df

2024-10-20_00-18-50




Unnamed: 0,class,precision,recall,f1-score,support
0,0,0.0,0.0,0.0,12500
1,1,0.5,1.0,0.6667,12500
