In [2]:
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding, Trainer, TrainingArguments
from datasets import load_dataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [88]:
def tokenize_sst2(example):
    return tokenizer(example["sentence"], truncation=True)

def tokenize_agnews(example):
    return tokenizer(example["text"], truncation=True)

In [None]:
def shuffle_weights(model, N):
    """Randomly permute N% of the weights in `model`.

    This is a fast approximation of re-initializing the weights of a model.

    Assumes weights are distributed independently of the dimensions of the weight tensors
      (i.e., the weights have the same distribution along each dimension).

    :param Model model: Modify the weights of the given model.
    """
    names, weights = get_weights(model)

    perm_weights = [np.random.permutation(w.flat).reshape(w.shape) for w in weights]
    # Faster, but less random: only permutes along the first dimension
    # weights = [np.random.permutation(w) for w in weights]
    set_weights(model, names, perm_weights, weights, N)
    
def get_weights(model):
    ws = []
    names = []
    for i in model.named_parameters():
        name = i[0]
        if 'weight' in name:
            names.append(name)
            ws.append(i[1].data.detach().cpu().numpy())
    return names, ws

def set_weights(model, names, perm_w, w, N):
    model_dict = dict(model.named_parameters())
    num_to_perm = int(len(w) * N)
    print(num_to_perm)
    perm_idx = np.random.choice(len(w), num_to_perm, replace=False)
    for i, name in enumerate(names):
        if i in perm_idx:
            model_dict[name].data.copy_(torch.tensor(perm_w[i]).to('cuda:0'))

In [None]:
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    
    # Calculate accuracy
    accuracy = accuracy_score(labels, preds)

   # Calculate precision, recall, and F1-score
    precision = precision_score(labels, preds, average='weighted')
    recall = recall_score(labels, preds, average='weighted')
    f1 = f1_score(labels, preds, average='weighted')
    
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

# SST@

# SST2

In [63]:
tokenizer = AutoTokenizer.from_pretrained("textattack/bert-base-uncased-SST-2")
model = AutoModelForSequenceClassification.from_pretrained("textattack/bert-base-uncased-SST-2")

In [64]:
raw_sst2 = load_dataset('glue', 'sst2')

Found cached dataset glue (/scratch/users/aliyahhsu/huggingface-cache/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

In [65]:
tokenized_datasets = raw_sst2.map(tokenize_sst2, task='sst2', batched=True)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Loading cached processed dataset at /scratch/users/aliyahhsu/huggingface-cache/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-732e98c83ed3eeea.arrow
Loading cached processed dataset at /scratch/users/aliyahhsu/huggingface-cache/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-54e98f13df2bb21e.arrow


Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

In [66]:
training_args = TrainingArguments("sst2-finetuned-model")

In [74]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=raw_sst2["train"],
    eval_dataset=raw_sst2["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,  # Define your custom metrics function
    tokenizer=tokenizer,
)

In [None]:
evaluation_results = trainer.evaluate(tokenized_datasets["validation"])
print(evaluation_results)

In [73]:
shuffle_weights(model, 0.05)

5


In [77]:
trainer.save_model("perm5perc_sst2_model")

### Evaluate permuted model

In [78]:
perm_model = AutoModelForSequenceClassification.from_pretrained("perm5perc_sst2_model")

In [79]:
trainer = Trainer(
    model=perm_model,
    args=training_args,
    train_dataset=raw_sst2["train"],
    eval_dataset=raw_sst2["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,  # Define your custom metrics function
    tokenizer=tokenizer,
)

In [80]:
evaluation_results = trainer.evaluate(tokenized_datasets["validation"])
print(evaluation_results)

{'eval_loss': 0.6473379731178284, 'eval_accuracy': 0.6089449541284404, 'eval_precision': 0.705911057072326, 'eval_recall': 0.6089449541284404, 'eval_f1': 0.5515552067785796, 'eval_runtime': 2.3478, 'eval_samples_per_second': 371.411, 'eval_steps_per_second': 46.426}


# ag-news

In [97]:
tokenizer = AutoTokenizer.from_pretrained("textattack/bert-base-uncased-ag-news")
model = AutoModelForSequenceClassification.from_pretrained("textattack/bert-base-uncased-ag-news")

In [98]:
raw_agnews = load_dataset('ag_news')

tokenized_datasets = raw_agnews.map(tokenize_agnews, batched=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Found cached dataset ag_news (/scratch/users/aliyahhsu/huggingface-cache/ag_news/default/0.0.0/bc2bcb40336ace1a0374767fc29bb0296cdaf8a6da7298436239c54d79180548)


  0%|          | 0/2 [00:00<?, ?it/s]

Loading cached processed dataset at /scratch/users/aliyahhsu/huggingface-cache/ag_news/default/0.0.0/bc2bcb40336ace1a0374767fc29bb0296cdaf8a6da7298436239c54d79180548/cache-cd902749757e4ea3.arrow
Loading cached processed dataset at /scratch/users/aliyahhsu/huggingface-cache/ag_news/default/0.0.0/bc2bcb40336ace1a0374767fc29bb0296cdaf8a6da7298436239c54d79180548/cache-d0c4d86faa14431b.arrow


In [100]:
training_args = TrainingArguments("agnews-finetuned-model")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=raw_agnews["train"],
    eval_dataset=raw_agnews["test"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,  # Define your custom metrics function
    tokenizer=tokenizer,
)

evaluation_results = trainer.evaluate(tokenized_datasets["test"])
print(evaluation_results)

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'eval_loss': 0.839389443397522, 'eval_accuracy': 0.6667105263157894, 'eval_precision': 0.7894484623269422, 'eval_recall': 0.6667105263157894, 'eval_f1': 0.61325119110938, 'eval_runtime': 37.2097, 'eval_samples_per_second': 204.248, 'eval_steps_per_second': 25.531}


In [99]:
shuffle_weights(model, 0.10)

10


In [101]:
trainer.save_model("perm10perc_agnews_model")

### Evaluate permuted model

In [102]:
perm_model = AutoModelForSequenceClassification.from_pretrained("perm10perc_agnews_model")

In [103]:
trainer = Trainer(
    model=perm_model,
    args=training_args,
    train_dataset=raw_agnews["train"],
    eval_dataset=raw_agnews["test"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,  # Define your custom metrics function
    tokenizer=tokenizer,
)

In [104]:
evaluation_results = trainer.evaluate(tokenized_datasets["test"])
print(evaluation_results)

{'eval_loss': 0.839389443397522, 'eval_accuracy': 0.6667105263157894, 'eval_precision': 0.7894484623269422, 'eval_recall': 0.6667105263157894, 'eval_f1': 0.61325119110938, 'eval_runtime': 36.4451, 'eval_samples_per_second': 208.533, 'eval_steps_per_second': 26.067}


# CoLA

In [105]:
tokenizer = AutoTokenizer.from_pretrained("textattack/bert-base-uncased-cola")
model = AutoModelForSequenceClassification.from_pretrained("textattack/bert-base-uncased-cola")

Downloading (…)okenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/476 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

In [106]:
raw_cola = load_dataset('glue', 'cola')

tokenized_datasets = raw_cola.map(tokenize_sst2, batched=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Downloading and preparing dataset glue/cola to /scratch/users/aliyahhsu/huggingface-cache/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad...


Downloading data:   0%|          | 0.00/377k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8551 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1043 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1063 [00:00<?, ? examples/s]

Dataset glue downloaded and prepared to /scratch/users/aliyahhsu/huggingface-cache/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

Map:   0%|          | 0/8551 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Map:   0%|          | 0/1063 [00:00<?, ? examples/s]

In [109]:
training_args = TrainingArguments("cola-finetuned-model")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=raw_cola["train"],
    eval_dataset=raw_cola["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,  # Define your custom metrics function
    tokenizer=tokenizer,
)

evaluation_results = trainer.evaluate(tokenized_datasets["validation"])
print(evaluation_results)

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.6283413767814636, 'eval_accuracy': 0.6912751677852349, 'eval_precision': 0.47786135759650467, 'eval_recall': 0.6912751677852349, 'eval_f1': 0.5650900181101524, 'eval_runtime': 1.4828, 'eval_samples_per_second': 703.416, 'eval_steps_per_second': 88.349}


In [108]:
shuffle_weights(model, 0.10)

10


In [110]:
trainer.save_model("perm10perc_cola_model")

### Evaluate permuted model

In [111]:
perm_model = AutoModelForSequenceClassification.from_pretrained("perm10perc_cola_model")

In [112]:
trainer = Trainer(
    model=perm_model,
    args=training_args,
    train_dataset=raw_cola["train"],
    eval_dataset=raw_cola["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,  # Define your custom metrics function
    tokenizer=tokenizer,
)

In [113]:
evaluation_results = trainer.evaluate(tokenized_datasets["validation"])
print(evaluation_results)

{'eval_loss': 0.6283413767814636, 'eval_accuracy': 0.6912751677852349, 'eval_precision': 0.47786135759650467, 'eval_recall': 0.6912751677852349, 'eval_f1': 0.5650900181101524, 'eval_runtime': 1.4766, 'eval_samples_per_second': 706.371, 'eval_steps_per_second': 88.72}


  _warn_prf(average, modifier, msg_start, len(result))
