In [1]:
from llmfact import LayerOutputExtractor, FBNFeatureExtractor, GroupFBNFeatureExtractor, FBNExtractor, LLMFC
from llmfact.mask import MaskedGPT2ForSequenceClassification, MaskedGPT2AmplifiedForSequenceClassification, MaskedGPT2LMModel
from transformers import GPT2Model, GPT2Config, GPT2LMHeadModel, GPT2ForSequenceClassification, Trainer, TrainingArguments
from transformers import GPT2Tokenizer
from transformers import AutoTokenizer, AutoModel, AutoConfig, AutoModelForCausalLM, AutoModelForQuestionAnswering
from datasets import load_dataset, get_dataset_config_names, get_dataset_split_names
from torch.utils.data import DataLoader
from rouge_score import rouge_scorer
from evaluate import load

from llmfact.utils import IoU, correlation_activation, thresholding, write_layer_txt, evaluate_iou
from llmfact.stat import  StatICA, StatDictionaryLearning
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.manifold import TSNE
from sklearn.decomposition import FastICA
import seaborn as sns
import pandas as pd
from tqdm.auto import tqdm

In [2]:
model_name = "THUDM/chatglm3-6b-base"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, padding_side='left')
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, device_map="auto")



Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

In [3]:
model.device

device(type='cuda', index=0)

In [4]:
%%time
input_ids = tokenizer.encode("Harry Potter is a series of seven fantasy novels written by British author, [HL]J. K. Rowling[HL]. Who wrote Harry Potter? ", return_tensors="pt")
outputs = model.generate(input_ids.to(model.device), 
                         max_new_tokens=50)
print("Generated:", tokenizer.decode(outputs[0][34:], skip_special_tokens=True))

Generated: J. K. Rowling
CPU times: user 17.2 s, sys: 1.18 s, total: 18.4 s
Wall time: 18.6 s


## SQuAD

In [5]:
dataset = load_dataset("rajpurkar/squad")
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 87599
    })
    validation: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 10570
    })
})

In [5]:
from torch.utils.data import Dataset

class SquadDataset(torch.utils.data.Dataset):
    def __init__(self, data_dict, tokenizer, max_length=800):
        self.context = data_dict['context']
        self.question = data_dict['question']
        self.answers = data_dict['answers']
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.context)

    def __getitem__(self, idx):
        context = self.context[idx]
        question = self.question[idx]
        answers = self.answers[idx]['text']
        if len(answers) < 4:
            answers += ["0"] * (4 - len(answers))
        else:
            answers = answers[:4]
        prompt = f"Please answer the question according to the following context. Just answer in brief only one sentence is enough. \n Context: {context} \n Question:{question} \n Answer:"
        encoding = self.tokenizer(prompt, return_tensors="pt", max_length=self.max_length, truncation=True, padding="max_length")
        
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'answers': answers
        }

In [6]:
def compute_exact_match(prediction, ground_truths):
    prediction = prediction.lower().strip()
    for gt in ground_truths:
        gt = gt.lower().strip()
        if prediction == gt:
            return 1
    return 0
    
import collections
def compute_f1(prediction, ground_truths):
    def _f1_score(pred_tokens, gold_tokens):
        common = collections.Counter(pred_tokens) & collections.Counter(gold_tokens)
        num_same = sum(common.values())
        if num_same == 0:
            return 0
        precision = 1.0 * num_same / len(pred_tokens)
        recall = 1.0 * num_same / len(gold_tokens)
        f1 = (2 * precision * recall) / (precision + recall)
        return f1
    
    prediction = prediction.lower().strip()
    pred_tokens = prediction.split()
    
    max_f1 = 0
    for gt in ground_truths:
        gt = gt.lower().strip()
        gold_tokens = gt.split()
        f1 = _f1_score(pred_tokens, gold_tokens)
        max_f1 = max(max_f1, f1)
    
    return max_f1
def transpose_answers(batch_answers):
    transposed_answers = list(map(list, zip(*batch_answers)))
    return transposed_answers

In [7]:
def evaluate(model, tokenizer, test_loader, batch_size=12):
    exact_matches = []
    f1_scores = []
    
    test_dataset = SquadDataset(test_loader, tokenizer, max_length=600)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = model.device
    model.eval()
    
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            batch['answers'] = transpose_answers(batch['answers'])
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            generated_sequences = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=30,
            )
            
            generated_answers = []
            for i in range(len(generated_sequences)):
                input_length = input_ids.shape[1]
                generated_answer = tokenizer.decode(
                    generated_sequences[i][input_length:], 
                    skip_special_tokens=True
                )
                generated_answers.append(generated_answer)
                
            for i in range(len(generated_answers)):
                generated_answer = generated_answers[i]
                reference_answers = batch['answers'][i]

                em_score = compute_exact_match(generated_answer, reference_answers)
                f1_score = compute_f1(generated_answer, reference_answers)
                
                exact_matches.append(em_score)
                f1_scores.append(f1_score)
    
    avg_em = np.mean(exact_matches)
    avg_f1 = np.mean(f1_scores)
    
    print(f"Average Exact Match: {avg_em:.4f}")
    print(f"Average F1 Score: {avg_f1:.4f}")

    return avg_em, avg_f1

In [32]:
evaluate(model, tokenizer, dataset['validation'], batch_size=32)

Evaluating:   0%|          | 0/331 [00:00<?, ?it/s]

Average Exact Match: 0.7876
Average F1 Score: 0.9021


(np.float64(0.7876064333017976), np.float64(0.9020856125791711))

In [8]:
model

ChatGLMForConditionalGeneration(
  (transformer): ChatGLMModel(
    (embedding): Embedding(
      (word_embeddings): Embedding(65024, 4096)
    )
    (rotary_pos_emb): RotaryEmbedding()
    (encoder): GLMTransformer(
      (layers): ModuleList(
        (0-27): 28 x GLMBlock(
          (input_layernorm): RMSNorm()
          (self_attention): SelfAttention(
            (query_key_value): Linear(in_features=4096, out_features=4608, bias=True)
            (core_attention): CoreAttention(
              (attention_dropout): Dropout(p=0.0, inplace=False)
            )
            (dense): Linear(in_features=4096, out_features=4096, bias=False)
          )
          (post_attention_layernorm): RMSNorm()
          (mlp): MLP(
            (dense_h_to_4h): Linear(in_features=4096, out_features=27392, bias=False)
            (dense_4h_to_h): Linear(in_features=13696, out_features=4096, bias=False)
          )
        )
      )
      (final_layernorm): RMSNorm()
    )
    (output_layer): Linear(in_

In [9]:
include_layers = []
for name, _ in model.named_modules():
    if "mlp.dense_4h_to_h" in name:
        include_layers.append(name)
include_layers

['transformer.encoder.layers.0.mlp.dense_4h_to_h',
 'transformer.encoder.layers.1.mlp.dense_4h_to_h',
 'transformer.encoder.layers.2.mlp.dense_4h_to_h',
 'transformer.encoder.layers.3.mlp.dense_4h_to_h',
 'transformer.encoder.layers.4.mlp.dense_4h_to_h',
 'transformer.encoder.layers.5.mlp.dense_4h_to_h',
 'transformer.encoder.layers.6.mlp.dense_4h_to_h',
 'transformer.encoder.layers.7.mlp.dense_4h_to_h',
 'transformer.encoder.layers.8.mlp.dense_4h_to_h',
 'transformer.encoder.layers.9.mlp.dense_4h_to_h',
 'transformer.encoder.layers.10.mlp.dense_4h_to_h',
 'transformer.encoder.layers.11.mlp.dense_4h_to_h',
 'transformer.encoder.layers.12.mlp.dense_4h_to_h',
 'transformer.encoder.layers.13.mlp.dense_4h_to_h',
 'transformer.encoder.layers.14.mlp.dense_4h_to_h',
 'transformer.encoder.layers.15.mlp.dense_4h_to_h',
 'transformer.encoder.layers.16.mlp.dense_4h_to_h',
 'transformer.encoder.layers.17.mlp.dense_4h_to_h',
 'transformer.encoder.layers.18.mlp.dense_4h_to_h',
 'transformer.encoder.

In [10]:
class MaskedModel(MaskedGPT2LMModel):
    def __init__(self, model, include_layers=[]):
        super().__init__(model, include_layers)

    def forward(self, 
                input_ids,
                attention_mask,
                max_new_tokens=50):
        with torch.no_grad():
            generated_output = self.model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=max_new_tokens
            )
        return generated_output

In [11]:
def evaluate2(model, tokenizer, include_layers, mask, test_loader, batch_size=32):
    exact_matches = []
    f1_scores = []
    
    test_dataset = SquadDataset(test_loader, tokenizer, max_length=600)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = model.device
    model.eval()

    masked_model = MaskedModel(model, include_layers)
    mask = torch.tensor(mask.reshape(28, 4096), dtype=torch.bool).to(device)
    masked_model.register_hooks(mask)
    
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            batch['answers'] = transpose_answers(batch['answers'])
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            generated_sequences = masked_model.forward(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=30
            )
            
            generated_answers = []
            for i in range(len(generated_sequences)):
                input_length = input_ids.shape[1]
                generated_answer = tokenizer.decode(
                    generated_sequences[i][input_length:], 
                    skip_special_tokens=True
                )
                generated_answers.append(generated_answer)
                
            for i in range(len(generated_answers)):
                generated_answer = generated_answers[i]
                reference_answers = batch['answers'][i]

                em_score = compute_exact_match(generated_answer, reference_answers)
                f1_score = compute_f1(generated_answer, reference_answers)
                
                exact_matches.append(em_score)
                f1_scores.append(f1_score)
    masked_model.remove_hooks()
    avg_em = np.mean(exact_matches)
    avg_f1 = np.mean(f1_scores)
    
    print(f"Average Exact Match: {avg_em:.4f}")
    print(f"Average F1 Score: {avg_f1:.4f}")

    return avg_em, avg_f1

In [12]:
def remove_hooks(module):
    if hasattr(module, "_forward_hooks"):
        module._forward_hooks.clear()
    if hasattr(module, "_backward_hooks"):
        module._backward_hooks.clear()
    for child in module.children():
        remove_hooks(child)

remove_hooks(model)

In [38]:
mask_matrix = torch.rand(28, 4096) < 0.02
print(mask_matrix.reshape(28, 4096).sum(axis=1))
print(mask_matrix.sum())
evaluate2(model, tokenizer, include_layers, mask_matrix, dataset['validation'], batch_size=32)

tensor([ 72,  93,  84,  78,  86,  92,  76,  85,  78,  76,  84,  87,  96, 103,
         78,  68,  68,  80,  97, 102,  86,  84,  79,  88,  74,  80,  81,  83])
tensor(2338)


  mask = torch.tensor(mask.reshape(28, 4096), dtype=torch.bool).to(device)


Evaluating:   0%|          | 0/331 [00:00<?, ?it/s]

Average Exact Match: 0.7901
Average F1 Score: 0.9036


(np.float64(0.7900662251655629), np.float64(0.9036269946917632))

In [39]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
inputs_list = []
for i in range(10):
    inputs = tokenizer(f"Please answer the question according to the following context. Just answer in brief only one sentence is enough. \n Context: {dataset['train']['context'][i]} \n Question:{dataset['train']['question'][i]} \n Answer:", return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
# inputs_list = [tokenizer(inputs, return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['article'][:100]]
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=10, alpha=3.6, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum(axis=1))
evaluate2(model, tokenizer, include_layers, any_mask, dataset['validation'], batch_size=36)
evaluate2(model, tokenizer, include_layers, ~any_mask, dataset['validation'], batch_size=36)

(10, 114688)
(1, 114688)
[  4   1   4   1   1   1   1   0   0   0   1   0   1   1   1   2   2   4
  22  31  94 105 206 287 425 495 607 552]
[2849]


Evaluating:   0%|          | 0/294 [00:00<?, ?it/s]

Average Exact Match: 0.0000
Average F1 Score: 0.0358


Evaluating:   0%|          | 0/294 [00:00<?, ?it/s]

Average Exact Match: 0.0000
Average F1 Score: 0.0001
CPU times: user 22h 6min 4s, sys: 4min 32s, total: 22h 10min 37s
Wall time: 13h 3min 49s


(np.float64(0.0), np.float64(6.141418430419078e-05))

In [14]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
inputs_list = []
for i in range(10):
    inputs = tokenizer(f"Please answer the question according to the following context. Just answer in brief only one sentence is enough. \n Context: {dataset['train']['context'][i]} \n Question:{dataset['train']['question'][i]} \n Answer:", return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
# inputs_list = [tokenizer(inputs, return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['article'][:100]]
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=64, alpha=3.6, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum(axis=1))
evaluate2(model, tokenizer, include_layers, ~any_mask, dataset['validation'], batch_size=38)

(64, 114688)
(1, 114688)
[   7    1    6    1    1    1    1    1    1    1    1    1    2    1
    1    3    4   16   34  102  443  622 1161 1713 2452 3010 3185 3378]
[16150]


Evaluating:   0%|          | 0/279 [00:00<?, ?it/s]

Average Exact Match: 0.7258
Average F1 Score: 0.8655
CPU times: user 14h 50min 54s, sys: 2min 13s, total: 14h 53min 7s
Wall time: 6h 29min 58s


(np.float64(0.7258278145695364), np.float64(0.8655080424049906))

In [15]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
inputs_list = []
for i in range(10):
    inputs = tokenizer(f"Please answer the question according to the following context. Just answer in brief only one sentence is enough. \n Context: {dataset['train']['context'][i]} \n Question:{dataset['train']['question'][i]} \n Answer:", return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
# inputs_list = [tokenizer(inputs, return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['article'][:100]]
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=128, alpha=3.6, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum(axis=1))
evaluate2(model, tokenizer, include_layers, ~any_mask, dataset['validation'], batch_size=38)

(128, 114688)
(1, 114688)
[   8    2    4    0    1    1    1    1    1    1    1    1    5    2
    4    2    7   10   50  180  648 1059 1890 2665 3507 3871 3962 4019]
[21903]


Evaluating:   0%|          | 0/279 [00:00<?, ?it/s]

Average Exact Match: 0.7893
Average F1 Score: 0.9016
CPU times: user 16h 21min 42s, sys: 2min 20s, total: 16h 24min 2s
Wall time: 6h 34min 57s


(np.float64(0.7893093661305581), np.float64(0.9016385431285916))

In [16]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
inputs_list = []
for i in range(10):
    inputs = tokenizer(f"Please answer the question according to the following context. Just answer in brief only one sentence is enough. \n Context: {dataset['train']['context'][i]} \n Question:{dataset['train']['question'][i]} \n Answer:", return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
# inputs_list = [tokenizer(inputs, return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['article'][:100]]
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=256, alpha=3.6, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum(axis=1))
evaluate2(model, tokenizer, include_layers, ~any_mask, dataset['validation'], batch_size=38)

(256, 114688)
(1, 114688)
[  10    4    5    2    2    2    1    1    1    1    1    1    4    2
    3    5   13   37  134  348 1087 1939 2965 3753 4049 4088 4093 4095]
[26646]


Evaluating:   0%|          | 0/279 [00:00<?, ?it/s]

Average Exact Match: 0.7872
Average F1 Score: 0.9023
CPU times: user 16h 30min 23s, sys: 2min 19s, total: 16h 32min 43s
Wall time: 6h 35min 17s


(np.float64(0.7872280037842951), np.float64(0.9022577696457327))

In [17]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
inputs_list = []
for i in range(10):
    inputs = tokenizer(f"Please answer the question according to the following context. Just answer in brief only one sentence is enough. \n Context: {dataset['train']['context'][i]} \n Question:{dataset['train']['question'][i]} \n Answer:", return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
# inputs_list = [tokenizer(inputs, return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['article'][:100]]
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=512, alpha=3.6, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum(axis=1))
evaluate2(model, tokenizer, include_layers, ~any_mask, dataset['validation'], batch_size=38)

(512, 114688)
(1, 114688)
[  11    4    6    2    2    2    2    2    1    1    1    1    5    3
    5   17   61  189  325  781 1874 2806 3698 4045 4094 4096 4096 4096]
[30226]


Evaluating:   0%|          | 0/279 [00:00<?, ?it/s]

Average Exact Match: 0.7876
Average F1 Score: 0.9021
CPU times: user 20h 45min 14s, sys: 3min 5s, total: 20h 48min 19s
Wall time: 6h 52min 11s


(np.float64(0.7876064333017976), np.float64(0.902072998261921))

## SST2

In [18]:
dataset = load_dataset("glue", 'sst2')
dataset

DatasetDict({
    train: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 67349
    })
    validation: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 872
    })
    test: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 1821
    })
})

In [19]:
class SST2Dataset(Dataset):
    def __init__(self, data_dict, tokenizer, max_length=512):
        self.sentence = data_dict['sentence']
        self.label = data_dict['label']
        self.tokenizer = tokenizer
        self.max_length = max_length

        # self.dictionary = {
        #     0: "negative",
        #     1: "positive"
        # }
    def __len__(self):
        return len(self.label)

    def __getitem__(self, idx):
        sentence = self.sentence[idx]
        label = self.label[idx]

        prompt = f"Given the sentence '{sentence}', it expresses a sentiment of positive or negative. You only need to answer positive or negative. Answer:"
        encoding = self.tokenizer(prompt, return_tensors="pt", max_length=self.max_length, truncation=True, padding="max_length")
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'label': label
        }

In [20]:
def predict_sentiment(generated_answer):
    if "positive" in generated_answer.lower():
        return 1
    elif "negative" in generated_answer.lower():
        return 0
    else:
        return -1

In [13]:
from sklearn.metrics import accuracy_score
from tqdm import trange
def evaluate_sst2(model, include_layers, tokenizer, test_loader, batch_size=32):
    test_dataset = SST2Dataset(test_loader, tokenizer, max_length=512)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = model.device
    model.eval()
    
    all_predictions = []
    all_labels = test_loader['label']
    
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            generated_sequences = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=50
            )
            
            generated_answers = []
            for i in range(len(generated_sequences)):
                input_length = input_ids.shape[1]
                generated_answer = tokenizer.decode(
                    generated_sequences[i][input_length:], 
                    skip_special_tokens=True
                )
                generated_answers.append(generated_answer)
            for i in range(len(generated_answers)):
                generated_answer = generated_answers[i]
                generated_answer = predict_sentiment(generated_answer)
                    
                reference_answers = batch['label'][i]
                if generated_answer == -1:
                    if reference_answers == 0:
                        generated_answer = 1
                    else:
                        generated_answer = 0
                all_predictions.append(generated_answer)
    
    acc = accuracy_score(all_predictions, all_labels)
    print(f"Accuracy: {acc:.4f}")

    return acc

In [27]:
evaluate_sst2(model, include_layers, tokenizer, dataset['validation'], batch_size=32)

Evaluating:   0%|          | 0/28 [00:00<?, ?it/s]

Accuracy: 0.9392


0.9392201834862385

In [22]:
def evaluate_masked_sst2(model, include_layers, mask, tokenizer, test_loader, batch_size=36):
    test_dataset = SST2Dataset(test_loader, tokenizer, max_length=512)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    mask = torch.tensor(mask.reshape(28, 4096), dtype=bool)
    device = model.device
    model.eval()
    masked_model = MaskedModel(model, include_layers)
    masked_model.register_hooks(mask)
    all_predictions = []
    all_labels = test_loader['label']
    
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            generated_sequences = masked_model.forward(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=30
            )
            
            generated_answers = []
            for i in range(len(generated_sequences)):
                input_length = input_ids.shape[1]
                generated_answer = tokenizer.decode(
                    generated_sequences[i][input_length:], 
                    skip_special_tokens=True
                )
                generated_answers.append(generated_answer)
            for i in range(len(generated_answers)):
                generated_answer = generated_answers[i]
                generated_answer = predict_sentiment(generated_answer)
                    
                reference_answers = batch['label'][i]
                if generated_answer == -1:
                    if reference_answers == 0:
                        generated_answer = 1
                    else:
                        generated_answer = 0
                all_predictions.append(generated_answer)
    masked_model.remove_hooks()
    acc = accuracy_score(all_predictions, all_labels)
    print(f"Accuracy: {acc:.4f}")

    return acc

In [14]:
alpha = 3.6

In [24]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
inputs_list = [tokenizer(inputs, return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['validation']['sentence'][:100]]
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=10, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_sst2(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=38)
evaluate_masked_sst2(model, include_layers, any_mask, tokenizer, dataset['validation'], batch_size=38)

(10, 114688)
(1, 114688)
[  6   2   4   1   0   1   0   1   1   0   0   1   3   2   3   4   5  29
   7  23  10  12  15  11  16  69 350 659]
1235


Evaluating:   0%|          | 0/23 [00:00<?, ?it/s]

Accuracy: 0.0000


Evaluating:   0%|          | 0/23 [00:00<?, ?it/s]

Accuracy: 0.1422
CPU times: user 10h 2min 43s, sys: 1min 32s, total: 10h 4min 15s
Wall time: 1h 28min 39s


0.14220183486238533

In [25]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
inputs_list = [tokenizer(inputs, return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['validation']['sentence'][:100]]
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=64, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_sst2(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=38)

(64, 114688)
(1, 114688)
[   9    4    5    2    1    1    1    1    1    1    2    1    5    5
    5   10    9   77   53  261  521  342  340  294  423 1518 2302 3196]
9390


Evaluating:   0%|          | 0/23 [00:00<?, ?it/s]

Accuracy: 0.8624
CPU times: user 9h 32min 33s, sys: 1min 24s, total: 9h 33min 57s
Wall time: 58min 45s


0.8623853211009175

In [26]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
inputs_list = [tokenizer(inputs, return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['validation']['sentence'][:100]]
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=128, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_sst2(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=38)

(128, 114688)
(1, 114688)
[   9    3    5    1    1    1    1    1    1    1    1    2    5    5
   20    9   11  131  178  487  977  675  749  779  911 2699 3359 3832]
14854


Evaluating:   0%|          | 0/23 [00:00<?, ?it/s]

Accuracy: 0.9300
CPU times: user 10h 12min 15s, sys: 1min 24s, total: 10h 13min 39s
Wall time: 1h 1min 32s


0.930045871559633

In [27]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
inputs_list = [tokenizer(inputs, return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['validation']['sentence'][:100]]
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=256, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_sst2(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=38)



(256, 114688)
(1, 114688)
[  10    3    7    1    1    1    1    1    2    1    2    3    5    4
   28    8    9  186  279  745 1253 1013 1087 1317 1578 3306 3806 4045]
18702


Evaluating:   0%|          | 0/23 [00:00<?, ?it/s]

Accuracy: 0.9381
CPU times: user 11h 57min 41s, sys: 1min 35s, total: 11h 59min 17s
Wall time: 1h 8min 20s


0.9380733944954128

In [28]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
inputs_list = [tokenizer(inputs, return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['validation']['sentence'][:100]]
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=512, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_sst2(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=38)



(512, 114688)
(1, 114688)
[  12    5    7    1    2    1    2    1    1    1    2    2    6    4
   24   16   33  545  583 1508 2080 1996 2036 2384 3063 4024 4079 4095]
26513


Evaluating:   0%|          | 0/23 [00:00<?, ?it/s]

Accuracy: 0.9404
CPU times: user 19h 12min 16s, sys: 2min 33s, total: 19h 14min 50s
Wall time: 1h 36min 41s


0.9403669724770642

In [29]:
mask_matrix = torch.rand(28, 4096) < 0.02
print(mask_matrix.reshape(28, 4096).sum(axis=1))
print(mask_matrix.sum())
evaluate_masked_sst2(model, include_layers, mask_matrix, tokenizer, dataset['validation'], batch_size=38)

tensor([82, 90, 95, 76, 74, 81, 83, 87, 78, 86, 92, 75, 84, 65, 84, 88, 86, 87,
        71, 93, 82, 93, 92, 78, 82, 85, 73, 97])
tensor(2339)


  mask = torch.tensor(mask.reshape(28, 4096), dtype=bool)


Evaluating:   0%|          | 0/23 [00:00<?, ?it/s]

Accuracy: 0.9381


0.9380733944954128

## COLA

In [15]:
dataset = load_dataset("nyu-mll/glue", "cola")
dataset

DatasetDict({
    train: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 8551
    })
    validation: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 1043
    })
    test: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 1063
    })
})

In [31]:
class ColaDataset(Dataset):
    def __init__(self, data_dict, tokenizer, max_length=70):
        self.sentence = data_dict['sentence']
        self.label = data_dict['label']
        self.tokenizer = tokenizer
        self.max_length = max_length

        # self.dictionary = {
        #     0: "negative",
        #     1: "positive"
        # }
    def __len__(self):
        return len(self.label)

    def __getitem__(self, idx):
        sentence = self.sentence[idx]
        label = self.label[idx]

        prompt = f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. \n Sentence:{sentence} \n Please only answer 'acceptable' or 'unacceptable'. Answer:"
        encoding = self.tokenizer(prompt, return_tensors="pt", max_length=self.max_length, truncation=True, padding="max_length")
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'label': label
        }

In [32]:
def predict_cola(generated_answer):
    if "acceptable" in generated_answer.lower():
        return 1
    elif "unacceptable" in generated_answer.lower():
        return 0
    else:
        return -1

In [33]:
def evaluate_cola(model, include_layers, tokenizer, test_loader, batch_size=36):
    test_dataset = ColaDataset(test_loader, tokenizer, max_length=70)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = model.device
    model.eval()
    
    all_predictions = []
    all_labels = test_loader['label']
    
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            generated_sequences = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=10
            )
            
            generated_answers = []
            for i in range(len(generated_sequences)):
                input_length = input_ids.shape[1]
                generated_answer = tokenizer.decode(
                    generated_sequences[i][input_length:], 
                    skip_special_tokens=True
                )
                generated_answers.append(generated_answer)
            for i in range(len(generated_answers)):
                generated_answer = generated_answers[i]
                generated_answer = predict_cola(generated_answer)
                    
                reference_answers = batch['label'][i]
                if generated_answer == -1:
                    if reference_answers == 0:
                        generated_answer = 1
                    else:
                        generated_answer = 0
                all_predictions.append(generated_answer)
    
    acc = accuracy_score(all_predictions, all_labels)
    print(f"Accuracy: {acc:.4f}")

    return acc

In [34]:
evaluate_cola(model, include_layers, tokenizer, dataset['validation'], batch_size=38)

Evaluating:   0%|          | 0/28 [00:00<?, ?it/s]

Accuracy: 0.6894


0.6893576222435283

In [35]:
def evaluate_masked_cola(model, include_layers, mask, tokenizer, test_loader, batch_size=36):
    test_dataset = ColaDataset(test_loader, tokenizer, max_length=80)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    mask = torch.tensor(mask.reshape(28, 4096), dtype=bool)
    
    masked_model = MaskedModel(model, include_layers)
    masked_model.register_hooks(mask)
    
    device = model.device
    model.eval()
    
    all_predictions = []
    all_labels = test_loader['label']
    
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            generated_sequences = masked_model.forward(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=10
            )
            
            
            generated_answers = []
            for i in range(len(generated_sequences)):
                input_length = input_ids.shape[1]
                generated_answer = tokenizer.decode(
                    generated_sequences[i][input_length:], 
                    skip_special_tokens=True
                )
                generated_answers.append(generated_answer)
            for i in range(len(generated_answers)):
                generated_answer = generated_answers[i]
                generated_answer = predict_cola(generated_answer)
                    
                reference_answers = batch['label'][i]
                if generated_answer == -1:
                    if reference_answers == 0:
                        generated_answer = 1
                    else:
                        generated_answer = 0
                all_predictions.append(generated_answer)
                
    masked_model.remove_hooks()
    acc = accuracy_score(all_predictions, all_labels)
    print(f"Accuracy: {acc:.4f}")

    return acc

In [36]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=10, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_cola(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=38)
evaluate_masked_cola(model, include_layers, any_mask, tokenizer, dataset['validation'], batch_size=38)

(10, 114688)
(1, 114688)
[   7    1    4    1    1    1    1    1    1    0    1    1    2    1
    0    2    1    6    5    6   23   48  109  211  224  450  601 1258]
2967


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s]

Accuracy: 0.0000


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s]

Accuracy: 0.0000
CPU times: user 2d 15h 14min 50s, sys: 12min 9s, total: 2d 15h 26min 59s
Wall time: 4h 20min 8s


0.0

In [37]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=64, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_cola(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=38)

(64, 114688)
(1, 114688)
[   8    2    4    1    2    2    2    1    1    1    1    2    5    2
    2   20   27   37   51   83  219  307  578  882 1192 2426 2851 3597]
12306


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s]

Accuracy: 0.6913
CPU times: user 2d 14h 24min, sys: 9min 13s, total: 2d 14h 33min 14s
Wall time: 4h 10min 52s


0.6912751677852349

In [38]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=128, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_cola(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=38)

(128, 114688)
(1, 114688)
[   8    3    4    1    1    2    2    1    1    1    1    2    5    2
    4   14   29  106  137  213  447  559  832 1296 1659 3182 3500 3954]
15966


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s]

Accuracy: 0.6913
CPU times: user 2d 15h 30min 37s, sys: 8min 24s, total: 2d 15h 39min 2s
Wall time: 4h 14min 4s


0.6912751677852349

In [39]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=256, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_cola(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=38)

(256, 114688)
(1, 114688)
[  10    4    4    3    2    1    1    1    1    1    1    2    5    2
    3   24   52  264  311  413  825 1023 1406 1931 2320 3848 3955 4082]
20495


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s]

Accuracy: 0.6913
CPU times: user 2d 15h 28min 5s, sys: 8min 45s, total: 2d 15h 36min 50s
Wall time: 4h 14min 10s


0.6912751677852349

In [40]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=512, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_cola(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=38)



(512, 114688)
(1, 114688)
[  10    3    7    2    2    1    1    1    1    1    1    2    5    2
    7   66  134  736  791  923 1759 2122 2597 3016 3367 4092 4088 4095]
27832


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s]

Accuracy: 0.6913
CPU times: user 2d 22h 43min 57s, sys: 9min 8s, total: 2d 22h 53min 5s
Wall time: 4h 40min 37s


0.6912751677852349

In [41]:
mask_matrix = torch.rand(28, 4096) < 0.10
print(mask_matrix.reshape(28, 4096).sum(axis=1))
print(mask_matrix.sum())
evaluate_masked_cola(model, include_layers, mask_matrix, tokenizer, dataset['validation'], batch_size=38)

tensor([400, 396, 418, 401, 398, 399, 396, 413, 414, 397, 388, 395, 383, 432,
        439, 384, 433, 384, 439, 387, 437, 409, 413, 431, 413, 405, 419, 376])
tensor(11399)


  mask = torch.tensor(mask.reshape(28, 4096), dtype=bool)


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s]

Accuracy: 0.5753


0.5752636625119847

## MRPC

In [15]:
dataset = load_dataset("nyu-mll/glue", "mrpc")
dataset

DatasetDict({
    train: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 3668
    })
    validation: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 408
    })
    test: Dataset({
        features: ['sentence1', 'sentence2', 'label', 'idx'],
        num_rows: 1725
    })
})

In [16]:
class MRPCDataset(Dataset):
    def __init__(self, data_dict, tokenizer, max_length=200):
        self.sentence1 = data_dict['sentence1']
        self.sentence2 = data_dict['sentence2']
        self.label = data_dict['label']
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.label)

    def __getitem__(self, idx):
        sentence1 = self.sentence1[idx]
        sentence2 = self.sentence2[idx]
        label = self.label[idx]

        prompt = f"""
        Task: Compare the two sentences below and determine if they are paraphrases of each other. A paraphrase means that both sentences express the same idea, though they may use different words or structures.
        Sentence 1: {sentence1}
        Sentence 2: {sentence2}
        Please only respond with "yes" if they are paraphrases, or "no" if they are not.
        Answer:
        """
        encoding = self.tokenizer(prompt, return_tensors="pt", max_length=self.max_length, truncation=True, padding="max_length")
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'label': label
        }
        
def predict_mrpc(generated_answer):
    if "yes" in generated_answer.lower():
        return 1
    elif "no" in generated_answer.lower():
        return 0
    else:
        return -1
        
def evaluate_mrpc(model, include_layers, tokenizer, test_loader, batch_size=38):
    test_dataset = MRPCDataset(test_loader, tokenizer, max_length=200)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = model.device
    model.eval()
    
    all_predictions = []
    all_labels = test_loader['label']
    
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            generated_sequences = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=10
            )
            
            generated_answers = []
            for i in range(len(generated_sequences)):
                input_length = input_ids.shape[1]
                generated_answer = tokenizer.decode(
                    generated_sequences[i][input_length:], 
                    skip_special_tokens=True
                )
                generated_answers.append(generated_answer)
            for i in range(len(generated_answers)):
                generated_answer = generated_answers[i]
                generated_answer = predict_mrpc(generated_answer)
                    
                reference_answers = batch['label'][i]
                if generated_answer == -1:
                    if reference_answers == 0:
                        generated_answer = 1
                    else:
                        generated_answer = 0
                all_predictions.append(generated_answer)
    
    acc = accuracy_score(all_predictions, all_labels)
    print(f"Accuracy: {acc:.4f}")

    return acc

def evaluate_masked_mrpc(model, include_layers, mask, tokenizer, test_loader, batch_size=38):
    test_dataset = MRPCDataset(test_loader, tokenizer, max_length=200)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    mask = torch.tensor(mask.reshape(28, 4096), dtype=bool)
    
    masked_model = MaskedModel(model, include_layers)
    masked_model.register_hooks(mask)
    
    device = model.device
    model.eval()
    
    all_predictions = []
    all_labels = test_loader['label']
    
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            generated_sequences = masked_model.forward(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=10
            )
            
            generated_answers = []
            for i in range(len(generated_sequences)):
                input_length = input_ids.shape[1]
                generated_answer = tokenizer.decode(
                    generated_sequences[i][input_length:], 
                    skip_special_tokens=True
                )
                generated_answers.append(generated_answer)
            for i in range(len(generated_answers)):
                generated_answer = generated_answers[i]
                generated_answer = predict_mrpc(generated_answer)
                    
                reference_answers = batch['label'][i]
                if generated_answer == -1:
                    if reference_answers == 0:
                        generated_answer = 1
                    else:
                        generated_answer = 0
                all_predictions.append(generated_answer)
                
    masked_model.remove_hooks()
    acc = accuracy_score(all_predictions, all_labels)
    print(f"Accuracy: {acc:.4f}")

    return acc

In [44]:
evaluate_mrpc(model, include_layers, tokenizer, dataset['validation'], batch_size=38)

Evaluating:   0%|          | 0/11 [00:00<?, ?it/s]

Accuracy: 0.8162


0.8161764705882353

In [26]:
alpha = 3.6

In [33]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(60):
    prompt = f"""Task: Compare the two sentences below and determine if they are paraphrases of each other. A paraphrase means that both sentences express the same idea, though they may use different words or structures.
    Sentence 1: {dataset["train"]['sentence1'][i]}
    Sentence 2: {dataset["train"]['sentence2'][i]}
    Please only respond with "yes" if they are paraphrases, or "no" if they are not.
    Answer:"""
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=10, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_mrpc(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=38)
evaluate_masked_mrpc(model, include_layers, any_mask, tokenizer, dataset['validation'], batch_size=38)

(10, 114688)
(1, 114688)
[   6    1    4    1    1    1    1    1    1    0    1    1    2    1
    2    1    2    4   20   21   53   52   94  119  144  367  401 1042]
2344


Evaluating:   0%|          | 0/11 [00:00<?, ?it/s]

Accuracy: 0.0000


Evaluating:   0%|          | 0/11 [00:00<?, ?it/s]

Accuracy: 0.0441
CPU times: user 1d 9h 54min 6s, sys: 4min 46s, total: 1d 9h 58min 53s
Wall time: 2h 27min 13s


0.04411764705882353

In [34]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(60):
    prompt = f"""Task: Compare the two sentences below and determine if they are paraphrases of each other. A paraphrase means that both sentences express the same idea, though they may use different words or structures.
    Sentence 1: {dataset["train"]['sentence1'][i]}
    Sentence 2: {dataset["train"]['sentence2'][i]}
    Please only respond with "yes" if they are paraphrases, or "no" if they are not.
    Answer:"""
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=64, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_mrpc(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=38)

(64, 114688)
(1, 114688)
[  12    4    5    3    2    1    1    1    1    1    1    2    5    2
    3    4    6  132   71  138  279  506  793 1118 1672 2534 2887 3733]
13917


Evaluating:   0%|          | 0/11 [00:00<?, ?it/s]

Accuracy: 0.7770
CPU times: user 1d 10h 8min 43s, sys: 4min 22s, total: 1d 10h 13min 6s
Wall time: 2h 22min 53s


0.7769607843137255

In [35]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(60):
    prompt = f"""Task: Compare the two sentences below and determine if they are paraphrases of each other. A paraphrase means that both sentences express the same idea, though they may use different words or structures.
    Sentence 1: {dataset["train"]['sentence1'][i]}
    Sentence 2: {dataset["train"]['sentence2'][i]}
    Please only respond with "yes" if they are paraphrases, or "no" if they are not.
    Answer:"""
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=128, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_mrpc(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=38)

(128, 114688)
(1, 114688)
[  10    4    5    2    2    2    2    1    1    1    2    2    5    3
    4    7   19  211  166  325  573 1027 1222 1605 2256 3313 3712 4038]
18520


Evaluating:   0%|          | 0/11 [00:00<?, ?it/s]

Accuracy: 0.8186
CPU times: user 1d 10h 1min 36s, sys: 4min 1s, total: 1d 10h 5min 37s
Wall time: 2h 22min 22s


0.8186274509803921

In [36]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(60):
    prompt = f"""Task: Compare the two sentences below and determine if they are paraphrases of each other. A paraphrase means that both sentences express the same idea, though they may use different words or structures.
    Sentence 1: {dataset["train"]['sentence1'][i]}
    Sentence 2: {dataset["train"]['sentence2'][i]}
    Please only respond with "yes" if they are paraphrases, or "no" if they are not.
    Answer:"""
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=256, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_mrpc(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=38)

(256, 114688)
(1, 114688)
[  12    4    5    3    4    1    1    1    1    2    2    2    5    3
    3    5   21  307  237  529  991 1745 1784 2440 3194 3914 4061 4092]
23369


Evaluating:   0%|          | 0/11 [00:00<?, ?it/s]

Accuracy: 0.8162
CPU times: user 1d 12h 8min 51s, sys: 5min 12s, total: 1d 12h 14min 4s
Wall time: 2h 30min 42s


0.8161764705882353

In [37]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(60):
    prompt = f"""Task: Compare the two sentences below and determine if they are paraphrases of each other. A paraphrase means that both sentences express the same idea, though they may use different words or structures.
    Sentence 1: {dataset["train"]['sentence1'][i]}
    Sentence 2: {dataset["train"]['sentence2'][i]}
    Please only respond with "yes" if they are paraphrases, or "no" if they are not.
    Answer:"""
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=512, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_mrpc(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=38)

(512, 114688)
(1, 114688)
[  12    4    5    3    4    1    3    1    1    2    3    2    5    2
    4   13   53  497  479  861 1644 2570 2762 3523 3961 4090 4096 4096]
28697


Evaluating:   0%|          | 0/11 [00:00<?, ?it/s]

Accuracy: 0.8162
CPU times: user 1d 17h 18min 55s, sys: 6min 1s, total: 1d 17h 24min 57s
Wall time: 2h 53min 1s


0.8161764705882353

In [50]:
mask_matrix = torch.rand(28, 4096) < 0.10
print(mask_matrix.reshape(28, 4096).sum(axis=1))
print(mask_matrix.sum())
evaluate_masked_mrpc(model, include_layers, mask_matrix, tokenizer, dataset['validation'], batch_size=38)

tensor([414, 410, 429, 402, 404, 418, 417, 409, 401, 391, 413, 397, 448, 438,
        404, 416, 426, 416, 398, 392, 456, 438, 423, 414, 396, 394, 380, 422])
tensor(11566)


  mask = torch.tensor(mask.reshape(28, 4096), dtype=bool)


Evaluating:   0%|          | 0/11 [00:00<?, ?it/s]

Accuracy: 0.8039


0.803921568627451

## MNLI

In [17]:
dataset = load_dataset("nyu-mll/glue", "mnli")
dataset

DatasetDict({
    train: Dataset({
        features: ['premise', 'hypothesis', 'label', 'idx'],
        num_rows: 392702
    })
    validation_matched: Dataset({
        features: ['premise', 'hypothesis', 'label', 'idx'],
        num_rows: 9815
    })
    validation_mismatched: Dataset({
        features: ['premise', 'hypothesis', 'label', 'idx'],
        num_rows: 9832
    })
    test_matched: Dataset({
        features: ['premise', 'hypothesis', 'label', 'idx'],
        num_rows: 9796
    })
    test_mismatched: Dataset({
        features: ['premise', 'hypothesis', 'label', 'idx'],
        num_rows: 9847
    })
})

In [18]:
class MNLIDataset(Dataset):
    def __init__(self, data_dict, tokenizer, max_length=300):
        self.premise = data_dict['premise']
        self.hypothesis = data_dict['hypothesis']
        self.label = data_dict['label']
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.label)

    def __getitem__(self, idx):
        sentence1 = self.premise[idx]
        sentence2 = self.hypothesis[idx]
        label = self.label[idx]
        
        prompt = f"""Task: Determine the relationship between two sentences. The relationship can be one of three types: entailment, contradiction, or neutral.
            Premise: {sentence1}
            Hypothesis: {sentence2}
            Please only answer with "entailment" if they are entailment, "contradiction" if they are contradiction, or "neutral" if they are neutral.
            Answer: """
 
        encoding = self.tokenizer(prompt, return_tensors="pt", max_length=self.max_length, truncation=True, padding="max_length")
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'label': label
        }
        
def predict_mnli(generated_answer):
    if "entailment" in generated_answer.lower():
        return 2
    elif "contradiction" in generated_answer.lower():
        return 0
    elif "neutral" in generated_answer.lower():
        return 1
    else:
        return 3
        
def evaluate_mnli(model, include_layers, tokenizer, test_loader, batch_size=38):
    test_dataset = MNLIDataset(test_loader, tokenizer, max_length=300)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = model.device
    model.eval()
    
    all_predictions = []
    all_labels = test_loader['label']
    
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            generated_sequences = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=10
            )
            
            generated_answers = []
            for i in range(len(generated_sequences)):
                input_length = input_ids.shape[1]
                generated_answer = tokenizer.decode(
                    generated_sequences[i][input_length:], 
                    skip_special_tokens=True
                )
                generated_answers.append(generated_answer)
            for i in range(len(generated_answers)):
                generated_answer = generated_answers[i]
                generated_answer = predict_mnli(generated_answer)
                    
                reference_answers = batch['label'][i]
                all_predictions.append(generated_answer)
    
    acc = accuracy_score(all_predictions, all_labels)
    print(f"Accuracy: {acc:.4f}")

    return acc

def evaluate_masked_mnli(model, include_layers, mask, tokenizer, test_loader, batch_size=38):
    test_dataset = MNLIDataset(test_loader, tokenizer, max_length=300)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    mask = torch.tensor(mask.reshape(28, 4096), dtype=bool)
    
    masked_model = MaskedModel(model, include_layers)
    masked_model.register_hooks(mask)
    
    device = model.device
    model.eval()
    
    all_predictions = []
    all_labels = test_loader['label']
    
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            generated_sequences = masked_model.forward(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=10
            )
            

            generated_answers = []
            for i in range(len(generated_sequences)):
                input_length = input_ids.shape[1]
                generated_answer = tokenizer.decode(
                    generated_sequences[i][input_length:], 
                    skip_special_tokens=True
                )
                generated_answers.append(generated_answer)
            for i in range(len(generated_answers)):
                generated_answer = generated_answers[i]
                generated_answer = predict_mnli(generated_answer)
                    
                reference_answers = batch['label'][i]
                all_predictions.append(generated_answer)
                
    masked_model.remove_hooks()
    acc = accuracy_score(all_predictions, all_labels)
    print(f"Accuracy: {acc:.4f}")

    return acc

In [19]:
evaluate_mnli(model, include_layers, tokenizer, dataset['validation_matched'], batch_size=34)

Evaluating:   0%|          | 0/289 [00:00<?, ?it/s]

Accuracy: 0.2062


0.20621497707590422

In [20]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(100):
    prompt = f"""Task: Determine the relationship between two sentences. The relationship can be one of three types: entailment, contradiction, or neutral.
            Premise: {dataset["train"]['premise'][i]}
            Hypothesis: {dataset["train"]['hypothesis'][i]}
            Please only answer with "entailment" if they are entailment, "contradiction" if they are contradiction, or "neutral" if they are neutral.
            Answer: """
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=10, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_mnli(model, include_layers, ~any_mask, tokenizer, dataset['validation_matched'], batch_size=32)
evaluate_masked_mnli(model, include_layers, any_mask, tokenizer, dataset['validation_matched'], batch_size=32)

(10, 114688)
(1, 114688)
[   6    1    4    1    1    1    1    1    1    0    1    1    2    1
    0    3    2    3    6    8   24   42  100  104  159  419  670 1837]
3399


Evaluating:   0%|          | 0/307 [00:00<?, ?it/s]

Accuracy: 0.0000


Evaluating:   0%|          | 0/307 [00:00<?, ?it/s]

Accuracy: 0.0000
CPU times: user 2d 14h 26min 26s, sys: 12min 46s, total: 2d 14h 39min 12s
Wall time: 8h 48min 31s


0.0

In [21]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(100):
    prompt = f"""Task: Determine the relationship between two sentences. The relationship can be one of three types: entailment, contradiction, or neutral.
            Premise: {dataset["train"]['premise'][i]}
            Hypothesis: {dataset["train"]['hypothesis'][i]}
            Please only answer with "entailment" if they are entailment, "contradiction" if they are contradiction, or "neutral" if they are neutral.
            Answer: """
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=64, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_mnli(model, include_layers, ~any_mask, tokenizer, dataset['validation_matched'], batch_size=32)

(64, 114688)
(1, 114688)
[   7    2    5    1    3    1    1    1    1    1    1    1    5    2
    2    3    3   13   12   27   92  213  474  789 1220 2491 3138 4060]
12569


Evaluating:   0%|          | 0/307 [00:00<?, ?it/s]

Accuracy: 0.2123
CPU times: user 2d 11h 1min 9s, sys: 11min 44s, total: 2d 11h 12min 54s
Wall time: 6h 7min 51s


0.21232806928171166

In [22]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(100):
    prompt = f"""Task: Determine the relationship between two sentences. The relationship can be one of three types: entailment, contradiction, or neutral.
            Premise: {dataset["train"]['premise'][i]}
            Hypothesis: {dataset["train"]['hypothesis'][i]}
            Please only answer with "entailment" if they are entailment, "contradiction" if they are contradiction, or "neutral" if they are neutral.
            Answer: """
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=128, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_mnli(model, include_layers, ~any_mask, tokenizer, dataset['validation_matched'], batch_size=32)

(128, 114688)
(1, 114688)
[  11    4    6    2    2    2    2    1    1    1    1    2    5    4
    7    5    9   25   30   74  233  369  738 1226 1782 3190 3761 4093]
15586


Evaluating:   0%|          | 0/307 [00:00<?, ?it/s]

Accuracy: 0.2041
CPU times: user 2d 10h 14min 40s, sys: 9min 23s, total: 2d 10h 24min 3s
Wall time: 6h 2min 36s


0.20407539480387163

In [23]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(100):
    prompt = f"""Task: Determine the relationship between two sentences. The relationship can be one of three types: entailment, contradiction, or neutral.
            Premise: {dataset["train"]['premise'][i]}
            Hypothesis: {dataset["train"]['hypothesis'][i]}
            Please only answer with "entailment" if they are entailment, "contradiction" if they are contradiction, or "neutral" if they are neutral.
            Answer: """
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=256, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_mnli(model, include_layers, ~any_mask, tokenizer, dataset['validation_matched'], batch_size=32)

(256, 114688)
(1, 114688)
[  12    4    5    3    3    2    2    2    1    1    1    2    5    2
    7   11   12   79   72  211  598  857 1413 2104 2651 3892 4055 4096]
20103


Evaluating:   0%|          | 0/307 [00:00<?, ?it/s]

Accuracy: 0.2062
CPU times: user 2d 12h 4min 57s, sys: 9min 55s, total: 2d 12h 14min 52s
Wall time: 6h 9min 25s


0.20621497707590422

In [None]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(100):
    prompt = f"""Task: Determine the relationship between two sentences. The relationship can be one of three types: entailment, contradiction, or neutral.
            Premise: {dataset["train"]['premise'][i]}
            Hypothesis: {dataset["train"]['hypothesis'][i]}
            Please only answer with "entailment" if they are entailment, "contradiction" if they are contradiction, or "neutral" if they are neutral.
            Answer: """
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=512, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_mnli(model, include_layers, ~any_mask, tokenizer, dataset['validation_matched'], batch_size=32)

In [24]:
mask_matrix = torch.rand(28, 4096) < 0.10
print(mask_matrix.reshape(28, 4096).sum(axis=1))
print(mask_matrix.sum())
evaluate_masked_mnli(model, include_layers, mask_matrix, tokenizer, dataset['validation_matched'], batch_size=32)

tensor([429, 430, 382, 418, 398, 364, 412, 413, 421, 366, 365, 425, 406, 397,
        406, 404, 438, 430, 415, 423, 380, 380, 403, 399, 394, 366, 417, 404])
tensor(11285)


  mask = torch.tensor(mask.reshape(28, 4096), dtype=bool)


Evaluating:   0%|          | 0/307 [00:00<?, ?it/s]

Accuracy: 0.2292


0.22924095771777891

## QNLI

In [25]:
dataset = load_dataset("nyu-mll/glue", "qnli")
dataset

train-00000-of-00001.parquet:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/872k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/877k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/104743 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/5463 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5463 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['question', 'sentence', 'label', 'idx'],
        num_rows: 104743
    })
    validation: Dataset({
        features: ['question', 'sentence', 'label', 'idx'],
        num_rows: 5463
    })
    test: Dataset({
        features: ['question', 'sentence', 'label', 'idx'],
        num_rows: 5463
    })
})

In [28]:
class QNLIDataset(Dataset):
    def __init__(self, data_dict, tokenizer, max_length=400):
        self.question = data_dict['question']
        self.sentence = data_dict['sentence']
        self.label = data_dict['label']
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.label)

    def __getitem__(self, idx):
        sentence1 = self.question[idx]
        sentence2 = self.sentence[idx]
        label = self.label[idx]
        
        prompt = f"""Task: Analyze the following question and sentence to determine if the sentence contains the answer to the question. The relationship can be one of two types: entailment (the sentence answers the question) or not_entailment (the sentence does not answer the question).
            Question: {sentence1}
            Sentence: {sentence2}
            Please only answer with "Entailment" if the sentence answers the question, or "Not Entailment" if it does not.
            Answer:"""
 
        encoding = self.tokenizer(prompt, return_tensors="pt", max_length=self.max_length, truncation=True, padding="max_length")
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'label': label
        }
        
def predict_qnli(generated_answer):
    if "not entailment" in generated_answer.lower():
        return 0
    elif "entailment" in generated_answer.lower():
        return 1
    else:
        return 2
        
def evaluate_qnli(model, include_layers, tokenizer, test_loader, batch_size=38):
    test_dataset = QNLIDataset(test_loader, tokenizer, max_length=400)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = model.device
    model.eval()
    
    all_predictions = []
    all_labels = test_loader['label']
    
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            generated_sequences = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=10
            )
            
            
            generated_answers = []
            for i in range(len(generated_sequences)):
                input_length = input_ids.shape[1]
                generated_answer = tokenizer.decode(
                    generated_sequences[i][input_length:], 
                    skip_special_tokens=True
                )
                generated_answers.append(generated_answer)
            for i in range(len(generated_answers)):
                generated_answer = generated_answers[i]
                generated_answer = predict_qnli(generated_answer)
                    
                reference_answers = batch['label'][i]
                all_predictions.append(generated_answer)
    
    acc = accuracy_score(all_predictions, all_labels)
    print(f"Accuracy: {acc:.4f}")

    return acc

def evaluate_masked_qnli(model, include_layers, mask, tokenizer, test_loader, batch_size=38):
    test_dataset = QNLIDataset(test_loader, tokenizer, max_length=400)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    mask = torch.tensor(mask.reshape(28, 4096), dtype=bool)
    
    masked_model = MaskedModel(model, include_layers)
    masked_model.register_hooks(mask)
    
    device = model.device
    model.eval()
    
    all_predictions = []
    all_labels = test_loader['label']
    
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            generated_sequences = masked_model.forward(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=10
            )
            
            
            generated_answers = []
            for i in range(len(generated_sequences)):
                input_length = input_ids.shape[1]
                generated_answer = tokenizer.decode(
                    generated_sequences[i][input_length:], 
                    skip_special_tokens=True
                )
                generated_answers.append(generated_answer)
            for i in range(len(generated_answers)):
                generated_answer = generated_answers[i]
                generated_answer = predict_qnli(generated_answer)
                    
                reference_answers = batch['label'][i]
                all_predictions.append(generated_answer)
                
    masked_model.remove_hooks()
    acc = accuracy_score(all_predictions, all_labels)
    print(f"Accuracy: {acc:.4f}")

    return acc

In [29]:
evaluate_qnli(model, include_layers, tokenizer, dataset['validation'], batch_size=32)

Evaluating:   0%|          | 0/171 [00:00<?, ?it/s]

Accuracy: 0.1067


0.10671792055647081

In [30]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(100):
    prompt = dataset['train']['question'][i] + dataset['train']['sentence'][i]
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=10, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_qnli(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=32)
evaluate_masked_qnli(model, include_layers, any_mask, tokenizer, dataset['validation'], batch_size=32)

(10, 114688)
(1, 114688)
[  8   3   7   1   2   0   1   1   1   1   1   1   5   2   2   3   3   4
   3   5   6   5   8  21  32  65 164 550]
905


Evaluating:   0%|          | 0/171 [00:00<?, ?it/s]

Accuracy: 0.0000


Evaluating:   0%|          | 0/171 [00:00<?, ?it/s]

Accuracy: 0.0053
CPU times: user 20h 44min 10s, sys: 2min 48s, total: 20h 46min 59s
Wall time: 3h 35min 44s


0.005308438586857038

In [31]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(100):
    prompt = dataset['train']['question'][i] + dataset['train']['sentence'][i]
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=64, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_qnli(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=32)

(64, 114688)
(1, 114688)
[   6    2    5    1    1    1    1    1    1    1    1    2    5    4
    4    6    9   10  104  535  659  955 1272 1463 1563 1857 2398 2979]
13846


Evaluating:   0%|          | 0/171 [00:00<?, ?it/s]

Accuracy: 0.1545
CPU times: user 20h 22min 26s, sys: 2min 56s, total: 20h 25min 22s
Wall time: 2h 23min 34s


0.15449386783818414

In [32]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(100):
    prompt = dataset['train']['question'][i] + dataset['train']['sentence'][i]
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=128, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_qnli(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=32)

(128, 114688)
(1, 114688)
[   8    3    8    1    2    1    1    1    1    1    1    3    6    3
    4    6    9   13  187  858 1288 1655 2312 2634 2726 3243 3597 3769]
22341


Evaluating:   0%|          | 0/171 [00:00<?, ?it/s]

Accuracy: 0.1157
CPU times: user 20h 30min 48s, sys: 2min 52s, total: 20h 33min 41s
Wall time: 2h 23min 47s


0.11568735127219476

In [33]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(100):
    prompt = dataset['train']['question'][i] + dataset['train']['sentence'][i]
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=256, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_qnli(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=32)

(256, 114688)
(1, 114688)
[   8    3    5    1    3    1    1    1    1    2    2    2    5    4
    4    7   24   24  299 1140 1923 2545 3195 3526 3760 4011 4067 4081]
28645


Evaluating:   0%|          | 0/171 [00:00<?, ?it/s]

Accuracy: 0.1078
CPU times: user 21h 56min 51s, sys: 2min 56s, total: 21h 59min 48s
Wall time: 2h 28min 53s


0.10781621819513088

In [34]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(100):
    prompt = dataset['train']['question'][i] + dataset['train']['sentence'][i]
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=512, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_qnli(model, include_layers, ~any_mask, tokenizer, dataset['validation'], batch_size=32)



(512, 114688)
(1, 114688)
[  12    5    7    1    4    1    2    1    1    1    1    2    7    2
    3   15   16   46  392 1253 2159 2996 3650 3927 4052 4090 4096 4096]
30838


Evaluating:   0%|          | 0/171 [00:00<?, ?it/s]

Accuracy: 0.1067
CPU times: user 1d 8h 40min 40s, sys: 3min 6s, total: 1d 8h 43min 46s
Wall time: 3h 2min 31s


0.10671792055647081

In [35]:
mask_matrix = torch.rand(28, 4096) < 0.10
print(mask_matrix.reshape(28, 4096).sum(axis=1))
print(mask_matrix.sum())
evaluate_masked_qnli(model, include_layers, mask_matrix, tokenizer, dataset['validation'], batch_size=32)

tensor([412, 409, 420, 422, 435, 418, 398, 424, 415, 407, 401, 385, 403, 406,
        386, 414, 419, 418, 434, 416, 397, 420, 400, 424, 422, 425, 437, 396])
tensor(11563)


  mask = torch.tensor(mask.reshape(28, 4096), dtype=bool)


Evaluating:   0%|          | 0/171 [00:00<?, ?it/s]

Accuracy: 0.1197


0.11971444261394838

## AGNEWS

In [36]:
dataset = load_dataset('fancyzhx/ag_news')
dataset

README.md: 0.00B [00:00, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/18.6M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/120000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7600 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 120000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 7600
    })
})

In [39]:
class AGNEWSDataset(Dataset):
    def __init__(self, data_dict, tokenizer, max_length=1024):
        self.texts = data_dict['text']
        self.labels = data_dict['label']
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        prompt = f"""Task: Classify the following news article into one of four categories: World, Sports, Business, or Sci/Tech.
        Article: {text}
        You only need to answer the article belongs to the [World, Sports, Business or Sci/Tech] category.
        Answer: """
        encoding = self.tokenizer(prompt, return_tensors="pt", max_length=self.max_length, truncation=True, padding="max_length")
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'label': torch.tensor(label)
        }
def predict_agnews(generated_answer):
    if "world" in generated_answer.lower():
        return 0
    elif "sports" in generated_answer.lower():
        return 1
    elif "business" in generated_answer.lower():
        return 2
    elif "sci/tech" in generated_answer.lower():
        return 3
    else:
        return 4
        
def evaluate_agnews(model, include_layers, tokenizer, test_loader, batch_size=38):
    test_dataset = AGNEWSDataset(test_loader, tokenizer, max_length=400)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = model.device
    model.eval()
    
    all_predictions = []
    all_labels = test_loader['label']
    
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            generated_sequences = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=10
            )
            
            
            generated_answers = []
            for i in range(len(generated_sequences)):
                input_length = input_ids.shape[1]
                generated_answer = tokenizer.decode(
                    generated_sequences[i][input_length:], 
                    skip_special_tokens=True
                )
                generated_answers.append(generated_answer)
            for i in range(len(generated_answers)):
                generated_answer = generated_answers[i]
                generated_answer = predict_agnews(generated_answer)
                    
                reference_answers = batch['label'][i]
                all_predictions.append(generated_answer)
    
    acc = accuracy_score(all_predictions, all_labels)
    print(f"Accuracy: {acc:.4f}")

    return acc
    
def evaluate_masked_agnews(model, include_layers, mask, tokenizer, test_loader, batch_size=38):
    test_dataset = AGNEWSDataset(test_loader, tokenizer, max_length=400)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    mask = torch.tensor(mask.reshape(28, 4096), dtype=bool)
    
    masked_model = MaskedModel(model, include_layers)
    masked_model.register_hooks(mask)
    
    device = model.device
    model.eval()
    
    all_predictions = []
    all_labels = test_loader['label']
    
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            generated_sequences = masked_model.forward(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=10
            )
            
            
            generated_answers = []
            for i in range(len(generated_sequences)):
                input_length = input_ids.shape[1]
                generated_answer = tokenizer.decode(
                    generated_sequences[i][input_length:], 
                    skip_special_tokens=True
                )
                generated_answers.append(generated_answer)
            for i in range(len(generated_answers)):
                generated_answer = generated_answers[i]
                generated_answer = predict_agnews(generated_answer)
                    
                reference_answers = batch['label'][i]
                all_predictions.append(generated_answer)
                
    masked_model.remove_hooks()
    acc = accuracy_score(all_predictions, all_labels)
    print(f"Accuracy: {acc:.4f}")

    return acc

In [40]:
evaluate_agnews(model, include_layers, tokenizer, dataset['test'], batch_size=32)

Evaluating:   0%|          | 0/238 [00:00<?, ?it/s]

Accuracy: 0.9128


0.9127631578947368

In [41]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(100):
    prompt = f"""Task: Classify the following news article into one of four categories: World, Sports, Business, or Sci/Tech.
        Article: {dataset['train']['text'][i]}
        You only need to answer the article belongs to the [World, Sports, Business or Sci/Tech] category.
        Answer: """
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=10, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_agnews(model, include_layers, ~any_mask, tokenizer, dataset['test'], batch_size=32)
evaluate_masked_agnews(model, include_layers, any_mask, tokenizer, dataset['test'], batch_size=32)

(10, 114688)
(1, 114688)
[  9   1   5   1   2   1   1   1   1   0   1   1   2   2   2   5   4   6
   7  12  27  41  45  29  77 315 586 914]
2098


Evaluating:   0%|          | 0/238 [00:00<?, ?it/s]

Accuracy: 0.0000


Evaluating:   0%|          | 0/238 [00:00<?, ?it/s]

Accuracy: 0.0025
CPU times: user 2d 8h 5min 6s, sys: 6min 3s, total: 2d 8h 11min 10s
Wall time: 6h 32min 10s


0.0025

In [42]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(100):
    prompt = f"""Task: Classify the following news article into one of four categories: World, Sports, Business, or Sci/Tech.
        Article: {dataset['train']['text'][i]}
        You only need to answer the article belongs to the [World, Sports, Business or Sci/Tech] category.
        Answer: """
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=64, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_agnews(model, include_layers, ~any_mask, tokenizer, dataset['test'], batch_size=38)

(64, 114688)
(1, 114688)
[   8    2    4    1    3    2    2    1    1    1    1    2    5    3
    3   15   36   96   88  189  284  585  436  648  947 2161 2874 3433]
11831


Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Accuracy: 0.9036
CPU times: user 2d 7h 5min 31s, sys: 5min 51s, total: 2d 7h 11min 22s
Wall time: 4h 59min 27s


0.9035526315789474

In [43]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(100):
    prompt = f"""Task: Classify the following news article into one of four categories: World, Sports, Business, or Sci/Tech.
        Article: {dataset['train']['text'][i]}
        You only need to answer the article belongs to the [World, Sports, Business or Sci/Tech] category.
        Answer: """
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=128, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_agnews(model, include_layers, ~any_mask, tokenizer, dataset['test'], batch_size=32)

(128, 114688)
(1, 114688)
[  10    3    8    2    2    1    1    1    1    1    2    2    5    3
    5   18   62  161  291  511  972 1228 1190 1453 1758 3220 3687 3942]
18540


Evaluating:   0%|          | 0/238 [00:00<?, ?it/s]

Accuracy: 0.9087
CPU times: user 2d 7h 33min 3s, sys: 4min 54s, total: 2d 7h 37min 57s
Wall time: 4h 48min 15s


0.9086842105263158

In [44]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(100):
    prompt = f"""Task: Classify the following news article into one of four categories: World, Sports, Business, or Sci/Tech.
        Article: {dataset['train']['text'][i]}
        You only need to answer the article belongs to the [World, Sports, Business or Sci/Tech] category.
        Answer: """
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=256, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_agnews(model, include_layers, ~any_mask, tokenizer, dataset['test'], batch_size=32)

(256, 114688)
(1, 114688)
[   8    3    7    2    3    1    1    1    1    1    1    2    5    3
    6   27   56  205  492 1134 1884 2282 2375 2820 3136 3893 4056 4084]
26489


Evaluating:   0%|          | 0/238 [00:00<?, ?it/s]

Accuracy: 0.9128
CPU times: user 2d 7h 33min 23s, sys: 4min 57s, total: 2d 7h 38min 21s
Wall time: 4h 47min 24s


0.9127631578947368

In [45]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(100):
    prompt = f"""Task: Classify the following news article into one of four categories: World, Sports, Business, or Sci/Tech.
        Article: {dataset['train']['text'][i]}
        You only need to answer the article belongs to the [World, Sports, Business or Sci/Tech] category.
        Answer: """
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=512, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_agnews(model, include_layers, ~any_mask, tokenizer, dataset['test'], batch_size=32)

(512, 114688)
(1, 114688)
[  11    4    6    2    2    2    2    1    1    1    1    2    5    3
    6   27   79  336  905 1782 2762 3298 3542 3832 3968 4092 4096 4096]
32864


Evaluating:   0%|          | 0/238 [00:00<?, ?it/s]

Accuracy: 0.9128
CPU times: user 2d 17h 30min 24s, sys: 5min 25s, total: 2d 17h 35min 50s
Wall time: 5h 24min 40s


0.9127631578947368

In [46]:
mask_matrix = torch.rand(28, 4096) < 0.10
print(mask_matrix.reshape(28, 4096).sum(axis=1))
print(mask_matrix.sum())
evaluate_masked_agnews(model, include_layers, mask_matrix, tokenizer, dataset['test'], batch_size=32)

tensor([412, 410, 352, 421, 419, 407, 424, 417, 387, 434, 404, 415, 420, 439,
        411, 382, 409, 434, 409, 385, 393, 428, 415, 389, 391, 411, 411, 413])
tensor(11442)


  mask = torch.tensor(mask.reshape(28, 4096), dtype=bool)


Evaluating:   0%|          | 0/238 [00:00<?, ?it/s]

Accuracy: 0.9030


0.9030263157894737

## qqp

In [15]:
dataset = load_dataset("nyu-mll/glue", "qqp")
dataset

train-00000-of-00001.parquet:   0%|          | 0.00/33.6M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/3.73M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/36.7M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/363846 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/40430 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/390965 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['question1', 'question2', 'label', 'idx'],
        num_rows: 363846
    })
    validation: Dataset({
        features: ['question1', 'question2', 'label', 'idx'],
        num_rows: 40430
    })
    test: Dataset({
        features: ['question1', 'question2', 'label', 'idx'],
        num_rows: 390965
    })
})

In [17]:
dataset['test']['question1'][1]

'What are the top ten Consumer-to-Consumer E-commerce online?'

In [18]:
class QQPDataset(Dataset):
    def __init__(self, data_dict, tokenizer, max_length=1024):
        self.question1 = data_dict['question1']
        self.question2 = data_dict['question2']
        self.labels = data_dict['label']
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        question1 = self.question1[idx]
        question2 = self.question2[idx]
        label = self.labels[idx]
        prompt = f"""Task: Compare the following two questions, paying attention to their topics, details, and intents. Then determine whether they are asking the same thing.
                Question 1: {question1}
                Question 2: {question2}
                Please answer "same" if they are asking the same thing, or "different" if they are not. Answer:"""
        encoding = self.tokenizer(prompt, return_tensors="pt", max_length=self.max_length, truncation=True, padding="max_length")
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'label': torch.tensor(label)
        }
def predict_qqp(generated_answer):
    if "different" in generated_answer.lower():
        return 0
    elif "same" in generated_answer.lower():
        return 1
    else:
        return 2
        
def evaluate_qqp(model, include_layers, tokenizer, test_loader, batch_size=38):
    test_dataset = QQPDataset(test_loader, tokenizer, max_length=400)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = model.device
    model.eval()
    
    all_predictions = []
    all_labels = test_loader['label']
    
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            generated_sequences = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=10
            )
            
            
            generated_answers = []
            for i in range(len(generated_sequences)):
                input_length = input_ids.shape[1]
                generated_answer = tokenizer.decode(
                    generated_sequences[i][input_length:], 
                    skip_special_tokens=True
                )
                generated_answers.append(generated_answer)
            for i in range(len(generated_answers)):
                generated_answer = generated_answers[i]
                generated_answer = predict_qqp(generated_answer)
                    
                reference_answers = batch['label'][i]
                all_predictions.append(generated_answer)
    
    acc = accuracy_score(all_predictions, all_labels)
    print(f"Accuracy: {acc:.4f}")

    return acc
    
def evaluate_masked_qqp(model, include_layers, mask, tokenizer, test_loader, batch_size=38):
    test_dataset = QQPDataset(test_loader, tokenizer, max_length=400)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    mask = torch.tensor(mask.reshape(28, 4096), dtype=bool)
    
    masked_model = MaskedModel(model, include_layers)
    masked_model.register_hooks(mask)
    
    device = model.device
    model.eval()
    
    all_predictions = []
    all_labels = test_loader['label']
    
    with torch.no_grad():
        for batch in tqdm(test_dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            
            generated_sequences = masked_model.forward(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=10
            )
            
            
            generated_answers = []
            for i in range(len(generated_sequences)):
                input_length = input_ids.shape[1]
                generated_answer = tokenizer.decode(
                    generated_sequences[i][input_length:], 
                    skip_special_tokens=True
                )
                generated_answers.append(generated_answer)
            for i in range(len(generated_answers)):
                generated_answer = generated_answers[i]
                generated_answer = predict_qqp(generated_answer)
                    
                reference_answers = batch['label'][i]
                all_predictions.append(generated_answer)
                
    masked_model.remove_hooks()
    acc = accuracy_score(all_predictions, all_labels)
    print(f"Accuracy: {acc:.4f}")

    return acc

In [None]:
evaluate_qqp(model, include_layers, tokenizer, dataset['validation'], batch_size=42)

Evaluating:   0%|          | 0/963 [00:00<?, ?it/s]

In [None]:
%%time
extractor = GroupFBNFeatureExtractor(model, include_layers=include_layers, device=model.device)
# inputs_list = [tokenizer(f"Task: Determine whether the following English sentences are grammatically correct or linguistically acceptable. Please only answer 'acceptable' or 'unacceptable'. \n Sentence:{inputs} \n Answer:", return_tensors="pt", max_length=1024, truncation=True) for inputs in dataset['train']['sentence'][:200]]
inputs_list = []
for i in range(50):
    prompt = dataset['train']['question1'][i] + "\t" + dataset['train']['question2'][i]
    inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True)
    inputs_list.append(inputs)
feature_masked = extractor.fit(inputs_list=inputs_list, n_components=10, alpha=alpha, random_state=666)
components = extractor.mixing_
print(components.shape)
any_mask = np.any(components, axis=0).reshape(1, -1)
print(any_mask.shape)
print(any_mask.reshape(28, 4096).sum(axis=1))
print(any_mask.sum())
evaluate_masked_qqp(model, include_layers, ~any_mask, tokenizer, dataset['test'], batch_size=32)