In [None]:
import pandas as pd
import time, datetime, numpy as np
from transformers import BertTokenizer, BertForSequenceClassification
from transformers_interpret import SequenceClassificationExplainer
import torch
from torch.utils.data import Dataset, DataLoader
import time

In [48]:
# Load the test CSV into a DataFrame.
# NOTE(review): 'path_to_test_csv' looks like a placeholder - point it at the real file.
test = pd.read_csv('path_to_test_csv')

In [49]:
def select_cols(df, col_list):
    '''
    Return the part of `df` addressed by `col_list`.

    `col_list` is used directly as the key of a `df[...]` lookup, so the
    result keeps the columns in the order they are listed.
    '''
    selected = df[col_list]
    return selected

def combine_sentences(df, col_list):
    '''
    Return a copy of `df` with an added 'combined_text' column.

    The values of the columns in `col_list` are converted to strings, joined
    row by row with '[SEP]' and prefixed with '[CLS]'. The input frame is
    left unmodified.
    '''
    # Row-wise '[SEP]'-joined string built from the requested columns.
    joined_segments = df[col_list].astype(str).agg('[SEP]'.join, axis=1)

    combined = df.copy()
    combined['combined_text'] = '[CLS]' + joined_segments
    return combined

In [50]:
# Choose the target columns.
# The last entry is the label column; the entries before it are the text
# columns that get joined, in this order, into `combined_text` (the later
# cell passes `target_cols[:-1]` to combine_sentences).
# Keep exactly one assignment active; the others are alternative input orders.

target_cols = ['Sentence1', 'Sentence2', 'Explanation_1', 'gold_label'] # Premise, Hypothesis, Explanation
# target_cols = ['Sentence1', 'Sentence2', 'gold_label'] # Premise, Hypothesis
# target_cols = ['Sentence2', 'Sentence1', 'gold_label'] # Hypothesis, Premise
# target_cols = ['Sentence1', 'Explanation_1', 'Sentence2', 'gold_label'] # Premise, Explanation, Hypothesis
# target_cols = ['Sentence2', 'Explanation_1', 'Sentence1', 'gold_label'] # Hypothesis, Explanation, Premise

# Restrict the loaded test frame to the chosen columns.
test_df = select_cols(test, target_cols)

In [51]:
# Build `combined_text` from every selected column except the trailing label column.
test_df = combine_sentences(test_df, target_cols[:-1])

# Integer class id for each gold label string.
lables = dict(entailment=0, neutral=1, contradiction=2)

# Gold labels that are not keys of the mapping become NaN.
test_df = test_df.assign(labels=test_df['gold_label'].map(lables))

In [52]:
# Read the CSV again to pick up the highlight-annotated sentence columns.
test_annotated = pd.read_csv('path_to_test_csv')

# Both columns are attached in one step; values are matched on the row index.
test_df = test_df.assign(
    highlight_premise=test_annotated["Sentence1_marked_1"],
    highlight_hypothesis=test_annotated["Sentence2_marked_1"],
)

In [53]:
# Preview the first two rows to check the derived columns.
test_df.head(2)

Unnamed: 0,Sentence1,Sentence2,Explanation_1,gold_label,combined_text,labels,highlight_premise,highlight_hypothesis
0,This church choir sings to the masses as they ...,The church has cracks in the ceiling.,Not all churches have cracks in the ceiling,neutral,[CLS]This church choir sings to the masses as ...,1,This church choir sings to the masses as they ...,The church has *cracks* *in* *the* *ceiling.*
1,This church choir sings to the masses as they ...,The church is filled with song.,"""Filled with song"" is a rephrasing of the ""cho...",entailment,[CLS]This church choir sings to the masses as ...,0,This church *choir* *sings* *to* *the* *masse...,The church is *filled* *with* *song.*


In [54]:
# Sanity check: show the combined input string of the row labelled 0.
test_df['combined_text'][0]

'[CLS]This church choir sings to the masses as they sing joyous songs from the book at a church.[SEP]The church has cracks in the ceiling.[SEP]Not all churches have cracks in the ceiling'

In [9]:
# Setup device and tokenizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Initialize the model
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=3,
    output_attentions=True,
    output_hidden_states=False
)

# Freeze every parameter except encoder layer 11 and the classifier head
# (note: the pooler stays frozen).
# The full "encoder.layer.11." fragment is matched instead of the bare
# substring "11" so that no unrelated parameter name containing "11" can be
# left trainable by accident.
for name, param in model.named_parameters():
    if 'classifier' not in name and 'encoder.layer.11.' not in name:
        param.requires_grad = False

# `device` was already chosen at the top of this cell; move the model onto it.
model.to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [10]:
# Variant of the trained model being evaluated. The name is informational
# here: the path below does not interpolate it.
# model_type = 'premise_hypothesis_explanation'
model_type = 'model_v1'
# model_type = 'hypothesis_premise'
# model_type = 'premise_explanation_hypothesis'
# model_type = 'hypothesis_explanation_premise'

# NOTE(review): looks like a placeholder - point it at the real checkpoint file.
model_save_path = f"path_to_model_weights"

# map_location remaps the stored tensors onto the active device, so a
# checkpoint saved on a GPU also loads on a CPU-only machine.
# NOTE(review): torch.load unpickles the file - only load checkpoints you trust.
model.load_state_dict(torch.load(model_save_path, map_location=device))

<All keys matched successfully>

In [24]:
# Show the combined input string of the row labelled 0 again, for reference
# before the attribution cells.
test_df['combined_text'][0]

'[CLS]This church choir sings to the masses as they sing joyous songs from the book at a church.[SEP]The church has cracks in the ceiling.[SEP]Not all churches have cracks in the ceiling'

In [12]:
def extract_marked_token_attributions(version1, version2):
    """Collect the entries of ``version2`` that line up with ``*``-marked tokens.

    ``version1`` is walked entry by entry. An entry whose first element is
    ``'*'`` opens or closes a marked span and has no counterpart in
    ``version2``. Every other entry is paired with the next unread entry of
    ``version2``; that ``version2`` entry is kept when the walk is currently
    inside a marked span.

    Args:
        version1: sequence of entries whose first element is the token
            string, including the ``'*'`` marker tokens.
        version2: sequence of entries for the same tokens without markers.
            It may be longer than the unmarked part of ``version1``; the
            surplus is ignored.

    Returns:
        list: the ``version2`` entries that fall inside marked spans, in order.

    An opening ``'*'`` without a closing partner, or a ``version2`` that is
    shorter than the unmarked part of ``version1``, ends the walk and returns
    what was collected so far instead of raising ``IndexError``.
    """
    desired_version = []
    inside_marked_span = False
    index_2 = 0

    for entry in version1:
        if entry[0] == '*':
            # Marker tokens only toggle the span state; they consume nothing
            # from version2.
            inside_marked_span = not inside_marked_span
            continue

        if index_2 >= len(version2):
            # version2 is exhausted: nothing further can be aligned.
            break

        if inside_marked_span:
            desired_version.append(version2[index_2])
        index_2 += 1

    return desired_version

In [None]:
from transformers_interpret import SequenceClassificationExplainer

def compute_segment_attributions(df, model, tokenizer):
    """Score how much positive attribution falls on the human-highlighted tokens.

    Every row's ``combined_text`` is explained with
    ``SequenceClassificationExplainer``. Rows whose predicted class index
    differs from ``row['labels']`` are skipped. For the remaining rows the
    highlighted premise and hypothesis (words wrapped in ``*``) are explained
    as well, only to locate the marked tokens; the attributions those tokens
    received in the unmarked text are summed and divided by the sum of all
    positive attributions of the unmarked text.

    Args:
        df: DataFrame providing the columns ``combined_text``, ``labels``,
            ``highlight_premise`` and ``highlight_hypothesis``.
        model: model handed to ``SequenceClassificationExplainer``.
        tokenizer: tokenizer handed to ``SequenceClassificationExplainer``.

    Returns:
        list: one score per scored row, in iteration order. ``nan`` is stored
        for a row whose positive attributions sum to zero. Rows with a wrong
        prediction or a missing highlight annotation contribute no entry.
    """
    cls_explainer = SequenceClassificationExplainer(model, tokenizer)
    results = []

    for index, row in df.iterrows():
        true_label = row['labels']
        highlight_premise = row['highlight_premise']
        highlight_hypothesis = row['highlight_hypothesis']

        # The segments are separated by a space instead of being glued
        # together: the highlighted sentences below are joined with a space
        # too, and without it the last word of one segment can merge with the
        # first word of the next, which shifts the token alignment.
        text = row['combined_text'].replace("[CLS]", "").replace("[SEP]", " ")
        print(f"\nProcessing index {index} with text: {text}")
        full_attributions = cls_explainer(text)

        # Only correctly classified rows are scored.
        if cls_explainer.predicted_class_index != true_label:
            continue

        # A missing annotation (e.g. NaN) cannot be concatenated; skip the row
        # rather than abort the whole run.
        if not (isinstance(highlight_premise, str) and isinstance(highlight_hypothesis, str)):
            print(f"Skipping index {index}: missing highlight annotation")
            continue

        annotated_attributions = cls_explainer(highlight_premise + " " + highlight_hypothesis) # premise before hypothesis (change order if input order is changed)

        desired_attributions = extract_marked_token_attributions(annotated_attributions, full_attributions)

        desired_attribution_score = 0
        for pair in desired_attributions:
            print(f"Annotated Word: {pair[0]}\nAttribution: {pair[1]}")
            print("====================================================")
            desired_attribution_score += pair[1]

        total_positive_attribution_score = sum(
            pair[1] for pair in full_attributions if pair[1] > 0
        )

        # The ratio is undefined when there is no positive attribution at all.
        if total_positive_attribution_score > 0:
            test_case_score = desired_attribution_score / total_positive_attribution_score
        else:
            test_case_score = float('nan')

        print(f"Total Desired Attribution: {desired_attribution_score}")
        print(f"Total Attribution Score: {total_positive_attribution_score}")
        print(f"Test Case Score: {test_case_score}")

        results.append(test_case_score)

    return results

# Expected layout of `combined_text`: [CLS] Premise [SEP] Hypothesis [SEP] anything else.
# VERY IMPORTANT: the premise must come before the hypothesis. If the input
# order is changed, also change the order in which the highlighted sentences
# are concatenated inside compute_segment_attributions (marked there with a comment).
results = compute_segment_attributions(test_df, model, tokenizer)
print("\nFinal Attribution results:", results)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Attribution: 0.2041970428374019
Annotated Word: in
Attribution: 0.22068736939743888
Total Desired Attribution: 0.9903893866885354
Total Attribution Score: 3.042191485358796
Test Case Score: 0.3255512979557659

Processing index 9528 with text: Several women in headscarves are standing in a cobbled courtyard.Several women wearing headscarves stand and talk to each other in a cobbled courtyard.
Annotated Word: talk
Attribution: -0.822270232815427
Annotated Word: to
Attribution: -0.020640264935934724
Annotated Word: each
Attribution: -0.15348131192078834
Annotated Word: other
Attribution: -0.23142576561134484
Total Desired Attribution: -1.2278175752834948
Total Attribution Score: 1.587453690691672
Test Case Score: -0.7734509563857075

Processing index 9529 with text: two young children wearing wool caps and mitten covering their mouths while whispering to each other.The children are yelling loudly.

Processing index 9530 with

In [None]:
# Tabulate the scores with a 1-based running number. The number is the
# position within `results`, not the row index of the originating test
# example (only rows whose prediction matched the label were scored).
values = results
df = pd.DataFrame(values, columns=['Attribution'])
df.index += 1
df = df.reset_index().rename(columns={'index': 'Test Case'})

df.to_csv('attribution2.csv', index=False) # Save to CSV
