In [4]:
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification
) 
import torch
from models import *

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'


You passed along `num_labels=3` with an incompatible id to label map: {'0': 'Refused', '1': 'Entailed'}. The number of labels wil be overwritten to 2.


In [5]:
from pprint import pprint
import pandas as pd

In [6]:
# Free-text evidence handed to the text verifier.
evidence = """
Narendra Modi is the 14th Prime Minister of India. He won the largest share of seats in elections.
"""

# Tabular evidence handed to the table verifier; every cell value is a string.
data = {"Prime Minister": ["Narendra Modi", "Manmohan Singh", "Nehru"], "Number of seats": ["312", "121", "300"]}
table = pd.DataFrame.from_dict(data)

claim = 'Nehru won the largest number of seats.'

# Names paired positionally with the text model's class probabilities below.
# NOTE(review): assumes this order matches the model's logit order — confirm
# against txt_model.config.id2label.
txt_labels = labels = ['Support', 'NotEnoughInfo', 'Refute']

# Inference only: disable autograd so neither forward pass builds a gradient graph.
with torch.no_grad():
    txt_int = txt_tokenizer(evidence, claim, truncation=True, return_tensors='pt').to(device)
    # Forward the whole encoding (attention mask included), not just the input ids.
    output = txt_model(**txt_int)
    txt_prediction = torch.softmax(output['logits'][0], -1)

    # Place the table encoding on whichever device the table model lives on.
    tab_encoding = tab_tokenizer(table, claim, return_tensors='pt').to(tab_model.device)
    tab_outputs = tab_model(**tab_encoding)
    pred_class_idx = tab_outputs.logits[0].argmax(dim=0).item()

# Class probabilities as percentages rounded to one decimal place.
text_pred = {name: round(float(pred)*100, 1) for pred, name in zip(txt_prediction, labels)}

print(f'Evidence: {evidence}')
print(table)

print(f'Claim: {claim}')
print()
# pprint on a str prints its repr, which is why the verdict lines appear quoted.
pprint(f'Text Verification: {text_pred}')
pprint(f'Table Verification: {tab_model.config.id2label[pred_class_idx]}')

Evidence: 
Narendra Modi is the 14th Prime Minister of India. He won the largest share of seats in elections.

   Prime Minister Number of seats
0   Narendra Modi             312
1  Manmohan Singh             121
2           Nehru             300
Claim: Nehru won the largest number of seats.

"Text Verification: {'Support': 0.2, 'NotEnoughInfo': 6.9, 'Refute': 92.9}"
'Table Verification: Refused'


In [7]:
# Free-text evidence (Hindi) handed to the text verifier; the claim stays in English.
evidence = """
नरेंद्र मोदी भारत के 14वें प्रधानमंत्री हैं। उन्होंने चुनावों में सबसे अधिक सीटें जीतीं।
"""

# Tabular evidence handed to the table verifier; every cell value is a string.
data = {"Prime Minister": ["Narendra Modi", "Manmohan Singh", "Nehru"], "Number of seats": ["312", "121", "300"]}
table = pd.DataFrame.from_dict(data)

claim = 'Modi won the largest seats in the elections'

# Names paired positionally with the text model's class probabilities below.
# NOTE(review): assumes this order matches the model's logit order — confirm
# against txt_model.config.id2label.
txt_labels = labels = ['Support', 'NotEnoughInfo', 'Refute']

# Inference only: disable autograd so neither forward pass builds a gradient graph.
with torch.no_grad():
    txt_int = txt_tokenizer(evidence, claim, truncation=True, return_tensors='pt').to(device)
    # Forward the whole encoding (attention mask included), not just the input ids.
    output = txt_model(**txt_int)
    txt_prediction = torch.softmax(output['logits'][0], -1)

    # Place the table encoding on whichever device the table model lives on.
    tab_encoding = tab_tokenizer(table, claim, return_tensors='pt').to(tab_model.device)
    tab_outputs = tab_model(**tab_encoding)
    pred_class_idx = tab_outputs.logits[0].argmax(dim=0).item()

# Class probabilities as percentages rounded to one decimal place.
text_pred = {name: round(float(pred)*100, 1) for pred, name in zip(txt_prediction, labels)}

print(f'Evidence: {evidence}')
print(table)

print(f'Claim: {claim}')
print()
# pprint on a str prints its repr, which is why the verdict lines appear quoted.
pprint(f'Text Verification: {text_pred}')
pprint(f'Table Verification: {tab_model.config.id2label[pred_class_idx]}')

Evidence: 
नरेंद्र मोदी भारत के 14वें प्रधानमंत्री हैं। उन्होंने चुनावों में सबसे अधिक सीटें जीतीं।

   Prime Minister Number of seats
0   Narendra Modi             312
1  Manmohan Singh             121
2           Nehru             300
Claim: Modi won the largest seats in the elections

"Text Verification: {'Support': 97.1, 'NotEnoughInfo': 1.9, 'Refute': 1.0}"
'Table Verification: Entailed'


In [10]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from transformers_interpret import PairwiseSequenceClassificationExplainer


# Wrap the text verifier and its tokenizer in a pairwise attribution explainer.
pairwise_explainer = PairwiseSequenceClassificationExplainer(
    model=txt_model,
    tokenizer=txt_tokenizer,
)

# NOTE(review): `evidence` and `claim` are not defined in this cell; they hold
# whatever the most recently executed cell assigned to them.
pairwise_attr = pairwise_explainer(evidence, claim)

In [11]:
# Render the word attributions for the pair explained above.
# NOTE(review): presumably this also saves the rendering to attn.html — confirm.
# The trailing semicolon stops the notebook from echoing the call's return value,
# which otherwise shows the same attribution table a second time.
pairwise_explainer.visualize("attn.html");

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,entailment (0.97),entailment,4.41,[CLS] ▁नर ेंद्र ▁मो दी ▁भारत ▁के ▁14 वें ▁प्रधान मंत्री ▁हैं । ▁उ न्ह ों ने ▁चु नाव ों ▁में ▁सब से ▁अधिक ▁सी टें ▁जी ती ं । [SEP] ▁Modi ▁won ▁the ▁ largest ▁seat s ▁in ▁the ▁ elections [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,entailment (0.97),entailment,4.41,[CLS] ▁नर ेंद्र ▁मो दी ▁भारत ▁के ▁14 वें ▁प्रधान मंत्री ▁हैं । ▁उ न्ह ों ने ▁चु नाव ों ▁में ▁सब से ▁अधिक ▁सी टें ▁जी ती ं । [SEP] ▁Modi ▁won ▁the ▁ largest ▁seat s ▁in ▁the ▁ elections [SEP]
,,,,


In [12]:
from transformers import pipeline

In [14]:
# Fill-mask pipeline built on the bert-base-uncased checkpoint.
pipe = pipeline(task='fill-mask', model='bert-base-uncased')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [17]:
# Use the tokenizer's own mask token so the prompt matches the loaded checkpoint.
masked_prompt = pipe.tokenizer.mask_token + " was the first Prime Minister of India"
pipe(masked_prompt)

[{'score': 0.9909085631370544,
  'token': 2002,
  'token_str': 'he',
  'sequence': 'he was the first prime minister of india'},
 {'score': 0.00138852559030056,
  'token': 2016,
  'token_str': 'she',
  'sequence': 'she was the first prime minister of india'},
 {'score': 0.0010983875254169106,
  'token': 5960,
  'token_str': 'singh',
  'sequence': 'singh was the first prime minister of india'},
 {'score': 0.0005622405442409217,
  'token': 23556,
  'token_str': 'nehru',
  'sequence': 'nehru was the first prime minister of india'},
 {'score': 0.00048819667426869273,
  'token': 12338,
  'token_str': 'gandhi',
  'sequence': 'gandhi was the first prime minister of india'}]