# *SW12: use of pre-trained models and explainability (applied to "Sentiment Analysis" and "classification" tasks)*

inspired by [https://github.com/cdpierse/transformers-interpret#sequence-classification-explainer](https://github.com/cdpierse/transformers-interpret#sequence-classification-explainer), with input from this page: https://awesomeopensource.com/project/cdpierse/transformers-interpret

In [1]:
# !pip3 install -U transformers
# !pip3 install -U transformers_interpret

In [2]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
#USE distilbert-base-uncased-finetuned-sst-2-english: a distilbert model finetuned on a sentiment analysis task.

model_name = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [18]:
# With both the model and tokenizer initialized we are now able to get explanations on an example text.
from transformers_interpret import SequenceClassificationExplainer, ZeroShotClassificationExplainer

In [19]:
cls_explainer = SequenceClassificationExplainer(
    model,
    tokenizer)
word_attributions = cls_explainer("I love you, I like you")
word_attributions

[('[CLS]', 0.0),
 ('i', 0.27785420350970025),
 ('love', 0.7792373444562729),
 ('you', 0.3856004761295062),
 (',', -0.017697541091358095),
 ('i', 0.1207189402855532),
 ('like', 0.19091115155373445),
 ('you', 0.3399487345372883),
 ('[SEP]', 0.0)]

In [None]:
cls_explainer.predicted_class_name

'POSITIVE'

In [None]:
m = 'facebook/bart-large-mnli'

tokenizer2 = AutoTokenizer.from_pretrained(m)

model2 = AutoModelForSequenceClassification.from_pretrained(m)

Downloading: 100%|██████████| 26.0/26.0 [00:00<00:00, 26.0kB/s]
Downloading: 100%|██████████| 1.13k/1.13k [00:00<00:00, 1.15MB/s]
Downloading: 100%|██████████| 878k/878k [00:00<00:00, 1.57MB/s]
Downloading: 100%|██████████| 446k/446k [00:00<00:00, 986kB/s] 
Downloading: 100%|██████████| 1.29M/1.29M [00:00<00:00, 2.06MB/s]
Downloading: 100%|██████████| 1.52G/1.52G [02:08<00:00, 12.6MB/s]


In [None]:
zero_shot_explainer = ZeroShotClassificationExplainer(model2, tokenizer2)

In [None]:
input2 = "I love you, I like you but sometimes I'm not sure I would like to marry you..."

In [None]:
cls_explainer = SequenceClassificationExplainer(
    model,
    tokenizer)
expl1 = cls_explainer(input2)

In [None]:
expl1

[('[CLS]', 0.0),
 ('i', -0.17919146733536945),
 ('love', -0.10876658820352109),
 ('you', -0.1265794130538051),
 (',', -0.014920844993548758),
 ('i', -0.05214202142316559),
 ('like', 0.017265334274621937),
 ('you', -0.09352812008772914),
 ('but', 0.5639923594271705),
 ('sometimes', -0.3755296090841353),
 ('i', 0.046014115402454446),
 ("'", -0.15290425715631567),
 ('m', -0.1687553198408003),
 ('not', -0.3937190967701838),
 ('sure', 0.3696136745137524),
 ('i', 0.25223015334531246),
 ('would', 0.21676789155796447),
 ('like', 0.07449702447915288),
 ('to', -0.0017056287591338947),
 ('marry', 0.005898448957407097),
 ('you', 0.00805418736919376),
 ('.', 0.07557921845162473),
 ('.', -0.03429728829928755),
 ('.', 0.01368848883033041),
 ('[SEP]', 0.0)]

In [None]:
expl2 = zero_shot_explainer(
    input2,
    labels = ["negative", "neutral", "positive"],
)

In [None]:
zero_shot_explainer.predicted_label

'negative'

In [None]:
nn = zero_shot_explainer.visualize()

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
negative,negative (0.40),negative,1.31,"#s I love you , I like you but sometimes I 'm not sure I would like to marry you . . ."
,,,,
neutral,neutral (0.39),neutral,1.48,"#s I love you , I like you but sometimes I 'm not sure I would like to marry you . . ."
,,,,
positive,positive (0.21),positive,0.28,"#s I love you , I like you but sometimes I 'm not sure I would like to marry you . . ."
,,,,


In [None]:
expl2 = cls_explainer("I love you, I like you, I also kinda dislike you", class_name="NEGATIVE")

In [None]:
cls_explainer.predicted_class_name

'POSITIVE'

In [None]:
nn = cls_explainer.visualize()

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,POSITIVE (0.00),NEGATIVE,-1.63,"[CLS] i love you , i like you , i also kinda dislike you [SEP]"
,,,,


In [None]:
expl2 = cls_explainer("I love you, I like you, I also kinda dislike you", class_name="POSITIVE")

In [None]:
cls_explainer.predicted_class_name

'POSITIVE'

In [None]:
nn = cls_explainer.visualize()

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,POSITIVE (1.00),POSITIVE,1.63,"[CLS] i love you , i like you , i also kinda dislike you [SEP]"
,,,,


In [None]:
text2 = """The capitalization of the AMD chip maker is going to sink, due to the chip shortage and the need for huge investments for scaling up the production capacity. 
    At the same time, small local producer can became relevant player in a now under pressure market."""

In [None]:
candidate_labels = ['music', 'energy', 'environment', 'dancing', 'exploration', 'aviation', "finance", "technology", "sports", "economy", "ecology", "stock"]

word_attributions = zero_shot_explainer(
    text2,
#    labels = ["finance", "technology", "sports", "economy", "ecology", "stock"],
    labels = candidate_labels,
)

In [None]:
import numpy as np

#print(type(word_attributions))

limit=25
          
header = "{:<16}".format(" ")
line = list()
v = list()
for key, vals in zip(word_attributions.keys(), word_attributions.values()):
    header += "{:<13}".format(key)
    i = 0
    
    for val in vals[:limit]:
        if ((i+1) > len(line)):
            line.append("{:<15}".format(val[0]))
            v.append(list())   
        line[i] += "{:>6.3f}       ".format(val[1])
        v[i].append(val[1])
        i += 1
            
#print(v)            
   
header += "{:<13}".format("STD.DEV")    
    
print(header)
i = 0
for elem in line:
    std = np.std(v[i])
    elem += "{:>6.3f}       ".format(std)
    #avg = np.average(v[i])
    #elem += "{:>6.3f}       ".format(avg)
    print(elem)
    i += 1

                music        energy       environment  dancing      exploration  aviation     finance      technology   sports       economy      ecology      stock        STD.DEV      
<s>             0.000        0.000        0.000        0.000        0.000        0.000        0.000        0.000        0.000        0.000        0.000        0.000        0.000       
The             0.000        0.000        0.000        0.000        0.000        0.000        0.000        0.000        0.000        0.000        0.000        0.000        0.000       
capital        -0.022       -0.085        0.002       -0.007       -0.110        0.011       -0.095       -0.157       -0.064       -0.045        0.090        0.039        0.066       
ization        -0.120       -0.062       -0.001        0.016        0.119        0.025       -0.120        0.242       -0.070       -0.050        0.173       -0.043        0.110       
of             -0.010       -0.717        0.026        0.205       -0.191 

In [None]:
zero_shot_explainer.predicted_label

'technology'

In [None]:
nn = zero_shot_explainer.visualize();

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
music,music (0.03),music,-1.41,"#s The capital ization of the AMD chip maker is going to sink , due to the chip shortage and the need for huge investments for scaling up the production capacity . At the same time , small local producer can became relevant player in a now under pressure market ."
,,,,
energy,energy (0.06),energy,-2.05,"#s The capital ization of the AMD chip maker is going to sink , due to the chip shortage and the need for huge investments for scaling up the production capacity . At the same time , small local producer can became relevant player in a now under pressure market ."
,,,,
environment,environment (0.10),environment,1.02,"#s The capital ization of the AMD chip maker is going to sink , due to the chip shortage and the need for huge investments for scaling up the production capacity . At the same time , small local producer can became relevant player in a now under pressure market ."
,,,,
dancing,dancing (0.02),dancing,0.64,"#s The capital ization of the AMD chip maker is going to sink , due to the chip shortage and the need for huge investments for scaling up the production capacity . At the same time , small local producer can became relevant player in a now under pressure market ."
,,,,
exploration,exploration (0.08),exploration,-1.11,"#s The capital ization of the AMD chip maker is going to sink , due to the chip shortage and the need for huge investments for scaling up the production capacity . At the same time , small local producer can became relevant player in a now under pressure market ."
,,,,


In [None]:
new_candidate_labels =  ['stock market', 'market', "finance", "technology", "economy", "stock"]

word_attributions = zero_shot_explainer(
    text2,
    labels = new_candidate_labels,
)

In [None]:
limit=25
          
header = "{:<16}".format(" ")
line = list()
v = list()
for key, vals in zip(word_attributions.keys(), word_attributions.values()):
    header += "{:<13}".format(key)
    i = 0
    
    for val in vals[:limit]:
        if ((i+1) > len(line)):
            line.append("{:<15}".format(val[0]))
            v.append(list())   
        line[i] += "{:>6.3f}       ".format(val[1])
        v[i].append(val[1])
        i += 1
            
#print(v)            
   
header += "{:<13}".format("STD.DEV")    
    
print(header)
i = 0
for elem in line:
    std = np.std(v[i])
    elem += "{:>6.3f}       ".format(std)
    #avg = np.average(v[i])
    #elem += "{:>6.3f}       ".format(avg)
    print(elem)
    i += 1

                stock market market       finance      technology   economy      stock        STD.DEV      
<s>             0.000        0.000        0.000        0.000        0.000        0.000        0.000       
The             0.000        0.000        0.000        0.000        0.000        0.000        0.000       
capital        -0.095       -0.136       -0.095       -0.157       -0.045        0.039        0.064       
ization        -0.235        0.432       -0.120        0.242       -0.050       -0.043        0.228       
of             -0.954        0.441       -0.637       -0.463       -0.304        0.276        0.491       
the            -0.091        0.098       -0.051       -0.120       -0.181        0.096        0.105       
AMD             0.028       -0.079        0.056       -0.058       -0.207        0.017        0.088       
chip            0.035       -0.104       -0.018       -0.088        0.103       -0.334        0.138       
maker          -0.029        0.114  

In [None]:
zero_shot_explainer.predicted_label

'market'

In [None]:
nn = zero_shot_explainer.visualize();

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
stock market,stock market (0.08),stock market,-1.83,"#s The capital ization of the AMD chip maker is going to sink , due to the chip shortage and the need for huge investments for scaling up the production capacity . At the same time , small local producer can became relevant player in a now under pressure market ."
,,,,
market,market (0.42),market,-0.69,"#s The capital ization of the AMD chip maker is going to sink , due to the chip shortage and the need for huge investments for scaling up the production capacity . At the same time , small local producer can became relevant player in a now under pressure market ."
,,,,
finance,finance (0.06),finance,0.01,"#s The capital ization of the AMD chip maker is going to sink , due to the chip shortage and the need for huge investments for scaling up the production capacity . At the same time , small local producer can became relevant player in a now under pressure market ."
,,,,
technology,technology (0.20),technology,-1.77,"#s The capital ization of the AMD chip maker is going to sink , due to the chip shortage and the need for huge investments for scaling up the production capacity . At the same time , small local producer can became relevant player in a now under pressure market ."
,,,,
economy,economy (0.12),economy,-1.19,"#s The capital ization of the AMD chip maker is going to sink , due to the chip shortage and the need for huge investments for scaling up the production capacity . At the same time , small local producer can became relevant player in a now under pressure market ."
,,,,


In [None]:
#input from https://www.swissinfo.ch/eng/how-sustainable-fuels-created-from-thin-air-could-solve-the-energy-crisis/47120934 only the headline (limitation of the input lenght)

text =""""
A refinery that produces fuels from sunlight and air… the scenario sounds like science fiction. 
Yet Swiss scientists claim to have done just that and built a potentially scalable way of producing sustainable fuels. 
A Swiss spin-off is working to commercialise the technology.
"""

In [None]:
#import transformers
# print(transformers.__version__)

from transformers import pipeline

In [None]:
classifier = pipeline("zero-shot-classification",
                      model="facebook/bart-large-mnli")

In [None]:
ret = classifier(text, candidate_labels, multi_label=True)

In [None]:
for e in zip(ret['labels'],ret['scores']):
    #print(e[1])
    print("class {:>20}  - relevance {:>6.3f}".format(e[0],e[1]))

class           technology  - relevance  0.983
class               energy  - relevance  0.922
class          environment  - relevance  0.640
class              economy  - relevance  0.384
class          exploration  - relevance  0.081
class              finance  - relevance  0.079
class                stock  - relevance  0.041
class              dancing  - relevance  0.015
class             aviation  - relevance  0.011
class              ecology  - relevance  0.007
class                music  - relevance  0.002
class               sports  - relevance  0.001


In [None]:
zero_shot_explainer(
    text,
    labels = candidate_labels
)

{'music': [('<s>', 0.0),
  ('"', 0.0),
  ('Ċ', 0.11088524846975557),
  ('A', 0.050180006123860935),
  ('refinery', 0.14624476560406088),
  ('that', -0.03754144899345838),
  ('produces', -0.277329819801346),
  ('fuels', 0.028148508917155396),
  ('from', 0.22892764819711572),
  ('sunlight', 0.13881053698846052),
  ('and', 0.37087259748874174),
  ('air', 0.2136406586542295),
  ('âĢ¦', -0.28363367626220304),
  ('the', 0.09778385705455998),
  ('scenario', 0.16551965992150827),
  ('sounds', 0.08264051362207814),
  ('like', 0.5343652437784276),
  ('science', -0.2079813238888459),
  ('fiction', 0.15836240842408447),
  ('.', -0.034786875424108896),
  ('Yet', 2.835328542106649e-05),
  ('Swiss', -0.02132723310032684),
  ('scientists', -0.03299952422319383),
  ('claim', 0.028969925781236385),
  ('to', -0.03593543006056325),
  ('have', 0.008767398325731588),
  ('done', 0.0006274314278981721),
  ('just', -0.026741335717444477),
  ('that', 0.07910175780983326),
  ('and', 0.01008791710560909),
  ('bui

In [None]:
zero_shot_explainer.predicted_label

'technology'

In [None]:
zero_shot_explainer.visualize();

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
music,music (0.02),music,1.51,"#s "" Ċ A refinery that produces fuels from sunlight and air âĢ¦ the scenario sounds like science fiction . Yet Swiss scientists claim to have done just that and built a potentially scalable way of producing sustainable fuels . A Swiss spin - off is working to commercial ise the technology ."
,,,,
energy,energy (0.31),energy,-0.81,"#s "" Ċ A refinery that produces fuels from sunlight and air âĢ¦ the scenario sounds like science fiction . Yet Swiss scientists claim to have done just that and built a potentially scalable way of producing sustainable fuels . A Swiss spin - off is working to commercial ise the technology ."
,,,,
environment,environment (0.10),environment,0.91,"#s "" Ċ A refinery that produces fuels from sunlight and air âĢ¦ the scenario sounds like science fiction . Yet Swiss scientists claim to have done just that and built a potentially scalable way of producing sustainable fuels . A Swiss spin - off is working to commercial ise the technology ."
,,,,
dancing,dancing (0.01),dancing,0.84,"#s "" Ċ A refinery that produces fuels from sunlight and air âĢ¦ the scenario sounds like science fiction . Yet Swiss scientists claim to have done just that and built a potentially scalable way of producing sustainable fuels . A Swiss spin - off is working to commercial ise the technology ."
,,,,
exploration,exploration (0.07),exploration,2.96,"#s "" Ċ A refinery that produces fuels from sunlight and air âĢ¦ the scenario sounds like science fiction . Yet Swiss scientists claim to have done just that and built a potentially scalable way of producing sustainable fuels . A Swiss spin - off is working to commercial ise the technology ."
,,,,


In [None]:
res = zero_shot_explainer(
    text,
    labels = ["positive","neutral","negative"]
)

In [None]:
zero_shot_explainer.predicted_label

'negative'

In [None]:
zero_shot_explainer.visualize();

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
positive,positive (0.17),positive,-2.39,"#s "" Ċ A refinery that produces fuels from sunlight and air âĢ¦ the scenario sounds like science fiction . Yet Swiss scientists claim to have done just that and built a potentially scalable way of producing sustainable fuels . A Swiss spin - off is working to commercial ise the technology ."
,,,,
neutral,neutral (0.10),neutral,-2.74,"#s "" Ċ A refinery that produces fuels from sunlight and air âĢ¦ the scenario sounds like science fiction . Yet Swiss scientists claim to have done just that and built a potentially scalable way of producing sustainable fuels . A Swiss spin - off is working to commercial ise the technology ."
,,,,
negative,negative (0.73),negative,1.24,"#s "" Ċ A refinery that produces fuels from sunlight and air âĢ¦ the scenario sounds like science fiction . Yet Swiss scientists claim to have done just that and built a potentially scalable way of producing sustainable fuels . A Swiss spin - off is working to commercial ise the technology ."
,,,,


In [None]:

sentiment_analysis = pipeline("sentiment-analysis",model="siebert/sentiment-roberta-large-english")

Downloading: 100%|██████████| 687/687 [00:00<00:00, 687kB/s]
Downloading: 100%|██████████| 1.32G/1.32G [01:53<00:00, 12.6MB/s]
Downloading: 100%|██████████| 256/256 [00:00<00:00, 256kB/s]
Downloading: 100%|██████████| 780k/780k [00:00<00:00, 1.43MB/s]
Downloading: 100%|██████████| 446k/446k [00:00<00:00, 986kB/s] 
Downloading: 100%|██████████| 150/150 [00:00<00:00, 150kB/s]


In [None]:
sentiment_analysis(text)

[{'label': 'POSITIVE', 'score': 0.9976629018783569}]

In [None]:
cls_explainer = SequenceClassificationExplainer(
    sentiment_analysis.model,
    sentiment_analysis.tokenizer)

In [None]:
word_attributions = cls_explainer(text)

In [None]:
word_attributions

[('<s>', 0.0),
 ('"', 0.007870082009781332),
 ('Ċ', 0.00693632912658196),
 ('A', 0.04512590395020376),
 ('refinery', -0.025060749305012717),
 ('that', 0.055025609127968694),
 ('produces', -0.1306211568197365),
 ('fuels', -0.1039348125765158),
 ('from', -0.08677638833888772),
 ('sunlight', -0.21546397136173756),
 ('and', -0.0333374107793791),
 ('air', 0.02556695937016207),
 ('âĢ¦', -0.029080639515637),
 ('the', 0.050151759016345096),
 ('scenario', -0.04618141606793187),
 ('sounds', 0.019277531403004932),
 ('like', -0.020047256361169864),
 ('science', -0.07883026156733099),
 ('fiction', -0.19068569860133877),
 ('.', -0.020211790784732335),
 ('Yet', -0.04456579133376124),
 ('Swiss', -0.05453737695036269),
 ('scientists', 0.02649501347160408),
 ('claim', -0.023967887152561713),
 ('to', 0.03217060515337784),
 ('have', -0.0007655103731317917),
 ('done', 0.0003567744316199803),
 ('just', 0.037033832204685445),
 ('that', 0.02936884327259373),
 ('and', 0.03433691147603564),
 ('built', 0.0360684

In [None]:
cls_explainer.predicted_class_name

'POSITIVE'

In [None]:
cls_explainer.visualize();

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,POSITIVE (1.00),POSITIVE,-0.05,"#s "" Ċ A refinery that produces fuels from sunlight and air âĢ¦ the scenario sounds like science fiction . Yet Swiss scientists claim to have done just that and built a potentially scalable way of producing sustainable fuels . A Swiss spin - off is working to commercial ise the technology . #/s"
,,,,


## EXERCISE 1

use the review dataset, as from the folder in the data, and consider scores 1/2 as negative, 3 as neutral and 4/5 as positive

loop on all then entry and compute the class. Based on the real class, create a confusion matrix: how it performs?

In [28]:
import pandas as pd
import csv
#reviews = pd.read_csv("data/reviews.csv", sep ='',)
listOfReviews = []
i = 0
with open('data/reviews.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        if i < 20:
            expl2 = cls_explainer(row["content"], class_name="POSITIVE")
            nn = cls_explainer.visualize()
            listOfReviews.append(row["content"])
            i = i+1

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,POSITIVE (0.91),POSITIVE,2.26,[CLS] update : after getting a response from the developer i would change my rating to 0 stars if possible . these guys hide behind confusing and opaque terms and refuse to budge at all . i ' m so annoyed that my money has been lost to them ! really terrible customer experience . original : be very careful when signing up for a free trial of this app . if you happen to go over they automatically charge you for a full years subscription and refuse to ref ##und . terrible customer experience and the app is just ok . [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.00),POSITIVE,-4.72,[CLS] used it for a fair amount of time without any problems . suddenly then asked me to create an account or log using google or f ##b . i used my google one only to discover everything was gone ! [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.01),POSITIVE,-2.59,[CLS] your app sucks now ! ! ! ! ! used to be good but now doesn ' t update until i physically open it up and then close it then scroll and then it finally shows what i want to put on the list ! ! ! ! ! ! and i fr ##ig ##gen paid for this garbage ! ! ! ! ! ! ! [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.00),POSITIVE,-7.88,"[CLS] it seems ok , but very basic . recurring tasks need some work to be actually useful . for example , it would be nice to be able to set a task to be recurring on the first of every month , without only being able to set that up on the first of the month . edit ; i also just noticed that there is no dark theme . both may be available as paid for options , but i ' ll never know , since they are basic options and without them , i have no reason to try this app , and thus will never pay for actual premium options . [SEP]"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.00),POSITIVE,-6.84,[CLS] absolutely worthless . this app runs a prohibit ##ively cl ##unk ##y interface that effectively void ##s the promise of ease - of - life automation . google calendar will provide all other services promised by any do for free with no annual subscription rate . don ' t sign up for the free trial . it ' s not worth getting ping ##ed for forty bucks when you decide you don ' t like it and forget to cancel it in time . save your money ! [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.00),POSITIVE,-7.56,"[CLS] be ##ware the trial period is a sc ##am i ' ve been using the free version for a while and decided to try the premium for 7 days per the trial ad . after 5 days i realized it didn ' t quite organize my information how i best work so i wrote to cancel . they informed me that i must have accidentally signed up for a non trial and by their strict terms cannot ref ##und me . so i wrote back nicely that my intention was to try it per the ad , but they said it ' s not possible . really ? [SEP]"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.00),POSITIVE,-7.81,"[CLS] don ' t buy premium ! you have to pay for an entire year up ##front . and if you change your mind about the app , like i did , due to its odd inconsistent behavior with re ##ac ##cu ##ring tasks and reminder ##s . you ' re given 48 hours after the 7 day trial to qualify for a ref ##und . after that you ' re screwed . all most all of the issues started happening after that period ended . they obviously know they have problems with the app , just read their robotic responses to issues . any . do hides behind to ##s instead of helping [SEP]"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.00),POSITIVE,-7.29,"[CLS] this could be an awesome app . it has great potential except for a few flaws that end up making it a super irritating app . i have reported these issues to the app ' s tech support several times only to get lame responses with no resolutions . the latest being that when items are entered into a category in this app , the app for some reason resorts the sequence of items in my list automatically . i put these items in the list in a certain order for a reason . i have other issues , but no more ##room ##her [SEP]"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.00),POSITIVE,-5.76,"[CLS] the app felt good enough for me to buy the yearly subscription , which i did and paid th 15 ##a ‚ ¬ it requested . but a week later they charged me 17 more euros to my credit card for the same subscription and the same features so i basically paid 32 ##a ‚ ¬ . that is a sc ##am and unless i get my 17 ##a ‚ ¬ euros back i ' m gonna keep the rating at one star . [SEP]"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.00),POSITIVE,-5.69,[CLS] i downloaded it and log ##in with google i never did anything just logged in with google . it has asking me many time for auto de ##bit for subscription so i un ##ins ##tal ##led it . but still 5 min before rs 90 ##9 de ##bit ##ed from my account . this is the mug ##ing of money . i want ref ##und my money in my account . kindly [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.00),POSITIVE,-6.12,"[CLS] this app is useless , it works as a note ##pad with your tasks in it , it does not not ##ify you of tasks you suppose to do . when the time reaches it ' s silent doesn ' t pop up even though settings have permitted it to pop up and alert , even a simple alarm clock notified me of what to do . this app doesn ' t work . [SEP]"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.00),POSITIVE,-4.52,[CLS] really disappointed to find recurring tasks functionality is only available with the paid version . would not recommend . also it doesn ' t sort tasks by date and time properly . [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.00),POSITIVE,-5.44,[CLS] loaded the trial then email ##ed about cancel ##ation but email bounced back . ended up paying $ 36 for an app i didn ' t want . no way to contact for ref ##und . be careful if you buy this . if you try the trial you ' re stuck paying for it ! [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.00),POSITIVE,-4.28,"[CLS] can ##t re - edit time that iv ##e set , necessary to del ##ete and red ##o the process . also , i need a reminder for every hour , and had to create 24 reminder ##s instead of 1 reminder running every hour . poorly designed . [SEP]"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.00),POSITIVE,-4.56,"[CLS] thanks to the developer for your quick response , however , this calendar is useless for our group without those features . . . we decided to go with business calendar pro . previous comments : wasn ' t able to see month or year view [SEP]"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.00),POSITIVE,-5.32,"[CLS] this was originally a 5 star app , but now you can only use it by allowing it access to either your emails or facebook ! [UNK] ¡ [UNK] ¡ [UNK] ¡ i am definitely not happy to allow this so have deleted the app . would give it zero stars if i could . [SEP]"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.00),POSITIVE,-4.87,[CLS] couldn ' t even set up one single recurring reminder without being required to pay monthly subscription . get your greedy hand off it dev . it ' s only only a reminder app not a useful service . [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.00),POSITIVE,-8.83,"[CLS] hey , there is something wrong here . . . the billing i signed up for was for about 2 $ and not 35 $ from my account . . . that is my tuition up ##kee ##p fee . . . . i be ##rel ##y even use your app even . . . . please i need back my ref ##und . . . this isn ' t right . . . i would file a report to google play store . . this is totally sc ##am ##my . . . please you guys should totally avoid this app please . jesus christ i totally expected to be billed 2 $ . . . all of a sudden its 35 $ . . . no this is totally far fr ##m being fair please [SEP]"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.00),POSITIVE,-4.43,"[CLS] i reported about this app due to incorrect billing but noon ##e answer me . please , return my money back because app took my money without my permission from my credit card . [SEP]"
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,NEGATIVE (0.00),POSITIVE,-4.24,[CLS] the fact that you have to pay so much to use something so simple isn ' t worth it to me . there are free apps of similar functionality . [SEP]
,,,,


UnicodeDecodeError: 'charmap' codec can't decode byte 0x8f in position 6206: character maps to <undefined>

In [5]:
print(reviews)
reviewsexpl2 = cls_explainer("I love you, I like you, I also kinda dislike you", class_name="NEGATIVE")

                userName                                          userImage  \
0          Andrew Thomas  https://lh3.googleusercontent.com/a-/AOh14GiHd...   
1           Craig Haines  https://lh3.googleusercontent.com/-hoe0kwSJgPQ...   
2          steven adkins  https://lh3.googleusercontent.com/a-/AOh14GiXw...   
3       Lars Panzerbjørn  https://lh3.googleusercontent.com/a-/AOh14Gg-h...   
4          Scott Prewitt  https://lh3.googleusercontent.com/-K-X1-YsVd6U...   
...                  ...                                                ...   
15741          Tammy Kay  https://lh3.googleusercontent.com/a-/AOh14GhYP...   
15742          Ysm Johan  https://lh3.googleusercontent.com/a-/AOh14Ggmd...   
15743      casey dearden  https://lh3.googleusercontent.com/a-/AOh14Gg2U...   
15744     Jerry G Tamate  https://lh3.googleusercontent.com/a-/AOh14GiTP...   
15745  Ahmed elsalamouni  https://lh3.googleusercontent.com/-9QSxVUhCoDI...   

                                                 co

NameError: name 'cls_explainer' is not defined

### Bonus: explanation on Q&A

In [None]:
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
from transformers_interpret import QuestionAnsweringExplainer

tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
model = AutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")

qa_explainer = QuestionAnsweringExplainer(
    model,
    tokenizer,
)

Downloading: 100%|██████████| 28.0/28.0 [00:00<00:00, 14.0kB/s]
Downloading: 100%|██████████| 443/443 [00:00<00:00, 443kB/s]
Downloading: 100%|██████████| 226k/226k [00:00<00:00, 832kB/s] 
Downloading: 100%|██████████| 455k/455k [00:00<00:00, 1.01MB/s]
Downloading: 100%|██████████| 1.25G/1.25G [01:46<00:00, 12.6MB/s]


In [None]:
context = """
In Artificial Intelligence and machine learning, Natural Language Processing relates to the usage of machines to process and understand human language.
Many researchers currently work in this space.
"""

word_attributions = qa_explainer(
    "What is natural language processing ?",
    context,
)

In [None]:
word_attributions

{'start': [('[CLS]', 0.0),
  ('what', 0.917717061867351),
  ('is', 0.1338213861414588),
  ('natural', 0.08061731698784134),
  ('language', 0.013138230044765855),
  ('processing', 0.11135945915876394),
  ('?', 0.008580120143897719),
  ('[SEP]', -0.09646334048536524),
  ('in', 0.01545606320896786),
  ('artificial', 0.04720979393539745),
  ('intelligence', 0.02668683069056011),
  ('and', 0.016753607049298964),
  ('machine', -0.08429576178208778),
  ('learning', 0.004482511600234183),
  (',', -0.02401029530321712),
  ('natural', -0.0016759040611807657),
  ('language', 0.002681433280950527),
  ('processing', 0.06773119884848856),
  ('relates', 0.03884567065125608),
  ('to', 0.009783733105059877),
  ('the', -0.026650828552455926),
  ('usage', -0.010675208218277504),
  ('of', 0.015346547746357412),
  ('machines', -0.08277993141643336),
  ('to', 0.12861371511135003),
  ('process', 0.19540094680181358),
  ('and', 0.009943096833436344),
  ('understand', 0.0068367995968998665),
  ('human', 0.0502

In [None]:
qa_explainer.predicted_answer

'usage of machines to process and understand human language'

In [None]:
nn = qa_explainer.visualize()

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
usage (21),usage of machines to process and understand human language (5.54),usage (21),1.68,"[CLS] what is natural language processing ? [SEP] in artificial intelligence and machine learning , natural language processing relates to the usage of machines to process and understand human language . many researchers currently work in this space . [SEP]"
,,,,
language (29),usage of machines to process and understand human language (5.40),language (29),1.67,"[CLS] what is natural language processing ? [SEP] in artificial intelligence and machine learning , natural language processing relates to the usage of machines to process and understand human language . many researchers currently work in this space . [SEP]"
,,,,


In [None]:
context = """
The Old Man and the Sea, short heroic novel by Ernest Hemingway, published in 1952 and awarded the 1953 
Pulitzer Prize for fiction. It was his last major work of fiction. The story centres on an aging fisherman 
who engages in an epic battle to catch a giant marlin.
"""

In [None]:
word_attributions = qa_explainer(
    "What is the story called?",
    context,
)

print (qa_explainer.predicted_answer)
nn = qa_explainer.visualize()

the old man and the sea


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
the (8),the old man and the sea (7.25),the (8),2.69,"[CLS] what is the story called ? [SEP] the old man and the sea , short heroic novel by ernest hemingway , published in 1952 and awarded the 1953 pulitzer prize for fiction . it was his last major work of fiction . the story centres on an aging fisherman who engages in an epic battle to catch a giant mar ##lin . [SEP]"
,,,,
sea (13),the old man and the sea (6.33),sea (13),2.54,"[CLS] what is the story called ? [SEP] the old man and the sea , short heroic novel by ernest hemingway , published in 1952 and awarded the 1953 pulitzer prize for fiction . it was his last major work of fiction . the story centres on an aging fisherman who engages in an epic battle to catch a giant mar ##lin . [SEP]"
,,,,


In [None]:
word_attributions = qa_explainer(
    "What is the story about?",
    context,
)

print (qa_explainer.predicted_answer)
nn = qa_explainer.visualize()

an aging fisherman


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
an (47),an aging fisherman (6.72),an (47),1.55,"[CLS] what is the story about ? [SEP] the old man and the sea , short heroic novel by ernest hemingway , published in 1952 and awarded the 1953 pulitzer prize for fiction . it was his last major work of fiction . the story centres on an aging fisherman who engages in an epic battle to catch a giant mar ##lin . [SEP]"
,,,,
fisherman (49),an aging fisherman (6.55),fisherman (49),2.77,"[CLS] what is the story about ? [SEP] the old man and the sea , short heroic novel by ernest hemingway , published in 1952 and awarded the 1953 pulitzer prize for fiction . it was his last major work of fiction . the story centres on an aging fisherman who engages in an epic battle to catch a giant mar ##lin . [SEP]"
,,,,


In [None]:
word_attributions = qa_explainer(
    "which prize has the novel received?",
    context,
)

print (qa_explainer.predicted_answer)
nn = qa_explainer.visualize()

pulitzer prize for fiction


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
pulitzer (30),pulitzer prize for fiction (6.47),pulitzer (30),2.22,"[CLS] which prize has the novel received ? [SEP] the old man and the sea , short heroic novel by ernest hemingway , published in 1952 and awarded the 1953 pulitzer prize for fiction . it was his last major work of fiction . the story centres on an aging fisherman who engages in an epic battle to catch a giant mar ##lin . [SEP]"
,,,,
fiction (33),pulitzer prize for fiction (6.55),fiction (33),1.54,"[CLS] which prize has the novel received ? [SEP] the old man and the sea , short heroic novel by ernest hemingway , published in 1952 and awarded the 1953 pulitzer prize for fiction . it was his last major work of fiction . the story centres on an aging fisherman who engages in an epic battle to catch a giant mar ##lin . [SEP]"
,,,,


In [None]:
word_attributions = qa_explainer(
    "Why is the fish in the novel?",
    context,
)

print (qa_explainer.predicted_answer)
nn = qa_explainer.visualize()

to catch a giant mar ##lin


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
to (58),to catch a giant mar ##lin (4.87),to (58),2.28,"[CLS] why is the fish in the novel ? [SEP] the old man and the sea , short heroic novel by ernest hemingway , published in 1952 and awarded the 1953 pulitzer prize for fiction . it was his last major work of fiction . the story centres on an aging fisherman who engages in an epic battle to catch a giant mar ##lin . [SEP]"
,,,,
##lin (63),to catch a giant mar ##lin (3.90),##lin (63),0.78,"[CLS] why is the fish in the novel ? [SEP] the old man and the sea , short heroic novel by ernest hemingway , published in 1952 and awarded the 1953 pulitzer prize for fiction . it was his last major work of fiction . the story centres on an aging fisherman who engages in an epic battle to catch a giant mar ##lin . [SEP]"
,,,,
