In [14]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from ferret import Benchmark
from statistics import mean
import numpy as np

In [15]:
import tensorflow as tf
import torch

In [16]:
import json
import pickle
from tabulate import tabulate

In [219]:
# Sequence-classification model and tokenizer, both loaded from the same checkpoint directory.
# NOTE(review): absolute, machine-specific path - adjust it when running elsewhere.
name = "/beegfs/scratch/rsari/BertSeqCA/checkpoint-10000/"
model = AutoModelForSequenceClassification.from_pretrained(name)
tokenizer = AutoTokenizer.from_pretrained(name)

In [18]:
# Section types of the classifier; the position in this tuple is the class index
# (later cells address model outputs as f"LABEL_{index}").
id2tag = dict(enumerate((
    'Anrede',
    'Diagnosen',
    'AllergienUnverträglichkeitenRisiken',
    'Anamnese',
    'Medikation',
    'KUBefunde',
    'Befunde',
    'EchoBefunde',
    'Zusammenfassung',
    'Mix',
    'Abschluss',
)))
# Reverse lookup: section name -> class index.
tag2id = dict(zip(id2tag.values(), id2tag.keys()))

# Label names in class-index order, echoed space-separated as a quick sanity check.
labels = [*id2tag.values()]
print(*labels)

Anrede Diagnosen AllergienUnverträglichkeitenRisiken Anamnese Medikation KUBefunde Befunde EchoBefunde Zusammenfassung Mix Abschluss


# Which of the interpretability methods is more faithful ?

**Approach:**
1. Calculate for IG & SHAP each Comprehensiveness & Sufficiency
    1. Which of them performs better in each ?
        1. Reasons?
    2. Which labels are conspicuous for scoring high or low?
        1. Metric specific tendencies ?
        2. Data Bias ?
    3. Finally IG or SHAP "better" overall ?
        1. Pros/Cons of approach with Faithfulness metric

In [220]:
# ferret Benchmark built from the model and tokenizer; later cells use
# bench.score, bench.explain and bench.show_table on it.
bench = Benchmark(model, tokenizer)

In [2]:
# Load the pickled samples: a dict keyed by section label, each value a list of samples.
# NOTE(review): pickle.load can execute arbitrary code - only load files you trust.
with open("data.p", "rb") as f:
    data = pickle.load(f)
    
# Number of samples per label.
[(k, len(v)) for k,v in data.items()]

[('Anrede', 11),
 ('Diagnosen', 10),
 ('AllergienUnverträglichkeitenRisiken', 13),
 ('Anamnese', 14),
 ('Medikation', 10),
 ('KUBefunde', 11),
 ('Befunde', 12),
 ('EchoBefunde', 17),
 ('Zusammenfassung', 13),
 ('Mix', 21),
 ('Abschluss', 16)]

In [3]:
def _unpickle(path):
    """Return the object stored in the pickle file at `path`."""
    # NOTE(review): pickle.load can execute arbitrary code - only load files you trust.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Precomputed artefacts. The eva_* objects are dicts keyed by label whose entries
# expose .explanation and .evaluation_scores (see their use in later cells);
# ig / shap presumably hold the matching raw explanations - TODO confirm.
ig = _unpickle("ig.p")
shap = _unpickle("shap.p")
eva_ig = _unpickle("eva_ig.p")
eva_shap = _unpickle("eva_shap.p")

In [11]:
def _metric_scores(evaluations, metric_name):
    """Return the `metric_name` score of every evaluation, in list order."""
    return [e.score for eva in evaluations for e in eva.evaluation_scores if e.name == metric_name]


def _mean_at(values, positions):
    """NaN-ignoring mean of `values` at `positions`; NaN when nothing is selected.

    Selection is by position. Looking positions up with list.index(value) maps
    every duplicate value to its first occurrence and therefore puts equal
    scores into the wrong group.
    """
    wanted = set(positions)
    picked = [v for pos, v in enumerate(values) if pos in wanted]
    # np.nanmean([]) also yields NaN but emits a "Mean of empty slice" RuntimeWarning.
    return np.nanmean(picked) if picked else np.nan


# label -> {"IG": (mean over `right`, mean over `wrong`), "SHAP": (same pair)}
compr, suff = {}, {}

for label in labels:
    # d[0] is compared with the section label (presumably d[0] is the predicted
    # label - TODO confirm); positions are split into matching / non-matching samples.
    right = [pos for pos, d in enumerate(data[label]) if d[0] == label]
    wrong = [pos for pos, d in enumerate(data[label]) if d[0] != label]
    print(label, right, wrong)

    # NOTE(review): the grouping below assumes eva_*[label][k] belongs to data[label][k].
    # The recorded outputs show lists of different length (13 samples vs. 10 SHAP
    # evaluations for "Zusammenfassung"), so verify this alignment.
    compr_ig = _metric_scores(eva_ig[label], "aopc_compr")
    compr_shap = _metric_scores(eva_shap[label], "aopc_compr")

    suff_ig = _metric_scores(eva_ig[label], "aopc_suff")
    suff_shap = _metric_scores(eva_shap[label], "aopc_suff")

    compr[label] = {"IG": (_mean_at(compr_ig, right), _mean_at(compr_ig, wrong)),
                    "SHAP": (_mean_at(compr_shap, right), _mean_at(compr_shap, wrong))}
    suff[label] = {"IG": (_mean_at(suff_ig, right), _mean_at(suff_ig, wrong)),
                   "SHAP": (_mean_at(suff_shap, right), _mean_at(suff_shap, wrong))}

Anrede [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] [10]
Diagnosen [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] []
AllergienUnverträglichkeitenRisiken [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] [10, 11, 12]
Anamnese [0, 1, 2, 3, 4, 6, 9, 10, 11, 12] [5, 7, 8, 13]
Medikation [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] []
KUBefunde [0, 1, 2, 3, 5, 6, 7, 8, 9, 10] [4]
Befunde [0, 1, 2, 3, 4, 5, 6, 8, 9, 10] [7, 11]
EchoBefunde [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] [10, 11, 12, 13, 14, 15, 16]
Zusammenfassung [0, 1, 2, 3, 4, 6, 7, 8, 9, 10] [5, 11, 12]
Mix [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] [0, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
Abschluss [0, 2, 3, 4, 5, 6, 7, 8, 9, 11] [1, 10, 12, 13, 14, 15]


Mean of empty slice
Mean of empty slice
Mean of empty slice
Mean of empty slice


## **2. Sufficiency**
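Measures whether the top k% tokens of the explanation (k = 10, 20, ..., 100) are by themselves sufficient for the right prediction: $f(x)_j - f(r_j)_j$, where $r_j$ is the input reduced to those tokens.  
**A lower score indicates that the tokens in $r_j$ highlighted by the explainer are sufficient to reproduce the prediction, i.e. they are the ones actually important for it.**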

### 2.1. SHAP

In [12]:
# One row per label: (label, [[mean over `right`, mean over `wrong`]]) for SHAP,
# both means rounded to two decimals.
table = []
for label in labels:
    rounded = [[round(value, 2) for value in means]
               for method, means in suff[label].items() if method == "SHAP"]
    table.append((label, rounded))

# Ascending by the first mean of each row, i.e. lowest sufficiency first.
table.sort(key=lambda row: row[1][0][0])
table = [["Label", "Sufficiency mean scores"], *table]

print(tabulate(table, headers="firstrow"))

Label                                Sufficiency mean scores
-----------------------------------  -------------------------
Zusammenfassung                      [[0.0, 0.01]]
Anamnese                             [[0.02, 0.05]]
Befunde                              [[0.02, 0.02]]
EchoBefunde                          [[0.04, nan]]
AllergienUnverträglichkeitenRisiken  [[0.06, nan]]
Medikation                           [[0.07, nan]]
Anrede                               [[0.1, nan]]
Abschluss                            [[0.13, 0.35]]
Diagnosen                            [[0.19, nan]]
KUBefunde                            [[0.26, 0.12]]
Mix                                  [[0.4, 0.1]]


In [99]:
# Number of SHAP evaluations for "Zusammenfassung", then per evaluation: token count
# without the first and last token, the explained text, and evaluation_scores[1]
# (named 'aopc_suff' in the recorded output).
len(eva_shap["Zusammenfassung"]), [(len(eva.explanation.tokens[1:-1]), eva.explanation.text, eva.evaluation_scores[1]) for eva in eva_shap["Zusammenfassung"]]

(10,
 [(28,
   'Die cMRT am Folgetag zeigte mehrere kleine Infarkte im Hirnstamm sowie rechts in den Stammganglien und bds.',
   Evaluation(name='aopc_suff', score=-0.002510023)),
  (17,
   'Röntgenologisch wurde der V.a. eine Stauungspneumonie gestellt.',
   Evaluation(name='aopc_suff', score=9.805957e-05)),
  (85,
   'Es erging die Maßgabe das Konzept vom letzten stationären Aufenthalt fortzuführen -LRB- Vorstellung in der Thoraxklinik -RRB- und die damals begonnene Therapie mit Methylprednisolon wöchentlich um 2,5 mg weiter auszuschleichen -LRB- letzte Reduktion am <[Pseudo] 14/10/2033> auf 7,5 mg -RRB- .',
   Evaluation(name='aopc_suff', score=0.00029241375)),
  (34,
   'Zur Vorbereitung auf die MitraClip erfolgte am <[Pseudo] 21/06/2034> eine Farbduplexsonographie der Karotiden.',
   Evaluation(name='aopc_suff', score=0.005224265)),
  (41,
   'Wir bitten um eine Trinkmengenrestriktion von 1,5 l/d und regelmäßige laborchemische Kontrollen der Elektrolyte durch den Hausarzt mit ggf.

#### SHAP - Best Label: Zusammenfassung

In [262]:
# Sufficiency walk-through (SHAP, label "Zusammenfassung"): keep only the top-k
# positively attributed tokens and report how much target confidence is lost.
sent = data["Zusammenfassung"][1][1]
score = bench.score(sent)
target = tag2id["Zusammenfassung"]
target_key = f"LABEL_{target}"
metr = bench.explain(sent, target=target)[0] ### SHAP ###
# Drop the first and last entry (presumably the special tokens - TODO confirm).
attributions = metr.scores[1:-1]
# Negatively attributed tokens become "[MASK]" placeholders and are left out below.
tokens = [t if attributions[i]>=0 else "[MASK]" for i, t in enumerate(metr.tokens[1:-1])]
filtered = [t for t in tokens if t != "[MASK]"]

# Explicit comparison instead of assert + bare except: the old form hid every
# error and scored the same text twice.
kept_score = bench.score(tokenizer.convert_tokens_to_string(filtered))
if not (score[target_key] <= kept_score[target_key]):
    print("! Prediction swayed by removal of negative contributing tokens to label: ", 
          id2tag[np.argmax(list(kept_score.values()))])
scores = [s for s in attributions if s>=0]
aggr = []
seen_sizes = set()  # top-k sizes that already contributed to `aggr`

print(f"Original sentence: {sent} \tScore: {round(score[target_key],2)}\nFiltered: {[(t, s) for t, s in zip(tokens, metr.scores[1:-1])]}\n")
for frac in np.arange(.1, 1.1, .1):
    sect = round(len(scores)*frac)
    # Positions of the `sect` largest scores, put back into sentence order.
    indices = np.argsort(scores)[::-1][:sect]
    top_tok = [filtered[j] for j in sorted(indices)]
    s = tokenizer.convert_tokens_to_string(top_tok)
    step_score = bench.score(s)  # scored once per step and reused below
    probs = list(step_score.values())
    new = score[target_key] - step_score[target_key]
    print(f"{sect} important token(s) only: '{s}' affects original score: {round(new, 2)} | Labeled: {id2tag[np.argmax(probs)]}: {np.max(probs)}")
    # Rounding can give neighbouring fractions the same size; count each size once
    # (keyed on the size, not on the score value as mean(set(...)) did).
    if sect >= 1 and sect not in seen_sizes:
        seen_sizes.add(sect)
        aggr.append(new)

# Without any positively attributed token there is nothing to average: report NaN.
print(f"\nMean of all scores: {round(mean(aggr), 2) if aggr else float('nan')}")

Explainer:   0%|          | 0/6 [00:00<?, ?it/s]

Original sentence: Röntgenologisch wurde der V.a. eine Stauungspneumonie gestellt. 	Score: 1.0
Filtered: [('[MASK]', -0.1063335279302019), ('##ologisch', 0.057876511226732985), ('wurde', 0.44221082629923775), ('der', 0.10508947177137497), ('[MASK]', -0.018277182651793344), ('[MASK]', -0.003599826201779736), ('[MASK]', -0.026887146833413894), ('[MASK]', -0.012654872553857098), ('eine', 0.08299531199478645), ('[MASK]', -0.008186067217991968), ('[MASK]', -0.009768548961348754), ('##p', 0.021391422173720247), ('##ne', 0.0075568083103108846), ('[MASK]', -0.02342996950130498), ('##onie', 0.0028170837737001328), ('gestellt', 0.057691346262249435), ('.', 0.013234024963402305)]

1 important token(s) only: 'wurde' affects original score: 0.0 | Labeled: Zusammenfassung: 0.994884192943573
2 important token(s) only: 'wurde der' affects original score: -0.0 | Labeled: Zusammenfassung: 0.9962839484214783
3 important token(s) only: 'wurde der eine' affects original score: -0.0 | Labeled: Zusammenfassu

### Ablation Study: Inclusion of neg. attr. tokens

In [263]:
# Ablation of the sufficiency walk-through (SHAP, label "Zusammenfassung"):
# tokens are ranked by attribution magnitude, so negative ones stay in the pool.
sent = data["Zusammenfassung"][1][1]
score = bench.score(sent)
target = tag2id["Zusammenfassung"]
target_key = f"LABEL_{target}"
metr = bench.explain(sent, target=target)[0] ### SHAP ###
# Absolute values of all attributions except the first and last entry
# (presumably the special tokens - TODO confirm).
attributions = [abs(v) for v in metr.scores[1:-1]]
# With absolute values the >= 0 test always holds, so no token is masked here.
tokens = [t if attributions[i]>=0 else "[MASK]" for i, t in enumerate(metr.tokens[1:-1])]
filtered = [t for t in tokens if t != "[MASK]"]

# Explicit comparison instead of assert + bare except: the old form hid every
# error and scored the same text twice.
kept_score = bench.score(tokenizer.convert_tokens_to_string(filtered))
if not (score[target_key] <= kept_score[target_key]):
    print("! Prediction swayed by removal of negative contributing tokens to label: ", 
          id2tag[np.argmax(list(kept_score.values()))])
scores = [s for s in attributions if s>=0]
aggr = []
seen_sizes = set()  # top-k sizes that already contributed to `aggr`

print(f"Original sentence: {sent} \tScore: {round(score[target_key],2)}\nFiltered: {[(t, s) for t, s in zip(tokens, metr.scores[1:-1])]}\n")
for frac in np.arange(.1, 1.1, .1):
    sect = round(len(scores)*frac)
    # Positions of the `sect` largest magnitudes, put back into sentence order.
    indices = np.argsort(scores)[::-1][:sect]
    top_tok = [filtered[j] for j in sorted(indices)]
    s = tokenizer.convert_tokens_to_string(top_tok)
    step_score = bench.score(s)  # scored once per step and reused below
    probs = list(step_score.values())
    new = score[target_key] - step_score[target_key]
    print(f"{sect} important token(s) only: '{s}' affects original score: {round(new, 2)} | Labeled: {id2tag[np.argmax(probs)]}: {np.max(probs)}")
    # Rounding can give neighbouring fractions the same size; count each size once
    # (keyed on the size, not on the score value as mean(set(...)) did).
    if sect >= 1 and sect not in seen_sizes:
        seen_sizes.add(sect)
        aggr.append(new)

# Without any selectable token there is nothing to average: report NaN.
print(f"\nMean of all scores: {round(mean(aggr), 2) if aggr else float('nan')}")

Explainer:   0%|          | 0/6 [00:00<?, ?it/s]

Original sentence: Röntgenologisch wurde der V.a. eine Stauungspneumonie gestellt. 	Score: 1.0
Filtered: [('Röntgen', -0.1063335279302019), ('##ologisch', 0.057876511226732985), ('wurde', 0.44221082629923775), ('der', 0.10508947177137497), ('V', -0.018277182651793344), ('.', -0.003599826201779736), ('a', -0.026887146833413894), ('.', -0.012654872553857098), ('eine', 0.08299531199478645), ('Stau', -0.008186067217991968), ('##ungs', -0.009768548961348754), ('##p', 0.021391422173720247), ('##ne', 0.0075568083103108846), ('##um', -0.02342996950130498), ('##onie', 0.0028170837737001328), ('gestellt', 0.057691346262249435), ('.', 0.013234024963402305)]

2 important token(s) only: 'Röntgen wurde' affects original score: 0.17 | Labeled: Zusammenfassung: 0.829968273639679
3 important token(s) only: 'Röntgen wurde der' affects original score: 0.02 | Labeled: Zusammenfassung: 0.9789688587188721
5 important token(s) only: 'Röntgenologisch wurde der eine' affects original score: -0.0 | Labeled: Zus

<span style="color:purple">**! Inclusion of negative tokens makes no significant difference** </span>

In [260]:
# Sufficiency walk-through (IG with normalised scores, label "Zusammenfassung"):
# keep only the top-k positively attributed tokens and report the confidence lost.
sent = data["Zusammenfassung"][1][1]
score = bench.score(sent)
target = tag2id["Zusammenfassung"]
target_key = f"LABEL_{target}"
metr = bench.explain(sent, target=target, normalize_scores=True)[4] ### IG ###
# Drop the first and last entry (presumably the special tokens - TODO confirm).
attributions = metr.scores[1:-1]
# Negatively attributed tokens become "[MASK]" placeholders and are left out below.
tokens = [t if attributions[i]>=0 else "[MASK]" for i, t in enumerate(metr.tokens[1:-1])]
filtered = [t for t in tokens if t != "[MASK]"]

# Explicit comparison instead of assert + bare except: the old form hid every
# error and scored the same text twice.
kept_score = bench.score(tokenizer.convert_tokens_to_string(filtered))
if not (score[target_key] <= kept_score[target_key]):
    print("! Prediction swayed by removal of negative contributing tokens to label: ", 
          id2tag[np.argmax(list(kept_score.values()))])
scores = [s for s in attributions if s>=0]
aggr = []
seen_sizes = set()  # top-k sizes that already contributed to `aggr`

print(f"Original sentence: {sent} \tScore: {round(score[target_key],2)}\nFiltered: {[(t, s) for t, s in zip(tokens, metr.scores[1:-1])]}\n")
for frac in np.arange(.1, 1.1, .1):
    sect = round(len(scores)*frac)
    # Positions of the `sect` largest scores, put back into sentence order.
    indices = np.argsort(scores)[::-1][:sect]
    top_tok = [filtered[j] for j in sorted(indices)]
    s = tokenizer.convert_tokens_to_string(top_tok)
    step_score = bench.score(s)  # scored once per step and reused below
    probs = list(step_score.values())
    new = score[target_key] - step_score[target_key]
    print(f"{sect} important token(s) only: '{s}' affects original score: {round(new, 2)} | Labeled: {id2tag[np.argmax(probs)]}: {np.max(probs)}")
    # Rounding can give neighbouring fractions the same size; count each size once
    # (keyed on the size, not on the score value as mean(set(...)) did).
    if sect >= 1 and sect not in seen_sizes:
        seen_sizes.add(sect)
        aggr.append(new)

# Without any positively attributed token there is nothing to average: report NaN.
print(f"\nMean of all scores: {round(mean(aggr), 2) if aggr else float('nan')}")

Explainer:   0%|          | 0/6 [00:00<?, ?it/s]

! Prediction swayed by removal of negative contributing tokens to label:  Befunde
Original sentence: Röntgenologisch wurde der V.a. eine Stauungspneumonie gestellt. 	Score: 1.0
Filtered: [('Röntgen', 0.13422073809435256), ('[MASK]', -0.0792783709766035), ('[MASK]', -0.3623967811402128), ('[MASK]', -0.037282700167099045), ('[MASK]', -0.059239771203730175), ('.', 0.01484986202020697), ('a', 0.03773797687690534), ('[MASK]', -0.006395933277709508), ('[MASK]', -0.018892764247409405), ('Stau', 0.031500919813153686), ('[MASK]', -0.01029594943450585), ('##p', 0.008822604181263963), ('##ne', 0.00554474005959565), ('[MASK]', -0.010698659574987624), ('##onie', 0.04106052900256171), ('[MASK]', -0.10094436338971795), ('[MASK]', -0.005396102130368896)]

1 important token(s) only: 'Röntgen' affects original score: 0.99 | Labeled: Befunde: 0.9646967053413391
1 important token(s) only: 'Röntgen' affects original score: 0.99 | Labeled: Befunde: 0.9646967053413391
2 important token(s) only: 'Röntgenonie'

In [204]:
# Attribution table of every explainer for this sentence, targeting "Zusammenfassung";
# the row order shows which index of bench.explain(...) belongs to which explainer.
bench.show_table(bench.explain(data["Zusammenfassung"][1][1], target=tag2id["Zusammenfassung"]))

Explainer:   0%|          | 0/6 [00:00<?, ?it/s]

Unnamed: 0,Röntgen,##ologisch,wurde,der,V,.,a,..1,eine,Stau,##ungs,##p,##ne,##um,##onie,gestellt,..2
Partition SHAP,-0.11,0.06,0.44,0.11,-0.02,-0.0,-0.03,-0.01,0.08,-0.01,-0.01,0.02,0.01,-0.02,0.0,0.06,0.01
LIME,-0.08,0.05,0.37,0.08,-0.03,0.05,0.02,-0.03,0.08,0.02,0.03,0.0,0.06,0.02,0.01,-0.02,-0.03
Gradient,0.16,0.11,0.09,0.04,0.05,0.03,0.06,0.03,0.06,0.04,0.02,0.02,0.02,0.02,0.04,0.08,0.03
Gradient (x Input),0.12,0.05,-0.13,0.02,0.05,0.02,0.09,-0.05,0.12,0.01,0.01,0.03,0.04,-0.01,0.03,0.09,0.06
Integrated Gradient,0.13,-0.08,-0.36,-0.04,-0.06,0.01,0.04,-0.01,-0.02,0.03,-0.01,0.01,0.01,-0.01,0.04,-0.1,-0.01
Integrated Gradient (x Input),-0.02,0.04,0.3,0.07,-0.01,0.01,-0.01,-0.01,0.18,0.03,0.03,0.03,0.04,0.04,0.03,0.1,0.05


#### SHAP - Worst Label: Mix

In [227]:
# Number of SHAP evaluations for "Mix", then per evaluation: token count without
# the first and last token, the explained text, and evaluation_scores[1]
# (named 'aopc_suff' in the recorded output).
len(eva_shap["Mix"]), [(len(eva.explanation.tokens[1:-1]), eva.explanation.text, eva.evaluation_scores[1]) for eva in eva_shap["Mix"]]

(21,
 [(23,
   'MRE: VRE rektal B-DATE I-DATE I-DATE',
   Evaluation(name='aopc_suff', score=-0.050719053)),
  (30,
   '- Wiedervorstellung zur nächsten Schrittmacherkontrolluntersuchung in unserer ORG am <[Pseudo] 05/11/2037> um 10 Uhr',
   Evaluation(name='aopc_suff', score=0.09525423)),
  (9, 'Externer Zuweiser: CPU', Evaluation(name='aopc_suff', score=0.3746343)),
  (34,
   'Wir empfehlen regelmäßige angiologische Kontrolluntersuchungen in der Praxis des betreuenden B-SALUTE B-TITLE B-PER',
   Evaluation(name='aopc_suff', score=0.26184133)),
  (8,
   '- Kostaufbau nach Ernährungskonsil',
   Evaluation(name='aopc_suff', score=0.55935806)),
  (7,
   '- Übernahme Herzchirurgie',
   Evaluation(name='aopc_suff', score=0.8258164)),
  (12,
   '- Kaliumkontrolle und klinische Kontrolle morgen beim Hausarzt',
   Evaluation(name='aopc_suff', score=0.37734532)),
  (22,
   '- Termin in der kardiologischen Ambulanz am B-DATE, 10:30 Uhr',
   Evaluation(name='aopc_suff', score=0.24765943)),
  (11

In [259]:
# Sufficiency walk-through (SHAP, label "Mix"): keep only the top-k positively
# attributed tokens and report how much target confidence is lost.
sent = data["Mix"][4][1]
score = bench.score(sent)
target = tag2id["Mix"]
target_key = f"LABEL_{target}"
metr = bench.explain(sent, target=target)[0] ### SHAP ###
# Drop the first and last entry (presumably the special tokens - TODO confirm).
attributions = metr.scores[1:-1]
# Negatively attributed tokens become "[MASK]" placeholders and are left out below.
tokens = [t if attributions[i]>=0 else "[MASK]" for i, t in enumerate(metr.tokens[1:-1])]
filtered = [t for t in tokens if t != "[MASK]"]

# Explicit comparison instead of assert + bare except: the old form hid every
# error and scored the same text twice.
kept_score = bench.score(tokenizer.convert_tokens_to_string(filtered))
if not (score[target_key] <= kept_score[target_key]):
    print("! Prediction swayed by removal of negative contributing tokens to label: ", 
          id2tag[np.argmax(list(kept_score.values()))])
scores = [s for s in attributions if s>=0]
aggr = []
seen_sizes = set()  # top-k sizes that already contributed to `aggr`

print(f"Original sentence: {sent} \tScore: {round(score[target_key],2)}\nFiltered: {[(t, s) for t, s in zip(tokens, metr.scores[1:-1])]}\n")
for frac in np.arange(.1, 1.1, .1):
    sect = round(len(scores)*frac)
    # Positions of the `sect` largest scores, put back into sentence order.
    indices = np.argsort(scores)[::-1][:sect]
    top_tok = [filtered[j] for j in sorted(indices)]
    s = tokenizer.convert_tokens_to_string(top_tok)
    step_score = bench.score(s)  # scored once per step and reused below
    probs = list(step_score.values())
    new = score[target_key] - step_score[target_key]
    print(f"{sect} important token(s) only: '{s}' affects original score: {round(new, 2)} | Labeled: {id2tag[np.argmax(probs)]}: {np.max(probs)}")
    # Rounding can give neighbouring fractions the same size; count each size once
    # (keyed on the size, not on the score value as mean(set(...)) did).
    if sect >= 1 and sect not in seen_sizes:
        seen_sizes.add(sect)
        aggr.append(new)

# Without any positively attributed token there is nothing to average: report NaN.
print(f"\nMean of all scores: {round(mean(aggr), 2) if aggr else float('nan')}")

Explainer:   0%|          | 0/6 [00:00<?, ?it/s]

Original sentence: - Kostaufbau nach Ernährungskonsil 	Score: 0.94
Filtered: [('-', 0.018596554615035184), ('Kost', 0.4052477708745033), ('##aufbau', 0.0184040195059656), ('nach', 0.013779742537580682), ('Ernährung', 0.36493172497281945), ('##skon', 0.048083972730894244), ('[MASK]', -0.038987453409692736), ('##il', 0.0919686855660894)]

1 important token(s) only: 'Kost' affects original score: 0.94 | Labeled: Zusammenfassung: 0.48217278718948364
1 important token(s) only: 'Kost' affects original score: 0.94 | Labeled: Zusammenfassung: 0.48217278718948364
2 important token(s) only: 'Kost Ernährung' affects original score: 0.91 | Labeled: Befunde: 0.46513277292251587
3 important token(s) only: 'Kost Ernährungil' affects original score: 0.93 | Labeled: Befunde: 0.8804022073745728
4 important token(s) only: 'Kost Ernährungskonil' affects original score: 0.91 | Labeled: Medikation: 0.5377553105354309
4 important token(s) only: 'Kost Ernährungskonil' affects original score: 0.91 | Labeled: M

### Ablation Study

In [257]:
# Ablation of the sufficiency walk-through (SHAP, label "Mix"): tokens are ranked
# by attribution magnitude, so negative ones stay in the pool.
sent = data["Mix"][4][1]
score = bench.score(sent)
target = tag2id["Mix"]
target_key = f"LABEL_{target}"
metr = bench.explain(sent, target=target)[0] ### SHAP ###
# Absolute values of all attributions except the first and last entry
# (presumably the special tokens - TODO confirm).
attributions = [abs(v) for v in metr.scores[1:-1]]
# With absolute values the >= 0 test always holds, so no token is masked here.
tokens = [t if attributions[i]>=0 else "[MASK]" for i, t in enumerate(metr.tokens[1:-1])]
filtered = [t for t in tokens if t != "[MASK]"]

# Explicit comparison instead of assert + bare except: the old form hid every
# error and scored the same text twice.
kept_score = bench.score(tokenizer.convert_tokens_to_string(filtered))
if not (score[target_key] <= kept_score[target_key]):
    print("! Prediction swayed by removal of negative contributing tokens to label: ", 
          id2tag[np.argmax(list(kept_score.values()))])
scores = [s for s in attributions if s>=0]
aggr = []
seen_sizes = set()  # top-k sizes that already contributed to `aggr`

print(f"Original sentence: {sent} \tScore: {round(score[target_key],2)}\nFiltered: {[(t, s) for t, s in zip(tokens, metr.scores[1:-1])]}\n")
for frac in np.arange(.1, 1.1, .1):
    sect = round(len(scores)*frac)
    # Positions of the `sect` largest magnitudes, put back into sentence order.
    indices = np.argsort(scores)[::-1][:sect]
    top_tok = [filtered[j] for j in sorted(indices)]
    s = tokenizer.convert_tokens_to_string(top_tok)
    step_score = bench.score(s)  # scored once per step and reused below
    probs = list(step_score.values())
    new = score[target_key] - step_score[target_key]
    print(f"{sect} important token(s) only: '{s}' affects original score: {round(new, 2)} | Labeled: {id2tag[np.argmax(probs)]}: {np.max(probs)}")
    # Rounding can give neighbouring fractions the same size; count each size once
    # (keyed on the size, not on the score value as mean(set(...)) did).
    if sect >= 1 and sect not in seen_sizes:
        seen_sizes.add(sect)
        aggr.append(new)

# Without any selectable token there is nothing to average: report NaN.
print(f"\nMean of all scores: {round(mean(aggr), 2) if aggr else float('nan')}")

Explainer:   0%|          | 0/6 [00:00<?, ?it/s]

Original sentence: - Kostaufbau nach Ernährungskonsil 	Score: 0.94
Filtered: [('-', 0.018596554615035184), ('Kost', 0.4052477708745033), ('##aufbau', 0.0184040195059656), ('nach', 0.013779742537580682), ('Ernährung', 0.36493172497281945), ('##skon', 0.048083972730894244), ('##s', -0.038987453409692736), ('##il', 0.0919686855660894)]

1 important token(s) only: 'Kost' affects original score: 0.94 | Labeled: Zusammenfassung: 0.48217278718948364
2 important token(s) only: 'Kost Ernährung' affects original score: 0.91 | Labeled: Befunde: 0.46513277292251587
2 important token(s) only: 'Kost Ernährung' affects original score: 0.91 | Labeled: Befunde: 0.46513277292251587
3 important token(s) only: 'Kost Ernährungil' affects original score: 0.93 | Labeled: Befunde: 0.8804022073745728
4 important token(s) only: 'Kost Ernährungskonil' affects original score: 0.91 | Labeled: Medikation: 0.5377553105354309
5 important token(s) only: 'Kost Ernährungskonsil' affects original score: 0.92 | Labeled: B

<span style="color:purple">**! Inclusion of negative tokens worsens the score: including 's' & 'Röntgen' undermines it** </span>

### 2.2 IG

In [256]:
# One row per label: (label, [[mean over `right`, mean over `wrong`]]) for IG,
# both means rounded to two decimals.
table = []
for label in labels:
    rounded = [[round(value, 2) for value in means]
               for method, means in suff[label].items() if method == "IG"]
    table.append((label, rounded))

# Ascending by the first mean of each row, i.e. lowest sufficiency first.
table.sort(key=lambda row: row[1][0][0])
table = [["Label", "Sufficiency mean scores"], *table]

print(tabulate(table, headers="firstrow"))

Label                                Sufficiency mean scores
-----------------------------------  -------------------------
EchoBefunde                          [[0.37, 0.01]]
Befunde                              [[0.41, -0.19]]
Medikation                           [[0.43, nan]]
Zusammenfassung                      [[0.44, -0.54]]
Abschluss                            [[0.45, 0.07]]
Anamnese                             [[0.46, -0.02]]
AllergienUnverträglichkeitenRisiken  [[0.6, 0.0]]
Diagnosen                            [[0.66, nan]]
Anrede                               [[0.71, 0.01]]
KUBefunde                            [[0.77, 0.0]]
Mix                                  [[0.8, 0.04]]


In [228]:
# Number of IG evaluations for "EchoBefunde", then per evaluation: token count
# without the first and last token, the explained text, and evaluation_scores[1]
# (named 'aopc_suff' in the recorded output).
len(eva_ig["EchoBefunde"]), [(len(eva.explanation.tokens[1:-1]), eva.explanation.text, eva.evaluation_scores[1]) for eva in eva_ig["EchoBefunde"]]

(17,
 [(19,
   'Echokardiographie vom <[Pseudo] 11/04/2032:>',
   Evaluation(name='aopc_suff', score=0.9333624)),
  (38,
   'Kein relevanter Perikarderguss nachweisbar, V. cava inferior normal weit -LRB- 14 mm -RRB- und atemvariabel.',
   Evaluation(name='aopc_suff', score=0.50635123)),
  (45,
   'V. cava inferior weiterhin erweitert auf 25 mm und nicht atemvariabel. Pleuraerguss rechts basal, ca. 4-5 cm Schichtdicke. Kein Pleuraerguss links.',
   Evaluation(name='aopc_suff', score=0.5677398)),
  (27,
   'Untersuchung am Bett auf Kardio-Intensiv. Vorbekannt deutlich reduzierte Schallbedingungen, v.a. von parasternal.',
   Evaluation(name='aopc_suff', score=0.32701594)),
  (24,
   'Transösophageale Echokardiographie vom <[Pseudo] 06/07/2036:>',
   Evaluation(name='aopc_suff', score=0.17271966)),
  (19,
   'Echokardiographie vom <[Pseudo] 04/11/2037:>',
   Evaluation(name='aopc_suff', score=0.5463312)),
  (180,
   'Deutlich dilatierter linker Ventrikel mit hochgradig verminderter systoli

#### IG - Best Label: EchoBefunde

In [267]:
# Sufficiency walk-through (IG, label "EchoBefunde"): keep only the top-k
# positively attributed tokens and report how much target confidence is lost.
sent = data["EchoBefunde"][3][1]
score = bench.score(sent)
target = tag2id["EchoBefunde"]
target_key = f"LABEL_{target}"
metr = bench.explain(sent, target=target)[4] ### IG ###
# Drop the first and last entry (presumably the special tokens - TODO confirm).
attributions = metr.scores[1:-1]
# Negatively attributed tokens become "[MASK]" placeholders and are left out below.
tokens = [t if attributions[i]>=0 else "[MASK]" for i, t in enumerate(metr.tokens[1:-1])]
filtered = [t for t in tokens if t != "[MASK]"]

# Explicit comparison instead of assert + bare except: the old form hid every
# error and scored the same text twice.
kept_score = bench.score(tokenizer.convert_tokens_to_string(filtered))
if not (score[target_key] <= kept_score[target_key]):
    print("! Prediction swayed by removal of negative contributing tokens to label: ", 
          id2tag[np.argmax(list(kept_score.values()))])
scores = [s for s in attributions if s>=0]
aggr = []
seen_sizes = set()  # top-k sizes that already contributed to `aggr`

print(f"Original sentence: {sent} \tScore: {round(score[target_key],2)}\nFiltered: {[(t, s) for t, s in zip(tokens, metr.scores[1:-1])]}\n")
for frac in np.arange(.1, 1.1, .1):
    sect = round(len(scores)*frac)
    # Positions of the `sect` largest scores, put back into sentence order.
    indices = np.argsort(scores)[::-1][:sect]
    top_tok = [filtered[j] for j in sorted(indices)]
    s = tokenizer.convert_tokens_to_string(top_tok)
    step_score = bench.score(s)  # scored once per step and reused below
    new = score[target_key] - step_score[target_key]
    print(f"{sect} important token(s) only: '{s}' affects original score: {round(new, 2)} | Labeled: {id2tag[np.argmax(list(step_score.values()))]}")
    # Rounding can give neighbouring fractions the same size; count each size once
    # (keyed on the size, not on the score value as mean(set(...)) did).
    if sect >= 1 and sect not in seen_sizes:
        seen_sizes.add(sect)
        aggr.append(new)

# Without any positively attributed token there is nothing to average: report NaN.
print(f"\nMean of all scores: {round(mean(aggr), 2) if aggr else float('nan')}")

Explainer:   0%|          | 0/6 [00:00<?, ?it/s]

! Prediction swayed by removal of negative contributing tokens to label:  EchoBefunde
Original sentence: Untersuchung am Bett auf Kardio-Intensiv. Vorbekannt deutlich reduzierte Schallbedingungen, v.a. von parasternal. 	Score: 0.98
Filtered: [('Untersuchung', 0.09259950602257182), ('[MASK]', -0.04374447312125009), ('Bett', 0.0896032875732434), ('auf', 0.05463458143151262), ('Kard', 0.02933618097377146), ('[MASK]', -0.011090570313687558), ('-', 0.020506392251137623), ('[MASK]', -0.035195529380337806), ('[MASK]', -0.021597058514531794), ('[MASK]', -0.042405016230841475), ('##kann', 0.021550671515907862), ('##t', 0.012173796778867678), ('deutlich', 0.03536102810884739), ('[MASK]', -0.1213924959288616), ('[MASK]', -0.0037369807670049418), ('Schall', 0.017841558589441305), ('[MASK]', -0.10196366085702138), (',', 0.021705164255363778), ('v', 0.03520246030776116), ('.', 0.031911060834966955), ('[MASK]', -0.03222631493092209), ('[MASK]', -0.013086820152932591), ('von', 0.04546060741524235), ('

### Ablation Study:

In [268]:
# Ablation of the sufficiency walk-through (IG, label "EchoBefunde"): tokens are
# ranked by attribution magnitude, so negative ones stay in the pool.
sent = data["EchoBefunde"][3][1]
score = bench.score(sent)
target = tag2id["EchoBefunde"]
target_key = f"LABEL_{target}"
metr = bench.explain(sent, target=target)[4] ### IG ###
# Absolute values of all attributions except the first and last entry
# (presumably the special tokens - TODO confirm).
attributions = [abs(v) for v in metr.scores[1:-1]]
# With absolute values the >= 0 test always holds, so no token is masked here.
tokens = [t if attributions[i]>=0 else "[MASK]" for i, t in enumerate(metr.tokens[1:-1])]
filtered = [t for t in tokens if t != "[MASK]"]

# Explicit comparison instead of assert + bare except: the old form hid every
# error and scored the same text twice.
kept_score = bench.score(tokenizer.convert_tokens_to_string(filtered))
if not (score[target_key] <= kept_score[target_key]):
    print("! Prediction swayed by removal of negative contributing tokens to label: ", 
          id2tag[np.argmax(list(kept_score.values()))])
scores = [s for s in attributions if s>=0]
aggr = []
seen_sizes = set()  # top-k sizes that already contributed to `aggr`

print(f"Original sentence: {sent} \tScore: {round(score[target_key],2)}\nFiltered: {[(t, s) for t, s in zip(tokens, metr.scores[1:-1])]}\n")
for frac in np.arange(.1, 1.1, .1):
    sect = round(len(scores)*frac)
    # Positions of the `sect` largest magnitudes, put back into sentence order.
    indices = np.argsort(scores)[::-1][:sect]
    top_tok = [filtered[j] for j in sorted(indices)]
    s = tokenizer.convert_tokens_to_string(top_tok)
    step_score = bench.score(s)  # scored once per step and reused below
    new = score[target_key] - step_score[target_key]
    print(f"{sect} important token(s) only: '{s}' affects original score: {round(new, 2)} | Labeled: {id2tag[np.argmax(list(step_score.values()))]}")
    # Rounding can give neighbouring fractions the same size; count each size once
    # (keyed on the size, not on the score value as mean(set(...)) did).
    if sect >= 1 and sect not in seen_sizes:
        seen_sizes.add(sect)
        aggr.append(new)

# Without any selectable token there is nothing to average: report NaN.
print(f"\nMean of all scores: {round(mean(aggr), 2) if aggr else float('nan')}")

Explainer:   0%|          | 0/6 [00:00<?, ?it/s]

Original sentence: Untersuchung am Bett auf Kardio-Intensiv. Vorbekannt deutlich reduzierte Schallbedingungen, v.a. von parasternal. 	Score: 0.98
Filtered: [('Untersuchung', 0.09259950602257182), ('am', -0.04374447312125009), ('Bett', 0.0896032875732434), ('auf', 0.05463458143151262), ('Kard', 0.02933618097377146), ('##io', -0.011090570313687558), ('-', 0.020506392251137623), ('Intensiv', -0.035195529380337806), ('.', -0.021597058514531794), ('Vorbe', -0.042405016230841475), ('##kann', 0.021550671515907862), ('##t', 0.012173796778867678), ('deutlich', 0.03536102810884739), ('reduziert', -0.1213924959288616), ('##e', -0.0037369807670049418), ('Schall', 0.017841558589441305), ('##bedingungen', -0.10196366085702138), (',', 0.021705164255363778), ('v', 0.03520246030776116), ('.', 0.031911060834966955), ('a', -0.03222631493092209), ('.', -0.013086820152932591), ('von', 0.04546060741524235), ('par', -0.017486633759382773), ('##aster', -0.004300270830336496), ('##nal', -0.020759775117881137),

<span style="color:purple">**! Inclusion of negative tokens in the first step improves the score substantially, such that the correct label is predicted** </span>

#### IG - Worst Label: Mix

In [230]:
# Number of IG evaluations for "Mix", then per evaluation: token count without
# the first and last token, the explained text, and evaluation_scores[1]
# (named 'aopc_suff' in the recorded output).
len(eva_ig["Mix"]), [(len(eva.explanation.tokens[1:-1]), eva.explanation.text, eva.evaluation_scores[1]) for eva in eva_ig["Mix"]]

(21,
 [(23,
   'MRE: VRE rektal B-DATE I-DATE I-DATE',
   Evaluation(name='aopc_suff', score=-0.00085239566)),
  (30,
   '- Wiedervorstellung zur nächsten Schrittmacherkontrolluntersuchung in unserer ORG am <[Pseudo] 05/11/2037> um 10 Uhr',
   Evaluation(name='aopc_suff', score=0.9543541)),
  (9, 'Externer Zuweiser: CPU', Evaluation(name='aopc_suff', score=0.9800445)),
  (34,
   'Wir empfehlen regelmäßige angiologische Kontrolluntersuchungen in der Praxis des betreuenden B-SALUTE B-TITLE B-PER',
   Evaluation(name='aopc_suff', score=0.98169893)),
  (8,
   '- Kostaufbau nach Ernährungskonsil',
   Evaluation(name='aopc_suff', score=0.9295922)),
  (7,
   '- Übernahme Herzchirurgie',
   Evaluation(name='aopc_suff', score=0.9663508)),
  (12,
   '- Kaliumkontrolle und klinische Kontrolle morgen beim Hausarzt',
   Evaluation(name='aopc_suff', score=0.4126138)),
  (22,
   '- Termin in der kardiologischen Ambulanz am B-DATE, 10:30 Uhr',
   Evaluation(name='aopc_suff', score=0.22014639)),
  (11,

In [269]:
# Sufficiency walk-through (IG, label "Mix"): keep only the top-k positively
# attributed tokens and report how much target confidence is lost.
sent = data["Mix"][4][1]
score = bench.score(sent)
target = tag2id["Mix"]
target_key = f"LABEL_{target}"
metr = bench.explain(sent, target=target)[4] ### IG ###
# Drop the first and last entry (presumably the special tokens - TODO confirm).
attributions = metr.scores[1:-1]
# Negatively attributed tokens become "[MASK]" placeholders and are left out below.
tokens = [t if attributions[i]>=0 else "[MASK]" for i, t in enumerate(metr.tokens[1:-1])]
filtered = [t for t in tokens if t != "[MASK]"]

# Explicit comparison instead of assert + bare except: the old form hid every
# error and scored the same text twice.
kept_score = bench.score(tokenizer.convert_tokens_to_string(filtered))
if not (score[target_key] <= kept_score[target_key]):
    print("! Prediction swayed by removal of negative contributing tokens to label: ", 
          id2tag[np.argmax(list(kept_score.values()))])
scores = [s for s in attributions if s>=0]
aggr = []
seen_sizes = set()  # top-k sizes that already contributed to `aggr`

print(f"Original sentence: {sent} \tScore: {round(score[target_key],2)}\nFiltered: {[(t, s) for t, s in zip(tokens, metr.scores[1:-1])]}\n")
for frac in np.arange(.1, 1.1, .1):
    sect = round(len(scores)*frac)
    # Positions of the `sect` largest scores, put back into sentence order.
    indices = np.argsort(scores)[::-1][:sect]
    top_tok = [filtered[j] for j in sorted(indices)]
    s = tokenizer.convert_tokens_to_string(top_tok)
    step_score = bench.score(s)  # scored once per step and reused below
    probs = list(step_score.values())
    new = score[target_key] - step_score[target_key]
    print(f"{sect} important token(s) only: '{s}' affects original score: {round(new, 2)} | Labeled: {id2tag[np.argmax(probs)]}: {np.max(probs)}")
    # Rounding can give neighbouring fractions the same size; count each size once
    # (keyed on the size, not on the score value as mean(set(...)) did).
    if sect >= 1 and sect not in seen_sizes:
        seen_sizes.add(sect)
        aggr.append(new)

# Without any positively attributed token there is nothing to average: report NaN.
print(f"\nMean of all scores: {round(mean(aggr), 2) if aggr else float('nan')}")

Explainer:   0%|          | 0/6 [00:00<?, ?it/s]

! Prediction swayed by removal of negative contributing tokens to label:  Befunde
Original sentence: - Kostaufbau nach Ernährungskonsil 	Score: 0.94
Filtered: [('[MASK]', -0.17239096756130623), ('Kost', 0.28357038194503126), ('##aufbau', 0.027203998969917657), ('[MASK]', -0.08204218859432016), ('Ernährung', 0.02043245855163943), ('##skon', 0.16957146868890877), ('##s', 0.19929151477819718), ('##il', 0.0013163738851477715)]

1 important token(s) only: 'Kost' affects original score: 0.94 | Labeled: Zusammenfassung: 0.48217278718948364
1 important token(s) only: 'Kost' affects original score: 0.94 | Labeled: Zusammenfassung: 0.48217278718948364
2 important token(s) only: 'Kosts' affects original score: 0.93 | Labeled: Befunde: 0.6500313878059387
2 important token(s) only: 'Kosts' affects original score: 0.93 | Labeled: Befunde: 0.6500313878059387
3 important token(s) only: 'Kostskons' affects original score: 0.92 | Labeled: Befunde: 0.6947952508926392
4 important token(s) only: 'Kostaufba

### Ablation

In [270]:
# Ablation of the sufficiency walk-through (IG, label "Mix"): tokens are ranked
# by attribution magnitude, so negative ones stay in the pool.
sent = data["Mix"][4][1]
score = bench.score(sent)
target = tag2id["Mix"]
target_key = f"LABEL_{target}"
metr = bench.explain(sent, target=target)[4] ### IG ####
# Absolute values of all attributions except the first and last entry
# (presumably the special tokens - TODO confirm).
attributions = [abs(v) for v in metr.scores[1:-1]]
# With absolute values the >= 0 test always holds, so no token is masked here.
tokens = [t if attributions[i]>=0 else "[MASK]" for i, t in enumerate(metr.tokens[1:-1])]
filtered = [t for t in tokens if t != "[MASK]"]

# Explicit comparison instead of assert + bare except: the old form hid every
# error and scored the same text twice.
kept_score = bench.score(tokenizer.convert_tokens_to_string(filtered))
if not (score[target_key] <= kept_score[target_key]):
    print("! Prediction swayed by removal of negative contributing tokens to label: ", 
          id2tag[np.argmax(list(kept_score.values()))])
scores = [s for s in attributions if s>=0]
aggr = []
seen_sizes = set()  # top-k sizes that already contributed to `aggr`

print(f"Original sentence: {sent} \tScore: {round(score[target_key],2)}\nFiltered: {[(t, s) for t, s in zip(tokens, metr.scores[1:-1])]}\n")
for frac in np.arange(.1, 1.1, .1):
    sect = round(len(scores)*frac)
    # Positions of the `sect` largest magnitudes, put back into sentence order.
    indices = np.argsort(scores)[::-1][:sect]
    top_tok = [filtered[j] for j in sorted(indices)]
    s = tokenizer.convert_tokens_to_string(top_tok)
    step_score = bench.score(s)  # scored once per step and reused below
    probs = list(step_score.values())
    new = score[target_key] - step_score[target_key]
    print(f"{sect} important token(s) only: '{s}' affects original score: {round(new, 2)} | Labeled: {id2tag[np.argmax(probs)]}: {np.max(probs)}")
    # Rounding can give neighbouring fractions the same size; count each size once
    # (keyed on the size, not on the score value as mean(set(...)) did).
    if sect >= 1 and sect not in seen_sizes:
        seen_sizes.add(sect)
        aggr.append(new)

# Without any selectable token there is nothing to average: report NaN.
print(f"\nMean of all scores: {round(mean(aggr), 2) if aggr else float('nan')}")

Explainer:   0%|          | 0/6 [00:00<?, ?it/s]

Original sentence: - Kostaufbau nach Ernährungskonsil 	Score: 0.94
Filtered: [('-', -0.17239096756130623), ('Kost', 0.28357038194503126), ('##aufbau', 0.027203998969917657), ('nach', -0.08204218859432016), ('Ernährung', 0.02043245855163943), ('##skon', 0.16957146868890877), ('##s', 0.19929151477819718), ('##il', 0.0013163738851477715)]

1 important token(s) only: 'Kost' affects original score: 0.94 | Labeled: Zusammenfassung: 0.48217278718948364
2 important token(s) only: 'Kosts' affects original score: 0.93 | Labeled: Befunde: 0.6500313878059387
2 important token(s) only: 'Kosts' affects original score: 0.93 | Labeled: Befunde: 0.6500313878059387
3 important token(s) only: '- Kosts' affects original score: 0.91 | Labeled: Befunde: 0.9212023019790649
4 important token(s) only: '- Kostskons' affects original score: 0.9 | Labeled: Diagnosen: 0.7408345341682434
5 important token(s) only: '- Kost nachskons' affects original score: 0.88 | Labeled: Befunde: 0.8905332088470459
6 important tok

<span style="color:purple">**! Inclusion of negative tokens improves the score throughout the last half of all steps: the correct label is predicted in the last two of them, and before that the scores for false labels are reduced** </span>