In [1]:
from fastai.text.all import *

In [2]:
# Language code and the derived wiki name used as the data sub-directory.
lang = 'de'
wiki = lang + 'wiki'

# Directory layout: data/<wiki>/germeval holds the evaluation data,
# data/<wiki>/model/class holds the classifier artefacts.
base_path = Path('data')
path = base_path.joinpath(wiki)
data_path = path.joinpath('germeval')
class_path = path.joinpath('model', 'class')

# Load classification learners

In [3]:
# Location of the exported 'fwd' classifier below class_path (kept as a plain string).
path_fwd = str(class_path) + '/fwd/export.pkl'
# Restore the exported learner from disk.
learn_fwd = load_learner(path_fwd)

In [4]:
# Location of the exported 'bwd' classifier below class_path (kept as a plain string).
path_bwd = str(class_path) + '/bwd/export.pkl'
# Restore the exported learner from disk.
learn_bwd = load_learner(path_bwd)

# Get predictions for simple texts

```
Text: Komisch das die Realitätsverweigerung immer von linken erbärmlichen Correctiv Accounts ausgeht...  
label: OFFENSE  
label_fine: INSULT
```

In [5]:
# Example tweet to classify (the same text that is quoted in the markdown above).
text = 'Komisch das die Realitätsverweigerung immer von linken erbärmlichen Correctiv Accounts ausgeht...'
# Run the forward learner on the raw string; `pred` holds whatever predict() returns.
pred = learn_fwd.predict(text)
# Bare expression so the notebook displays the prediction.
pred

('INSULT', tensor(1), tensor([0.3995, 0.5468, 0.0500, 0.0037]))

# Evaluation on GermEval2019 Task 2 (Fine)

## Load GermEval2019 data

In [6]:
# Column names assigned, in order, to the columns of the header-less GermEval file.
names = [
    'text',
    'label',
    'label_fine',
]

In [7]:
# Read the tab-separated gold-label file; it has no header row, so the column
# names are supplied explicitly.
# NOTE(review): pandas applies its default quote handling here. If the raw tweets
# contain unbalanced double quotes, rows could be merged -- consider
# quoting=csv.QUOTE_NONE and confirm the resulting row count against the file.
df_test = pd.read_csv(
    data_path/'germeval2019/germeval2019GoldLabelsSubtask1_2.txt',
    names=names,
    sep='\t',
)

In [8]:
# Twitter-style mention: '@' followed by one or more word characters.
# Raw string on purpose: '\w' inside a plain literal is an invalid escape sequence
# (DeprecationWarning, SyntaxWarning on Python 3.12+).
_MENTION_RE = re.compile(r'@\w+')

# URL-like substrings: http(s)://..., www..., or domain.tld/... (case-insensitive).
_URL_RE = re.compile(r'''(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))''')


def clean_text(text):
    """Remove @mentions and URLs from `text` and normalise its whitespace.

    Args:
        text: The input string (e.g. a single tweet).

    Returns:
        The string with every mention and URL-like substring replaced by a
        space, runs of whitespace collapsed to a single space, and leading
        and trailing whitespace removed.
    """
    text = _MENTION_RE.sub(' ', text)
    text = _URL_RE.sub(' ', text)
    # split() with no arguments drops all whitespace runs, including at both ends.
    return ' '.join(text.split())
# Clean every test text element-wise and write the result back into the 'text' column.
df_test['text'] = df_test['text'].map(clean_text)

## Forward model

In [9]:
# Build a labelled test DataLoader from df_test through the forward learner's
# own data pipeline, then collect its predictions.
dl_fwd = learn_fwd.dls.test_dl(df_test, with_labels=True)
preds_fwd = learn_fwd.get_preds(dl=dl_fwd)
# preds_fwd is used as (predictions, targets); show the forward model's accuracy.
accuracy(preds_fwd[0], preds_fwd[1])

TensorBase(0.7301)

## Backward model

In [10]:
# Build a labelled test DataLoader from df_test through the backward learner's
# own data pipeline, then collect its predictions.
dl_bwd = learn_bwd.dls.test_dl(df_test, with_labels=True)
preds_bwd = learn_bwd.get_preds(dl=dl_bwd)
# preds_bwd is used as (predictions, targets); show the backward model's accuracy.
accuracy(preds_bwd[0], preds_bwd[1])

TensorBase(0.7374)

## Ensemble Forward + Backward

In [11]:
# Averaging mode shared by all three metrics below.
avg = 'macro'
# Instantiate precision, recall and F1 (in that order) with the same averaging mode.
precision, recall, f1score = (
    metric_cls(average=avg) for metric_cls in (Precision, Recall, F1Score)
)

In [12]:
# Ensemble: element-wise mean of the forward and backward model outputs.
preds = (preds_fwd[0] + preds_bwd[0]).div(2)
# Targets are taken from the forward run.
# NOTE(review): assumes both test loaders return rows in the same order, so the
# forward targets are valid for the averaged predictions -- TODO confirm.
a = accuracy(preds, preds_fwd[1])
a

TensorBase(0.7456)

In [13]:
# Precision of the ensemble: index of the highest averaged score per row vs. the forward-run targets.
p = precision(preds.argmax(dim=1), preds_fwd[1])
p

0.5863656019734342

In [14]:
# Recall of the ensemble: index of the highest averaged score per row vs. the forward-run targets.
r = recall(preds.argmax(dim=1), preds_fwd[1])
r

0.4930981710582918

In [15]:
# F1 score of the ensemble: index of the highest averaged score per row vs. the forward-run targets.
f1 = f1score(preds.argmax(dim=1), preds_fwd[1])
f1

0.5254420614467545

Save stats

In [16]:
# Gather the ensemble metrics; accuracy is converted to a plain float first.
stats = dict(
    accuracy=float(a),
    precision=p,
    recall=r,
    f1score=f1,
)

# Write the metrics as indented JSON next to the classifier artefacts.
with open(f'{class_path}/inference_stats.json', mode='w') as f:
    json.dump(stats, f, indent=4, ensure_ascii=False)

# Interpretation with fastinference

See the [fastinference documentation](https://muellerzr.github.io/fastinference/).

`intrinsic_attention()` shows which tokens contribute most to the classification.   
Red tokens = small contribution  
Green tokens = high contribution

In [18]:
# NOTE(review): this import sits mid-notebook and the imported name is never used
# directly; presumably importing the module is what makes `intrinsic_attention`
# available as a method on the learner -- confirm before moving it to the top cell.
from fastinference.inference.text import intrinsic_attention
# Display the per-token attention visualisation for the example text.
learn_fwd.intrinsic_attention(text)

In [19]:
# Re-run the forward prediction on the same example text and display the result.
# NOTE(review): this runs after the fastinference import; the recorded output has a
# different structure than the earlier predict() call -- confirm whether that import
# changes what predict() returns.
learn_fwd.predict(text)

[['INSULT'],
 array([[0.39946818, 0.5468409 , 0.04996781, 0.0037231 ]], dtype=float32)]