In [1]:
from fastai.text.all import *

In [2]:
# Language code and the name of the matching Wikipedia dump.
lang = 'de'
wiki = f'{lang}wiki'

# Everything lives below data/<wiki>; the GermEval corpus has its own sub-folder there.
base_path = Path('data')
path = base_path.joinpath(wiki)
data_path = path.joinpath('germeval')

# Load classification learners

In [3]:
# Build the export location from the shared `path` config (data/<lang>wiki) so that
# changing `lang` in the config cell is honoured here as well.
path_fwd = path/'model'/'class'/'fwd'/'export.pkl'
# NOTE: load_learner unpickles the file - only load exports from a trusted source.
learn_fwd = load_learner(path_fwd)

In [4]:
# Build the export location from the shared `path` config (data/<lang>wiki) so that
# changing `lang` in the config cell is honoured here as well.
path_bwd = path/'model'/'class'/'bwd'/'export.pkl'
# NOTE: load_learner unpickles the file - only load exports from a trusted source.
learn_bwd = load_learner(path_bwd)

# Get predictions for simple texts

Text: Komisch das die Realitätsverweigerung immer von linken erbärmlichen Correctiv Accounts ausgeht...  
label: OFFENSE  
label_fine: INSULT

In [5]:
# Sample tweet from the markdown cell above (gold labels: OFFENSE / INSULT).
text = (
    'Komisch das die Realitätsverweigerung immer von linken erbärmlichen Correctiv Accounts ausgeht...'
)

# Classify it with the forward model; the result is displayed as the cell output.
pred = learn_fwd.predict(text)
pred

('INSULT', tensor(1), tensor([0.3995, 0.5468, 0.0500, 0.0037]))

# Evaluation on GermEval2019 Task 2 (Fine)

## Load GermEval2019 data

In [6]:
# Column names assigned to the GermEval file when it is read.
names = [
    'text',
    'label',
    'label_fine',
]

In [7]:
# Gold-label file for subtasks 1 and 2: tab-separated, columns named via `names`.
gold_file = data_path/'germeval2019/germeval2019GoldLabelsSubtask1_2.txt'
df_test = pd.read_csv(gold_file, sep='\t', names=names)

In [8]:
# Twitter-style @mentions. Raw string: '\w' in a plain string literal is an invalid
# escape sequence (SyntaxWarning on recent Python versions).
_MENTION_RE = re.compile(r'@\w+')
# URLs: scheme-prefixed, www-prefixed or bare "domain.tld/..." forms (case-insensitive).
_URL_RE = re.compile(r'''(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))''')

def clean_text(text):
    """Strip @mentions and URLs from `text` and normalise its whitespace.

    Each @mention and each URL is replaced by a single space; afterwards every
    run of whitespace is collapsed to one space and leading/trailing whitespace
    is removed.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string.
    """
    text = _MENTION_RE.sub(' ', text)
    text = _URL_RE.sub(' ', text)
    # split() without arguments drops all whitespace runs, join() re-inserts single spaces.
    return ' '.join(text.split())
# Clean every tweet element-wise and write the result back into the 'text' column.
df_test['text'] = df_test['text'].map(clean_text)

## Forward model

In [9]:
# Labelled test dataloader built from the forward learner's own data pipeline.
dl_fwd = learn_fwd.dls.test_dl(df_test, with_labels=True)

# get_preds yields the predictions at index 0 and the targets at index 1.
preds_fwd = learn_fwd.get_preds(dl=dl_fwd)
accuracy(preds_fwd[0], preds_fwd[1])

TensorBase(0.7301)

## Backward model

In [10]:
# Labelled test dataloader built from the backward learner's own data pipeline.
dl_bwd = learn_bwd.dls.test_dl(df_test, with_labels=True)

# get_preds yields the predictions at index 0 and the targets at index 1.
preds_bwd = learn_bwd.get_preds(dl=dl_bwd)
accuracy(preds_bwd[0], preds_bwd[1])

TensorBase(0.7374)

## Ensemble Forward + Backward

In [11]:
# Ensemble: element-wise mean of the forward and backward model predictions.
probs_fwd, probs_bwd = preds_fwd[0], preds_bwd[0]
preds = (probs_fwd + probs_bwd) / 2

# Scored against the targets that were returned alongside the forward predictions.
accuracy(preds, preds_fwd[1])

TensorBase(0.7456)

In [12]:
# Averaging mode shared by all three metrics.
avg = 'macro'

# Instantiate precision, recall and F1 with the same averaging mode.
precision, recall, f1score = (
    metric(average=avg) for metric in (Precision, Recall, F1Score)
)

In [13]:
# Precision of the ensemble: predicted class = highest averaged score per row.
precision(preds.argmax(dim=1), preds_fwd[1])

0.5863656019734342

In [14]:
# Recall of the ensemble: predicted class = highest averaged score per row.
recall(preds.argmax(dim=1), preds_fwd[1])

0.4930981710582918

In [15]:
# F1 score of the ensemble: predicted class = highest averaged score per row.
f1score(preds.argmax(dim=1), preds_fwd[1])

0.5254420614467545

# Interpretation with fastinference

Interpretation of the predictions with [fastinference](https://muellerzr.github.io/fastinference/). Installation:  
`pip install fastinference`

In [16]:
# Optional dependency (see the install note above); the import is presumably what makes
# `intrinsic_attention` available as a method on the learner - confirm against fastinference.
from fastinference.inference.text import intrinsic_attention

# `pred[1]` already IS the index of the predicted class (a 0-dim tensor). Taking
# np.argmax of a scalar always yields 0, which would explain class 0 instead of the
# predicted class, so the index is passed through directly as a plain int.
learn_fwd.intrinsic_attention(text, class_id=int(pred[1]))