## Cased Multilingual Test

Let's load the English BERT model and evaluate it on the other languages: Danish, Turkish, Arabic, and Greek.

In [3]:
%load_ext autoreload
%autoreload 2
import os
from datetime import datetime

import fire
import torch
import torch.nn as nn
from torchtext import data
from transformers import (
    AdamW, BertForSequenceClassification, BertTokenizer,
    get_constant_schedule_with_warmup
)

from offenseval.nn import (
    Tokenizer,
    train, evaluate, train_cycle, save_model, load_model,
    evaluate_dataset
)

# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Load the saved checkpoint onto the chosen device. `load_model` hands back
# the model together with TEXT, which is passed to every evaluation below.
model, TEXT = load_model(
    "../models/bert.cased.sample.mean06.ft.pt",
    device,
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
# Score the loaded model on the OLID test file (test_a.tsv) to get a
# reference point before evaluating the other languages.
# `evaluate_dataset` is imported in the notebook's top import cell together
# with the other offenseval.nn helpers, so every dependency is declared in
# one place.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(
    model, TEXT, "../data/olid/test_a.tsv"
)

# One-line summary: loss, accuracy (as a percentage), macro F1, and the F1
# values reported separately as "Pos" and "Neg".
print(f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}')

Loading dataset...
Building iterators
Test Loss: 0.780  Acc: 83.95% Macro F1: 0.767 Pos F1 0.637 Neg F1 0.897


## Danish

In [5]:
# Evaluate the same model, unchanged, on the Danish dev file.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(
    model,
    TEXT,
    "../data/Danish/dev.tsv",
)

# NOTE(review): the label below says "Test" although the file scored here is
# dev.tsv; the string is kept as-is so it matches the recorded output.
print(
    f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}'
)

Loading dataset...
Building iterators
Test Loss: 0.538  Acc: 88.68% Macro F1: 0.637 Pos F1 0.337 Neg F1 0.938


## Turkish

In [6]:
# Evaluate the same model, unchanged, on the Turkish dev file.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(
    model,
    TEXT,
    "../data/Turkish/dev.tsv",
)

# NOTE(review): the label below says "Test" although the file scored here is
# dev.tsv; the string is kept as-is so it matches the recorded output.
print(
    f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}'
)

Loading dataset...
Building iterators
Test Loss: 0.762  Acc: 80.47% Macro F1: 0.485 Pos F1 0.078 Neg F1 0.891


## Arabic

In [7]:
# Evaluate the same model, unchanged, on the Arabic dev file.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(
    model,
    TEXT,
    "../data/Arabic/offenseval-ar-dev-v1.tsv",
)

# NOTE(review): the label below says "Test" although the file scored here is
# a dev split; the string is kept as-is so it matches the recorded output.
print(
    f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}'
)

Loading dataset...
Building iterators
Test Loss: 0.691  Acc: 82.00% Macro F1: 0.456 Pos F1 0.011 Neg F1 0.901


## Greek

In [8]:
# Evaluate the same model, unchanged, on the Greek dev file.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(
    model,
    TEXT,
    "../data/Greek/dev.tsv",
)

# NOTE(review): the label below says "Test" although the file scored here is
# dev.tsv; the string is kept as-is so it matches the recorded output.
print(
    f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}'
)

Loading dataset...
Building iterators
Test Loss: 1.053  Acc: 71.36% Macro F1: 0.444 Pos F1 0.056 Neg F1 0.831
