## Cased Multilingual Test

Let's load a BERT model trained on English data and evaluate it on the other languages.

In [1]:
# Enable IPython's autoreload extension so that edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules before each cell execution.
%autoreload 2
import os
from datetime import datetime
import fire
import torch
from torchtext import data
import torch.nn as nn
from transformers import (
    AdamW, BertForSequenceClassification, BertTokenizer,
    get_constant_schedule_with_warmup
)

from offenseval.nn import (
    Tokenizer,
    train, evaluate, train_cycle, save_model, load_model
)

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Restore the saved checkpoint onto the chosen device. load_model returns the
# model together with TEXT, which the evaluation cells below pass alongside it
# (presumably the text preprocessing field -- confirm in offenseval.nn).
model, TEXT = load_model(
    "../models/bert.cased.sample.mean05.ft.pt",
    device,
)

In [2]:
from offenseval.nn import evaluate_dataset

# Baseline: score the loaded model on the OLID test file.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(model, TEXT, "../data/olid/test_a.tsv")

# Report loss, accuracy (as a percentage), macro F1 and the two per-class F1 scores.
print(
    f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% '
    f'Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}'
)

Loading dataset...
Building iterators
Test Loss: 0.679  Acc: 82.56% Macro F1: 0.786 Pos F1 0.694 Neg F1 0.878


## Danish

In [3]:
# Score the same model, unchanged, on the Danish dev file.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(model, TEXT, "../data/Danish/dev.tsv")

# Report loss, accuracy (as a percentage), macro F1 and the two per-class F1 scores.
print(
    f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% '
    f'Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}'
)

Loading dataset...
Building iterators
Test Loss: 0.511  Acc: 86.32% Macro F1: 0.643 Pos F1 0.362 Neg F1 0.923


## Turkish

In [4]:
# Score the same model, unchanged, on the Turkish dev file.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(model, TEXT, "../data/Turkish/dev.tsv")

# Report loss, accuracy (as a percentage), macro F1 and the two per-class F1 scores.
print(
    f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% '
    f'Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}'
)

Loading dataset...
Building iterators
Test Loss: 0.726  Acc: 80.23% Macro F1: 0.516 Pos F1 0.144 Neg F1 0.888


## Arabic

In [5]:
# Score the same model, unchanged, on the Arabic dev file.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(
    model, TEXT, "../data/Arabic/offenseval-ar-dev-v1.tsv",
)

# Report loss, accuracy (as a percentage), macro F1 and the two per-class F1 scores.
print(
    f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% '
    f'Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}'
)

Loading dataset...
Building iterators
Test Loss: 0.667  Acc: 82.20% Macro F1: 0.482 Pos F1 0.063 Neg F1 0.902


## Greek

In [6]:
# Score the same model, unchanged, on the Greek dev file.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(model, TEXT, "../data/Greek/dev.tsv")

# Report loss, accuracy (as a percentage), macro F1 and the two per-class F1 scores.
print(
    f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% '
    f'Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}'
)

Loading dataset...
Building iterators
Test Loss: 0.887  Acc: 71.36% Macro F1: 0.467 Pos F1 0.104 Neg F1 0.830
