## BERT Cased trained on OLID: Multilingual Test

Let's load a BERT model trained on OLID and test it against the other languages.

In [1]:
# Enable IPython's autoreload extension so edits to imported project modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import os
from datetime import datetime
import fire
import torch
from torchtext import data
import torch.nn as nn
from transformers import (
    AdamW, BertForSequenceClassification, BertTokenizer,
    get_constant_schedule_with_warmup
)

from offenseval.nn import (
    Tokenizer,
    train, evaluate, train_cycle, save_model, load_model
)

# Run on the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Restore the saved checkpoint. `load_model` hands back two objects: the model
# and the TEXT object that every evaluation cell below passes along with it.
model, TEXT = load_model("../models/bert_cased.olid.pt", device)

In [2]:
from offenseval.nn import evaluate_dataset

# Baseline: evaluate the loaded model on the OLID file test_a.tsv.
# `evaluate_dataset` yields five values, unpacked in the order used below.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(model, TEXT, "../data/olid/test_a.tsv")

# Accuracy is scaled by 100 for display as a percentage; the other values are
# printed with three decimals.
print(
    f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% '
    f'Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}'
)

Loading dataset...
Building iterators
Test Loss: 0.436  Acc: 83.02% Macro F1: 0.792 Pos F1 0.702 Neg F1 0.881


## Danish

In [3]:
# Evaluate the same model/TEXT pair on the Danish dev file.
# The metric variables from the previous cell are overwritten here.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(model, TEXT, "../data/Danish/dev.tsv")

# NOTE: the label reads "Test Loss" although the input file is dev.tsv; the
# wording is kept so the printed line matches the other cells.
print(
    f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% '
    f'Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}'
)

Loading dataset...
Building iterators
Test Loss: 0.481  Acc: 80.91% Macro F1: 0.602 Pos F1 0.315 Neg F1 0.889


## Turkish

In [4]:
# Evaluate the same model/TEXT pair on the Turkish dev file.
# The metric variables from the previous cell are overwritten here.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(model, TEXT, "../data/Turkish/dev.tsv")

# NOTE: the label reads "Test Loss" although the input file is dev.tsv; the
# wording is kept so the printed line matches the other cells.
print(
    f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% '
    f'Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}'
)

Loading dataset...
Building iterators
Test Loss: 0.532  Acc: 74.78% Macro F1: 0.551 Pos F1 0.253 Neg F1 0.848


## Arabic

In [5]:
# Evaluate the same model/TEXT pair on the Arabic dev file.
# The metric variables from the previous cell are overwritten here.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(model, TEXT, "../data/Arabic/offenseval-ar-dev-v1.tsv")

# NOTE: the label reads "Test Loss" although the input file is a dev split; the
# wording is kept so the printed line matches the other cells.
print(
    f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% '
    f'Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}'
)

Loading dataset...
Building iterators
Test Loss: 0.471  Acc: 81.40% Macro F1: 0.492 Pos F1 0.088 Neg F1 0.896


## Greek

In [6]:
# Evaluate the same model/TEXT pair on the Greek dev file.
# The metric variables from the previous cell are overwritten here.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(model, TEXT, "../data/Greek/dev.tsv")

# NOTE: the label reads "Test Loss" although the input file is dev.tsv; the
# wording is kept so the printed line matches the other cells.
print(
    f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% '
    f'Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}'
)

Loading dataset...
Building iterators
Test Loss: 0.586  Acc: 68.55% Macro F1: 0.568 Pos F1 0.342 Neg F1 0.793
