## Uncased Multilingual Test

Let's load an English BERT model and evaluate it on the other languages.

In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before
# each cell executes, so edits to local packages are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2
import os
from datetime import datetime

import fire
import torch
import torch.nn as nn
from torchtext import data
from transformers import (
    AdamW, BertForSequenceClassification, BertTokenizer,
    get_constant_schedule_with_warmup
)

from offenseval.nn import (
    Tokenizer,
    train, evaluate, train_cycle, save_model, load_model,
    evaluate_dataset
)

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# load_model returns the model together with the TEXT object that every
# evaluate_dataset call below receives alongside it.
# NOTE(review): the notebook title says "Uncased", but this checkpoint's
# filename contains "cased" -- confirm which variant is intended.
model, TEXT = load_model(
    "../models/bert.cased.sample.mean06.ft.pt",
    device,
)

In [2]:
# Baseline run on the OLID test file, before the per-language sections below.
# evaluate_dataset is imported with the other offenseval.nn names in the
# notebook's first cell, so all imports live in one place.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(
    model, TEXT, "../data/olid/test_a.tsv"
)

# One-line summary: loss, accuracy as a percentage, and the three F1 values.
print(f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}')

Loading dataset...
Building iterators
Test Loss: 0.866  Acc: 82.56% Macro F1: 0.752 Pos F1 0.617 Neg F1 0.887


## Danish

In [3]:
# Score the already-loaded model on the Danish dev file.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(model, TEXT, "../data/Danish/dev.tsv")

# One-line summary; accuracy is shown as a percentage.
print(
    f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% '
    f'Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}'
)

Loading dataset...
Building iterators
Test Loss: 0.657  Acc: 88.51% Macro F1: 0.635 Pos F1 0.333 Neg F1 0.937


## Turkish

In [4]:
# Score the already-loaded model on the Turkish dev file.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(model, TEXT, "../data/Turkish/dev.tsv")

# One-line summary; accuracy is shown as a percentage.
print(
    f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% '
    f'Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}'
)

Loading dataset...
Building iterators
Test Loss: 1.029  Acc: 79.65% Macro F1: 0.468 Pos F1 0.051 Neg F1 0.886


## Arabic

In [5]:
# Score the already-loaded model on the Arabic dev file.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(model, TEXT, "../data/Arabic/offenseval-ar-dev-v1.tsv")

# One-line summary; accuracy is shown as a percentage.
print(
    f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% '
    f'Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}'
)

Loading dataset...
Building iterators
Test Loss: 0.952  Acc: 82.20% Macro F1: 0.457 Pos F1 0.011 Neg F1 0.902


## Greek

In [6]:
# Score the already-loaded model on the Greek dev file.
loss, acc, f1, pos_f1, neg_f1 = evaluate_dataset(model, TEXT, "../data/Greek/dev.tsv")

# One-line summary; accuracy is shown as a percentage.
print(
    f'Test Loss: {loss:.3f}  Acc: {acc*100:.2f}% '
    f'Macro F1: {f1:.3f} Pos F1 {pos_f1:.3f} Neg F1 {neg_f1:.3f}'
)

Loading dataset...
Building iterators
Test Loss: 1.520  Acc: 71.53% Macro F1: 0.423 Pos F1 0.012 Neg F1 0.834
