In [None]:
# Colab-only setup, kept disabled: uncomment to mount Google Drive and change into the evaluation folder.
# from google.colab import drive
# drive.mount('/content/drive/')
# %cd "/content/drive/MyDrive/Colab Notebooks/SemEval WS/SemEval2023/Evaluation"

In [None]:
# Dependency installation, kept disabled: uncomment on a fresh environment that lacks these packages.
# !pip install transformers
# !pip install datasets
# !pip install evaluate
# !pip install colorama

In [3]:
import sys
import os
sys.path.append('../')
import pandas as pd
import torch 
import numpy as np
from transformers import AutoModelForTokenClassification, AutoTokenizer
from tqdm import tqdm
import random
from util.utils import feval, get_tag_mappings, get_data_from_hub, write_conll_format
from util.dataloader import PreDataCollator
# Set WANDB_DISABLED so that Weights & Biases reporting stays off for this session.
os.environ.update({"WANDB_DISABLED": "true"})

### Env Setup

In [4]:
from torch import cuda

# Use the GPU when PyTorch reports one as available; otherwise fall back to the CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

### Seed all

SEED = 42

# Seed every random number generator this notebook touches with the same value:
# Python's `random`, NumPy's global RNG, and PyTorch's CPU and CUDA generators.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(SEED)

### Instructions

Set the variables in the next cell according to the experiment:

``LANG``: Set the language. The language codes are listed in the Excel sheet linked below.

``TOKENIZER_NAME`` and ``MODEL_NAME``: Hugging Face Hub ID of the model that we uploaded after training (e.g. ``garNER/roberta-large-en``)

``SET``: Select the dataset variant that was used during training

- ``None`` --> **None Augmentation** (No Augmentation from wiki)
- ``tags`` --> **Max Augmentation** (Maximum Augmentation from wiki)
- ``LM`` --> **Entity Extractor** (Augmentation from wiki after extracting tags using another NER model)


**Don't forget to update the results in the excel sheet. The link is given below.**

[Link to Excel File](https://docs.google.com/spreadsheets/d/11LXkOBWxpWDGMsi9XC72eMNSJI14Qo2iwP8qugwjyqU/edit#gid=0)

### Define Variables

In [5]:
# --- Data selection ---
LANG = 'en'  # language code; use None for all languages
SET = None   # augmentation set: None, 'tags' or 'LM'

# --- Checkpoint to evaluate ---
MODEL_NAME = 'garNER/roberta-large-en'
TOKENIZER_NAME = 'garNER/roberta-large-en'
IS_CRF = False  # forwarded to feval as its IS_CRF flag

# --- Tokenization ---
MAX_LEN = 256  # max_len handed to the PreDataCollator

### Preparing data

In [6]:
# Fetch the evaluation data for the configured LANG / SET via the project helper.
# NOTE(review): this requests the 'validation' split even though the variable is named test_data.
test_data = get_data_from_hub(LANG, SET, split='validation')



In [7]:
# Spot-check one fixed example (index 6): print its sentence, then its tag sequence.
for _field in ('sent', 'labels'):
    print(test_data[6][_field])

two  important  voices  who  applied  incommensurability  to  historical  and  philosophical  notions  of  science  in  the  1960s  are  thomas  kuhn  and  paul  feyerabend  . 
 O  O  O  O  O  O  O  O  O  O  O  O  O  O  O  O  O  B-Scientist  I-Scientist  O  B-OtherPER  I-OtherPER  O


Dataset({
    features: ['ID', 'lang', 'sent', 'labels'],
    num_rows: 870
})

### Tokenization

In [8]:
# Tag <-> id lookup tables from the project helper.
# NOTE(review): presumably the same label set the model was trained with — verify in util.utils.
tags_to_ids, ids_to_tags = get_tag_mappings()
# Number of distinct tags; passed as num_labels when the model is loaded.
number_of_labels = len(tags_to_ids)

In [9]:
## Load the tokenizer named by TOKENIZER_NAME; use_fast=True requests the fast tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME, use_fast=True)

In [10]:
# Preprocessing callable configured with the tokenizer, the maximum length and the tag -> id map;
# it is applied to the dataset through Dataset.map in the next cell.
collator = PreDataCollator(tokenizer=tokenizer, max_len=MAX_LEN, tags_to_ids = tags_to_ids)

In [None]:

# Apply the collator to the evaluation set in batches of 4 across 4 worker processes.
# The original columns are removed, so only the collator's outputs remain.
test_tokenized = test_data.map(
    collator,
    batched=True,
    batch_size=4,
    num_proc=4,
    remove_columns=test_data.column_names,
)



### Load Saved Model

In [12]:
# Load the uploaded token-classification checkpoint with one output per tag,
# then move it to the selected device.
model = AutoModelForTokenClassification.from_pretrained(
    MODEL_NAME,
    num_labels=number_of_labels,
).to(device)

### Evaluation

In [13]:
# Run the project's evaluation helper on the raw and tokenized data.
# It returns two values: `outputs` (tabulated into a DataFrame further down) and
# `vis` (an indexable collection; one entry is printed in the next cell).
outputs, vis = feval(test_data,test_tokenized, model, device, IS_CRF=IS_CRF)

100%|██████████| 870/870 [00:52<00:00, 16.71it/s]


Accuracy: 0.9272070988118514
F1: 0.6633435119454568


In [14]:
# Inspect the visualisation for one example (index 10).
# NOTE(review): the ANSI colours appear to mark agreeing tags (green) vs. disagreeing tags (red)
# between gold and predicted labels — confirm against feval.
print(vis[10])

it [42m[O][O][0m stars [42m[O][O][0m tomokazu [42m[B-Artist][B-Artist][0m sugita [42m[I-Artist][I-Artist][0m daisuke [42m[B-OtherPER][0m[41m[B-Artist][0m sakaguchi [42m[I-OtherPER][0m[41m[I-Artist][0m rie [42m[B-Artist][B-Artist][0m kugimiya [42m[I-Artist][I-Artist][0m among [42m[O][O][0m others [42m[O][O][0m . [42m[O][O][0m


In [15]:
# Tabulate the evaluation outputs under explicit column names.
output_columns = ['ID', 'lang', 'sent', 'predictions', 'true']
df = pd.DataFrame(outputs, columns=output_columns)

In [17]:
# Name the output file after the model: last path component of MODEL_NAME plus "-outputs.txt".
# Trailing slashes are stripped first so that a local directory path such as "models/foo/"
# yields "foo-outputs.txt" rather than the empty-stem "-outputs.txt".
fileName = MODEL_NAME.rstrip('/').split('/')[-1] + '-outputs.txt'
# Hand the DataFrame to the project's CoNLL writer, exporting the 'predictions' column.
write_conll_format(fileName, df, col='predictions')
