In [1]:
import sys

# Allows imports from the parent folder. The membership check keeps the cell
# idempotent: re-running it does not append a duplicate '..' entry.
if '..' not in sys.path:
    sys.path.append('..')

In [2]:
from datasets import load_dataset
from model import NERClassifier
from preprocess_dataset import NERDataset 
from trainner import Trainner
from transformers import BertTokenizerFast
from preprocess_dataset import remove_empty_entries

import numpy as np
import torch

## Checkpoints to be used

In [3]:
# Portuguese checkpoints (cased only), both under the `neuralmind/` namespace.
pt_base_cased, pt_large_cased = (
    'neuralmind/bert-base-portuguese-cased',
    'neuralmind/bert-large-portuguese-cased',
)

# English checkpoints: base/large size, cased/uncased variants.
en_base_cased, en_large_cased = 'bert-base-cased', 'bert-large-cased'
en_base_uncased, en_large_uncased = 'bert-base-uncased', 'bert-large-uncased'

# Groupings by language.
pt = [pt_base_cased, pt_large_cased]
en = [en_base_cased, en_large_cased, en_base_uncased, en_large_uncased]

# Groupings by casing.
cased = [pt_base_cased, en_base_cased, pt_large_cased, en_large_cased]
uncased = [en_base_uncased, en_large_uncased]

In [4]:
# Checkpoint groups to compare: one inner list per language (Portuguese first, then English).
models = [pt, en]

In [5]:
# Flatten the per-language groups into one list, preserving group order.
flat_models = [name for group in models for name in group]

In [6]:
# Portuguese checkpoints, in the order they appear in `flat_models`.
# Membership in `pt` is checked directly instead of looking for a '/' in the
# name, so a namespaced English checkpoint could not be misclassified.
portuguese_flat = [name for name in flat_models if name in pt]

In [7]:
# English checkpoints, in the order they appear in `flat_models`.
# Membership in `en` is checked directly instead of relying on the absence of
# a '/' in the name, which would break for a namespaced English checkpoint.
en_flat = [name for name in flat_models if name in en]

In [8]:
en_flat

['bert-base-cased',
 'bert-large-cased',
 'bert-base-uncased',
 'bert-large-uncased']

In [9]:
portuguese_flat

['neuralmind/bert-base-portuguese-cased',
 'neuralmind/bert-large-portuguese-cased']

In [10]:
flat_models

['neuralmind/bert-base-portuguese-cased',
 'neuralmind/bert-large-portuguese-cased',
 'bert-base-cased',
 'bert-large-cased',
 'bert-base-uncased',
 'bert-large-uncased']

In [11]:
len(flat_models)

6

## Dataset

In [12]:
# Name of the dataset handed to `load_dataset`.
data = "lener_br"

# Load the dataset and run it through `remove_empty_entries`
# (presumably drops examples without tokens -- confirm in preprocess_dataset).
dataset = remove_empty_entries(load_dataset(data))

Reusing dataset lener_br (/home/caiotulio/.cache/huggingface/datasets/lener_br/lener_br/1.0.0/4a8c97e6813b5c2d85a50faf0a3e6c24ea82f4a9044e6e9e8b24997d27399382)
Loading cached processed dataset at /home/caiotulio/.cache/huggingface/datasets/lener_br/lener_br/1.0.0/4a8c97e6813b5c2d85a50faf0a3e6c24ea82f4a9044e6e9e8b24997d27399382/cache-5e59bc59f25f3d7f.arrow
Loading cached processed dataset at /home/caiotulio/.cache/huggingface/datasets/lener_br/lener_br/1.0.0/4a8c97e6813b5c2d85a50faf0a3e6c24ea82f4a9044e6e9e8b24997d27399382/cache-8d0457760cd67ee6.arrow
Loading cached processed dataset at /home/caiotulio/.cache/huggingface/datasets/lener_br/lener_br/1.0.0/4a8c97e6813b5c2d85a50faf0a3e6c24ea82f4a9044e6e9e8b24997d27399382/cache-74e841c1c151996a.arrow


## Hyperparameters

In [13]:
# --- Input pipeline -------------------------------------------------------
MAX_LEN = 128      # forwarded as `max_len` to NERDataset and Trainner
BATCH_SIZE = 8     # batch size for the DataLoaders
shuffle = True     # DataLoader shuffle flag

# --- Optimisation ---------------------------------------------------------
# NOTE(review): 3e-4 is well above the 2e-5..5e-5 range usually used when
# fine-tuning BERT -- confirm this is intentional.
LEARNING_RATE = 3e-4
NUM_EPOCHS = 1

# --- Model ----------------------------------------------------------------
n_labels = 13      # passed to NERClassifier as the number of labels

# Train on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

## Training different checkpoints

In [14]:
import pandas as pd
from evaluator import Evaluator
from torch.utils.data import DataLoader
from transformers import AdamW

In [15]:
def compare(checkpoints):
    """Train and evaluate one NER classifier per checkpoint and collect the metrics.

    For every checkpoint a tokenizer, train/test ``NERDataset`` objects, their
    ``DataLoader`` wrappers, a ``NERClassifier``, an ``AdamW`` optimizer, an
    ``Evaluator`` and a ``Trainner`` are built, then ``Trainner.train()`` is run.

    Relies on notebook-level names: ``dataset``, ``MAX_LEN``, ``BATCH_SIZE``,
    ``shuffle``, ``n_labels``, ``LEARNING_RATE``, ``NUM_EPOCHS`` and ``device``.

    Args:
        checkpoints: sequence of checkpoint identifiers, each accepted by
            ``BertTokenizerFast.from_pretrained`` and by ``NERClassifier``.

    Returns:
        dict with keys ``"f1_t"``, ``"f1_e"``, ``"loss_t"`` and ``"loss_e"``.
        Each maps to a list with one entry per checkpoint, in input order
        (``_t``/``_e`` presumably mean train/evaluation, mirroring
        ``loader_t``/``loader_e``).
    """
    import gc  # local import: only needed for the per-checkpoint cleanup below

    metrics = {"f1_t": [], "f1_e": [], "loss_t": [], "loss_e": []}
    total = len(checkpoints)

    for position, checkpoint in enumerate(checkpoints, start=1):
        print(f"Progresso: {position}/{total}")
        print(f"------Iniciando treino para o checkpoint {checkpoint}---------")

        # The tokenizer must match the checkpoint, so the datasets are rebuilt
        # for every checkpoint as well.
        tokenizer = BertTokenizerFast.from_pretrained(checkpoint)
        print("Tokenizer carregado!")
        pytorch_dataset_train = NERDataset(data=dataset['train'], max_len=MAX_LEN, tokenizer=tokenizer)
        pytorch_dataset_test = NERDataset(data=dataset['test'], max_len=MAX_LEN, tokenizer=tokenizer)

        loader_t = DataLoader(pytorch_dataset_train, batch_size=BATCH_SIZE, shuffle=shuffle)
        # Evaluation gains nothing from shuffling; a fixed order keeps the
        # batch composition (and any per-batch averaged metric) reproducible.
        loader_e = DataLoader(pytorch_dataset_test, batch_size=BATCH_SIZE, shuffle=False)
        print("Dataloader carregado!")

        model = NERClassifier(n_labels=n_labels, checkpoint=checkpoint)
        optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, correct_bias=False)
        evaluator = Evaluator(loader_e, model, device)
        tr = Trainner(
            device,
            loader_t,
            model,
            optimizer,
            max_len=MAX_LEN,
            num_examples=len(pytorch_dataset_train),
            num_epochs=NUM_EPOCHS,
            evaluator=evaluator,
        )
        print("Trainner carregado!")

        loss_t, loss_e, f1_e, f1_t = tr.train()
        print(f"Treino finalizado para o checkpoint {checkpoint}\n"
              f"loss_t:{loss_t}, loss_e:{loss_e}, f1_e:{f1_e}, f1_t:{f1_t}")

        # train() returns one value per epoch for each metric. The notebook
        # runs with NUM_EPOCHS=1, so the first item is the only one.
        metrics["f1_t"].append(f1_t[0])
        metrics["f1_e"].append(f1_e[0])
        metrics["loss_t"].append(loss_t[0])
        metrics["loss_e"].append(loss_e[0])

        # The optimizer holds the model's parameters, and the evaluator and
        # trainer were both handed the model, so deleting `model` alone may
        # leave the weights reachable. Drop every holder, then collect and
        # clear the CUDA cache, so the next checkpoint does not have to share
        # GPU memory with this one.
        del tr, evaluator, optimizer, model
        gc.collect()
        torch.cuda.empty_cache()

    return metrics

## Comparing performance for Portuguese checkpoints

In [16]:
%%time
# Train and evaluate every Portuguese checkpoint. compare() returns a dict of
# metric lists (one entry per checkpoint); a later cell turns it into a DataFrame.
df_pt = compare(portuguese_flat)

Progresso: 1/2
------Iniciando treino para o checkpoint neuralmind/bert-base-portuguese-cased---------
Tokenizer carregado!
Dataloader carregado!


Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForTokenClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model check

Trainner carregado!
Começando treino! Essa função retorna a media de f1 e loss em cada epoch de treino e avaliação
----------Começando treino da epoch nº 1
Treinando em cuda
Iteração 0 -------- Loss: 2.4702069759368896 f1 nas ultimas 100 iterações: 0.002755956898358687 ------ Progresso: 0.00%.
Iteração 100 -------- Loss: 0.4600450396537781 f1 nas ultimas 100 iterações: 0.8056355955265156 ------ Progresso: 10.21%.
Iteração 200 -------- Loss: 0.4597471356391907 f1 nas ultimas 100 iterações: 0.8029780273614623 ------ Progresso: 20.43%.
Iteração 300 -------- Loss: 0.4899894893169403 f1 nas ultimas 100 iterações: 0.8281529374266171 ------ Progresso: 30.64%.
Iteração 400 -------- Loss: 0.6882858872413635 f1 nas ultimas 100 iterações: 0.8229732041326381 ------ Progresso: 40.86%.
Iteração 500 -------- Loss: 1.063020944595337 f1 nas ultimas 100 iterações: 0.8148661588121638 ------ Progresso: 51.07%.
Iteração 600 -------- Loss: 1.0118082761764526 f1 nas ultimas 100 iterações: 0.8240364039683132 

Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForTokenClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model chec

Trainner carregado!
Começando treino! Essa função retorna a media de f1 e loss em cada epoch de treino e avaliação
----------Começando treino da epoch nº 1
Treinando em cuda
Iteração 0 -------- Loss: 2.7701570987701416 f1 nas ultimas 100 iterações: 0.0004972591284957725 ------ Progresso: 0.00%.
Iteração 100 -------- Loss: 1.189145565032959 f1 nas ultimas 100 iterações: 0.769894145387029 ------ Progresso: 10.21%.
Iteração 200 -------- Loss: 0.5390634536743164 f1 nas ultimas 100 iterações: 0.8167756595651062 ------ Progresso: 20.43%.
Iteração 300 -------- Loss: 0.29373303055763245 f1 nas ultimas 100 iterações: 0.8186484467385783 ------ Progresso: 30.64%.
Iteração 400 -------- Loss: 0.8086936473846436 f1 nas ultimas 100 iterações: 0.8297792277821973 ------ Progresso: 40.86%.
Iteração 500 -------- Loss: 0.47747480869293213 f1 nas ultimas 100 iterações: 0.809640515387076 ------ Progresso: 51.07%.
Iteração 600 -------- Loss: 0.5507462024688721 f1 nas ultimas 100 iterações: 0.8188105691635679

In [17]:
# Convert the dict of metric lists returned by compare() into a table with one
# row per checkpoint, labelled by checkpoint name.
df_pt = (
    pd.DataFrame(df_pt)
    .assign(checkpoint=portuguese_flat)
    .set_index('checkpoint')
)

In [18]:
df_pt

Unnamed: 0_level_0,f1_t,f1_e,loss_t,loss_e
checkpoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
neuralmind/bert-base-portuguese-cased,0.8159,0.839253,0.660471,0.566779
neuralmind/bert-large-portuguese-cased,0.813524,0.835803,0.689029,0.602567


In [20]:
# Persist the Portuguese results; the merge section reloads this file.
# to_csv does not create missing folders, so `results/` must already exist.
df_pt.to_csv('results/checkpoint_pt.csv')

## Comparing English **base** checkpoints

In [None]:
%%time
# English base-size checkpoints (cased and uncased), trained in their own cell
# separately from the large ones.
base_en = ['bert-base-cased', 'bert-base-uncased']
# compare() returns a dict of metric lists, one entry per checkpoint in `base_en`.
df_en_base = compare(base_en)

Progresso: 1/2
------Iniciando treino para o checkpoint bert-base-cased---------
Tokenizer carregado!
Dataloader carregado!


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForTokenClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cas

Trainner carregado!
Começando treino! Essa função retorna a media de f1 e loss em cada epoch de treino e avaliação
----------Começando treino da epoch nº 1
Treinando em cuda
Iteração 0 -------- Loss: 2.753157615661621 f1 nas ultimas 100 iterações: 0.0003143912737508796 ------ Progresso: 0.00%.
Iteração 100 -------- Loss: 0.2382536083459854 f1 nas ultimas 100 iterações: 0.7871588646859417 ------ Progresso: 10.21%.
Iteração 200 -------- Loss: 0.5384090542793274 f1 nas ultimas 100 iterações: 0.8156690559794779 ------ Progresso: 20.43%.
Iteração 300 -------- Loss: 0.3716413080692291 f1 nas ultimas 100 iterações: 0.8176875013075072 ------ Progresso: 30.64%.
Iteração 400 -------- Loss: 0.3272955119609833 f1 nas ultimas 100 iterações: 0.8075146874991613 ------ Progresso: 40.86%.
Iteração 500 -------- Loss: 0.5871379375457764 f1 nas ultimas 100 iterações: 0.8179899213297555 ------ Progresso: 51.07%.


In [None]:
# Convert the dict of metric lists returned by compare() into a table with one
# row per checkpoint, labelled by checkpoint name.
df_en_base = (
    pd.DataFrame(df_en_base)
    .assign(checkpoint=base_en)
    .set_index('checkpoint')
)

In [None]:
df_en_base

In [None]:
# Persist the English base results; the merge section reloads this file.
# to_csv does not create missing folders, so `results/` must already exist.
df_en_base.to_csv('results/checkpoint_en_base.csv')

## Comparing English **large** checkpoints

In [16]:
%%time
# English large-size checkpoints (cased and uncased), trained in their own cell
# separately from the base ones.
large_en = ['bert-large-cased', 'bert-large-uncased']
# compare() returns a dict of metric lists, one entry per checkpoint in `large_en`.
df_en_large = compare(large_en)

Progresso: 1/2
------Iniciando treino para o checkpoint bert-large-cased---------
Tokenizer carregado!
Dataloader carregado!


Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-c

Trainner carregado!
Começando treino! Essa função retorna a media de f1 e loss em cada epoch de treino e avaliação
----------Começando treino da epoch nº 1
Treinando em cuda
Iteração 0 -------- Loss: 2.6045541763305664 f1 nas ultimas 100 iterações: 0.00154016374670789 ------ Progresso: 0.00%.
Iteração 100 -------- Loss: 0.9199352264404297 f1 nas ultimas 100 iterações: 0.7902711133829589 ------ Progresso: 10.21%.
Iteração 200 -------- Loss: 0.6169118881225586 f1 nas ultimas 100 iterações: 0.8072346429979547 ------ Progresso: 20.43%.
Iteração 300 -------- Loss: 0.47847306728363037 f1 nas ultimas 100 iterações: 0.8204010252715355 ------ Progresso: 30.64%.
Iteração 400 -------- Loss: 0.7637054920196533 f1 nas ultimas 100 iterações: 0.8174800442002979 ------ Progresso: 40.86%.
Iteração 500 -------- Loss: 0.34791430830955505 f1 nas ultimas 100 iterações: 0.8089613221079461 ------ Progresso: 51.07%.
Iteração 600 -------- Loss: 0.5794242024421692 f1 nas ultimas 100 iterações: 0.805404727318539

Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large

Trainner carregado!
Começando treino! Essa função retorna a media de f1 e loss em cada epoch de treino e avaliação
----------Começando treino da epoch nº 1
Treinando em cuda
Iteração 0 -------- Loss: 3.028632164001465 f1 nas ultimas 100 iterações: 4.8061664984296096e-05 ------ Progresso: 0.00%.
Iteração 100 -------- Loss: 0.6114732027053833 f1 nas ultimas 100 iterações: 0.780257454046546 ------ Progresso: 10.21%.
Iteração 200 -------- Loss: 0.6320658922195435 f1 nas ultimas 100 iterações: 0.8261782828854299 ------ Progresso: 20.43%.
Iteração 300 -------- Loss: 0.40635061264038086 f1 nas ultimas 100 iterações: 0.8087664604218304 ------ Progresso: 30.64%.
Iteração 400 -------- Loss: 0.961594820022583 f1 nas ultimas 100 iterações: 0.7978533114933755 ------ Progresso: 40.86%.
Iteração 500 -------- Loss: 0.6462196707725525 f1 nas ultimas 100 iterações: 0.8026644734093185 ------ Progresso: 51.07%.
Iteração 600 -------- Loss: 0.47248491644859314 f1 nas ultimas 100 iterações: 0.807614401169229

In [18]:
# Convert the dict of metric lists returned by compare() into a table with one
# row per checkpoint, labelled by checkpoint name.
df_en_large = (
    pd.DataFrame(df_en_large)
    .assign(checkpoint=large_en)
    .set_index('checkpoint')
)

In [19]:
df_en_large

Unnamed: 0_level_0,f1_t,f1_e,loss_t,loss_e
checkpoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bert-large-cased,0.810625,0.837057,0.688465,0.582503
bert-large-uncased,0.809515,0.836821,0.70529,0.593883


In [29]:
# Persist the English large results; the merge section reloads this file.
# to_csv does not create missing folders, so `results/` must already exist.
df_en_large.to_csv('results/checkpoint_en_large.csv')

## Merging the saved result DataFrames

In [22]:
# Reload the saved Portuguese results. Without `index_col`, 'checkpoint' comes
# back as an ordinary column and the rows get a fresh 0..n-1 index.
df_pt = pd.read_csv('results/checkpoint_pt.csv')

In [23]:
df_pt

Unnamed: 0,checkpoint,f1_t,f1_e,loss_t,loss_e
0,neuralmind/bert-base-portuguese-cased,0.8159,0.839253,0.660471,0.566779
1,neuralmind/bert-large-portuguese-cased,0.813524,0.835803,0.689029,0.602567


In [26]:
# Reload the saved English base results ('checkpoint' comes back as a column).
# NOTE(review): the cells that train and save the base checkpoints carry no
# execution count in this notebook, so this file presumably comes from an
# earlier session -- confirm it is up to date.
df_en_base = pd.read_csv('results/checkpoint_en_base.csv')

In [27]:
df_en_base

Unnamed: 0,checkpoint,f1_t,f1_e,loss_t,loss_e
0,bert-base-cased,0.811315,0.839581,0.688456,0.582071
1,bert-base-uncased,0.810601,0.838607,0.687892,0.570408


In [30]:
# Reload the saved English large results. Without `index_col`, 'checkpoint'
# comes back as an ordinary column and the rows get a fresh 0..n-1 index.
df_en_large = pd.read_csv('results/checkpoint_en_large.csv')

In [31]:
df_en_large

Unnamed: 0,checkpoint,f1_t,f1_e,loss_t,loss_e
0,bert-large-cased,0.810625,0.837057,0.688465,0.582503
1,bert-large-uncased,0.809515,0.836821,0.70529,0.593883


In [36]:
# Stack the three result tables row-wise. Each input keeps its own 0/1 row
# labels, so the combined index has duplicates until 'checkpoint' is made the index.
df_final = pd.concat([df_pt, df_en_base, df_en_large])

In [38]:
# Use the checkpoint name as the row label. The guard keeps the cell
# re-runnable: once 'checkpoint' is the index it is no longer a column, and an
# unconditional set_index('checkpoint') would raise KeyError on a second run.
if 'checkpoint' in df_final.columns:
    df_final = df_final.set_index('checkpoint')

In [39]:
df_final

Unnamed: 0_level_0,f1_t,f1_e,loss_t,loss_e
checkpoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
neuralmind/bert-base-portuguese-cased,0.8159,0.839253,0.660471,0.566779
neuralmind/bert-large-portuguese-cased,0.813524,0.835803,0.689029,0.602567
bert-base-cased,0.811315,0.839581,0.688456,0.582071
bert-base-uncased,0.810601,0.838607,0.687892,0.570408
bert-large-cased,0.810625,0.837057,0.688465,0.582503
bert-large-uncased,0.809515,0.836821,0.70529,0.593883


In [40]:
# Persist the merged comparison table, indexed by checkpoint name.
# to_csv does not create missing folders, so `results/` must already exist.
df_final.to_csv("results/all_checkpoints.csv")