# Fine-tuning

Test several pre-trained models from Hugging Face, fine-tune them on the ADU data, and tune their hyperparameters.

In [None]:
!pip install pandas
!pip install datasets
!pip install transformers
!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
!pip install "ray[tune]" optuna

## Preparing our Data

In this notebook, we'll start by using a local dataset (instead of using a dataset stored at Hugging Face).
Let's load data for our classification task.

### Loading dataset

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np

# Importing the dataset
dataset = pd.read_excel('/content/drive/Shareddrives/PLN/Assignment 2/data/OpArticles_ADUs.xlsx')
# Keep only the ADU text (`tokens`) and its class (`label`).
dataset = dataset.drop(columns=['article_id', 'annotator', 'node','ranges'])

# Encode the five ADU classes as integer ids and assign the result back to the
# column. Calling `replace(..., inplace=True)` on `dataset['label']` is chained
# in-place assignment, which newer pandas versions warn about and which no
# longer updates `dataset` under Copy-on-Write.
label_to_id = {'Value': 0, 'Value(+)': 1, 'Value(-)': 2, 'Fact': 3, 'Policy': 4}
# `astype` fails fast if the sheet contains a label outside `label_to_id`.
dataset['label'] = dataset['label'].map(label_to_id).astype('int64')

# `DataFrame.info()` prints its report itself and returns None, so it is not
# wrapped in print() (that only adds a stray "None" line to the output).
dataset.info()
print(dataset.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16743 entries, 0 to 16742
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   tokens  16743 non-null  object
 1   label   16743 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 261.7+ KB
None
                                              tokens  label
0           O facto não é apenas fruto da ignorância      0
1  havia no seu humor mais jornalismo (mais inves...      0
2                              É tudo cómico na FIFA      0
3  o que todos nós permitimos que esta organizaçã...      0
4            não nos fazem rir à custa dos poderosos      0


For ease of usage with Transformer models, we convert the dataset into a Hugging Face dataset and split it into train, validation and test sets.

In [None]:
from datasets import Dataset

# Wrap the pandas DataFrame in a Hugging Face `Dataset`; the splits below are derived from it.
dataset_hf = Dataset.from_pandas(dataset)

In [None]:
from datasets import DatasetDict

# 90% train, 10% held out for validation + test (shuffled with a fixed seed)
train_test = dataset_hf.train_test_split(test_size=0.1, shuffle=True, seed=42)

# Split the held-out 10% in half: one half for validation, the other for test
valid_test = train_test['test'].train_test_split(test_size=0.5, shuffle=True, seed=42)

# Gather the three splits into a single DatasetDict. Note that the 'train' part
# of the second split is what becomes the validation set.
train_valid_test_dataset = DatasetDict({
    'train': train_test['train'],
    'validation': valid_test['train'],
    'test': valid_test['test']
})

In [None]:
train_valid_test_dataset

DatasetDict({
    train: Dataset({
        features: ['tokens', 'label'],
        num_rows: 15068
    })
    validation: Dataset({
        features: ['tokens', 'label'],
        num_rows: 837
    })
    test: Dataset({
        features: ['tokens', 'label'],
        num_rows: 838
    })
})

## Fine-tuning a pretrained model

### Tokenizer

We first load the tokenizer for our model:

In [None]:
from transformers import AutoTokenizer

def get_tokenizer(name):
    """Load the tokenizer that belongs to a pretrained checkpoint.

    Args:
        name: Checkpoint identifier handed to `AutoTokenizer.from_pretrained`.

    Returns:
        Whatever `AutoTokenizer.from_pretrained(name)` returns.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    return loaded_tokenizer

Now we need to [preprocess](https://huggingface.co/docs/transformers/preprocessing) our data.

First, we obtain the length (in whitespace-separated words) of the longest sequence in each of our data splits.

In [None]:
def find_max_length(dataset):
    """Return the word count of the longest text in `dataset`.

    Words are whatever `str.split()` yields, i.e. whitespace-separated chunks.

    Args:
        dataset: Iterable of strings (for example the `tokens` column of a split).

    Returns:
        The largest word count found, or 0 when `dataset` is empty.
    """
    # Each text is split exactly once; `default=0` keeps an empty split from
    # raising ValueError inside max().
    return max((len(text.split()) for text in dataset), default=0)

# Longest text of each split, measured in whitespace-separated words.
# NOTE(review): these are word counts, not tokenizer (subword) token counts.
train_max_length = find_max_length(train_valid_test_dataset["train"]["tokens"])
val_max_length = find_max_length(train_valid_test_dataset["validation"]["tokens"])
test_max_length = find_max_length(train_valid_test_dataset["test"]["tokens"])

print(f"Longest sequence in train set has {train_max_length} words")
print(f"Longest sequence in val set has {val_max_length} words")
print(f"Longest sequence in test set has {test_max_length} words")

Longest sequence in train set has 81 words
Longest sequence in val set has 51 words
Longest sequence in test set has 59 words


Tokenize entire dataset

In [None]:
# Module-level tokenizer used by `tokenize_dataset`. It starts empty; the
# train_* helpers assign the real tokenizer through `global tokenizer`.
tokenizer = None

def tokenize_dataset(sample):
    """Tokenize the `tokens` field of a (batched) sample with the global tokenizer.

    Every sequence is truncated and padded to 81 positions, the word count of
    the longest training text reported earlier in the notebook.
    NOTE(review): 81 is a whitespace word count, not a subword-token count, so
    texts whose tokenization is longer are truncated. Confirm this is intended.

    Args:
        sample: Mapping with a "tokens" entry holding the text(s) to encode.

    Returns:
        The value returned by calling the module-level `tokenizer`.
    """
    texts = sample["tokens"]
    encoding_options = {"truncation": True, "max_length": 81, "padding": "max_length"}
    return tokenizer(texts, **encoding_options)

def get_tokenized_data(dataset):
    """Apply `tokenize_dataset` to `dataset` in batches and drop the raw text.

    Args:
        dataset: Object exposing a Hugging Face style `.map(...)` method.

    Returns:
        The result of `dataset.map`, with the "tokens" column removed.
    """
    map_options = {"batched": True, "remove_columns": ["tokens"]}
    return dataset.map(tokenize_dataset, **map_options)

### Loading the model

Since we want to use the model for classification, we should load it with an appropriate classification head:

In [None]:
from transformers import AutoModelForSequenceClassification

# Checkpoint name read by `get_model`; assigned in the per-model cells below.
model_name = None

def get_model():
    """Build a 5-label sequence classifier from the current `model_name` on the GPU.

    Used as the `model_init` callback of the trainers in this notebook. The
    checkpoint comes from the module-level `model_name`, not from a parameter.

    Returns:
        The loaded model after `.cuda()` has been called on it.
    """
    classifier = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=5,
        ignore_mismatched_sizes=True,
    )
    # The return value of cuda() is ignored; the same object is handed back.
    classifier.cuda()
    return classifier

### Fine-tuning

The next step is to [fine-tune](https://huggingface.co/docs/transformers/training) the model with our train data. To do so, we can make use of a [Trainer](https://huggingface.co/docs/transformers/main_classes/trainer).
There are several aspects of training that you can specify via [TrainingArguments](https://huggingface.co/docs/transformers/main_classes/trainer#transformers.TrainingArguments).

In [None]:
from transformers import TrainingArguments, Trainer, DataCollatorWithPadding
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from IPython.display import display

def compute_metrics(pred):
    """Score a prediction object with accuracy and macro-averaged P/R/F1.

    Args:
        pred: Object with `label_ids` (gold labels) and `predictions`
            (per-class scores; the arg-max over the last axis is the
            predicted label).

    Returns:
        Dict with the keys 'accuracy', 'f1', 'precision' and 'recall'.
    """
    gold = pred.label_ids
    predicted = pred.predictions.argmax(-1)
    # The fourth element returned by scikit-learn (support) is not needed here.
    macro_precision, macro_recall, macro_f1 = precision_recall_fscore_support(
        gold, predicted, average='macro'
    )[:3]
    scores = {'accuracy': accuracy_score(gold, predicted)}
    scores['f1'] = macro_f1
    scores['precision'] = macro_precision
    scores['recall'] = macro_recall
    return scores

def get_trainingArgs():
    """Create the baseline `TrainingArguments` shared by every run.

    Checkpoints go to ./results, evaluation and saving happen once per epoch,
    and the checkpoint with the best `eval_f1` is reloaded at the end.

    Returns:
        A freshly constructed `TrainingArguments` instance.
    """
    settings = dict(
        # Where checkpoints are written
        output_dir="./results",
        # Batching and precision
        per_device_train_batch_size=32,
        per_device_eval_batch_size=32,
        fp16=True,
        # Optimisation
        learning_rate=2e-5,
        num_train_epochs=3,
        weight_decay=0.01,
        # Reproducibility
        seed=42,
        data_seed=42,
        # Evaluate/save every epoch and keep the best model by F1
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="eval_f1",
    )
    return TrainingArguments(**settings)

def get_trainer(model_init_, args_, dataset_, tokenizer_, data_collator_, compute_metrics_):
    """Assemble a standard `Trainer` from its parts.

    Args:
        model_init_: Callable that builds the model to train.
        args_: Training arguments for the run.
        dataset_: Mapping providing the "train" and "validation" splits.
        tokenizer_: Tokenizer handed to the trainer.
        data_collator_: Collator used to build batches.
        compute_metrics_: Callable that turns predictions into metrics.

    Returns:
        The configured `Trainer`.
    """
    trainer_options = {
        "model_init": model_init_,
        "args": args_,
        "train_dataset": dataset_["train"],
        "eval_dataset": dataset_["validation"],
        "tokenizer": tokenizer_,
        "data_collator": data_collator_,
        "compute_metrics": compute_metrics_,
    }
    return Trainer(**trainer_options)

def train_model(model_name):
  """Fine-tune `model_name` with the default loss, evaluate it, and save it.

  Tokenizes `train_valid_test_dataset` with the checkpoint's tokenizer, trains
  with the settings from `get_trainingArgs`, displays the training output, the
  validation metrics and the test metrics, and saves the model to Drive under
  models/baseline/<model_name>.

  Args:
      model_name: Hugging Face checkpoint identifier.
  """
  global tokenizer
  # `get_model` reads the module-level `model_name`, not this parameter. Keep the
  # two in sync so the loaded weights always match the tokenizer and save path.
  globals()["model_name"] = model_name
  tokenizer = get_tokenizer(model_name)
  tokenized_dataset = get_tokenized_data(train_valid_test_dataset)

  trainer = get_trainer(
    get_model,
    get_trainingArgs(),
    tokenized_dataset,
    tokenizer,
    DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics
    )

  # Train Model
  display(trainer.train())

  # Check performance in validation set
  display(trainer.evaluate())

  # Check how the model fares in our test set. Only the metrics are displayed:
  # the prediction output also carries the raw logits and labels of every test
  # example, which would flood the cell output.
  test_output = trainer.predict(test_dataset=tokenized_dataset["test"])
  display(test_output.metrics)

  # Save model for future use
  trainer.save_model('/content/drive/Shareddrives/PLN/Assignment 2/models/baseline/' + model_name)


#### Custom training to use a weighted loss

A class-weighted loss is useful here because our training set is imbalanced.

In [None]:
from sklearn.utils import class_weight

# Per-class weights computed from the labels of the training split with
# scikit-learn's 'balanced' mode. Rounded copies of these values are what
# `CustomTrainer` hard-codes for its weighted loss.
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_valid_test_dataset['train']['label']),
    y=train_valid_test_dataset['train']['label']
    )
class_weights

array([0.41220079, 2.36732129, 1.15066819, 0.92356727, 5.00598007])

In [None]:
from torch import nn, tensor

class CustomTrainer(Trainer):
    """`Trainer` that replaces the default loss with a class-weighted cross-entropy."""

    # Loss weight of each label id 0..4: the 'balanced' class weights of the
    # training split, rounded to two decimals.
    class_weights = (0.41, 2.37, 1.15, 0.92, 5.01)

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the weighted cross-entropy loss for one batch.

        Args:
            model: Model to run the forward pass with.
            inputs: Batch mapping; must contain "labels".
            return_outputs: When true, also return the model outputs.
            num_items_in_batch: Unused. Accepted so that transformers versions
                that pass this keyword to `compute_loss` do not fail.

        Returns:
            The loss, or `(loss, outputs)` when `return_outputs` is true.
        """
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # Build the weights on the same device as the logits instead of forcing
        # CUDA, so the loss works wherever the model has been placed.
        weight = tensor(self.class_weights, device=logits.device)
        loss_fct = nn.CrossEntropyLoss(weight=weight)
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

def get_trainer_custom(model_init_, args_, dataset_, tokenizer_, data_collator_, compute_metrics_):
    """Assemble a `CustomTrainer` (weighted loss) from its parts.

    Args:
        model_init_: Callable that builds the model to train.
        args_: Training arguments for the run.
        dataset_: Mapping providing the "train" and "validation" splits.
        tokenizer_: Tokenizer handed to the trainer.
        data_collator_: Collator used to build batches.
        compute_metrics_: Callable that turns predictions into metrics.

    Returns:
        The configured `CustomTrainer`.
    """
    train_split = dataset_["train"]
    validation_split = dataset_["validation"]
    trainer_options = dict(
        model_init=model_init_,
        args=args_,
        train_dataset=train_split,
        eval_dataset=validation_split,
        tokenizer=tokenizer_,
        data_collator=data_collator_,
        compute_metrics=compute_metrics_,
    )
    return CustomTrainer(**trainer_options)

def train_model_custom(model_name):
  """Fine-tune `model_name` with the class-weighted loss, evaluate it, and save it.

  Same pipeline as the baseline training, but the trainer is built with
  `get_trainer_custom` and the model is saved under
  models/baseline/custom/<model_name>.

  Args:
      model_name: Hugging Face checkpoint identifier.
  """
  global tokenizer
  # `get_model` reads the module-level `model_name`, not this parameter. Keep the
  # two in sync so the loaded weights always match the tokenizer and save path.
  globals()["model_name"] = model_name
  tokenizer = get_tokenizer(model_name)
  tokenized_dataset = get_tokenized_data(train_valid_test_dataset)

  trainer = get_trainer_custom(
    get_model,
    get_trainingArgs(),
    tokenized_dataset,
    tokenizer,
    DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics
    )

  # Train Model
  display(trainer.train())

  # Check performance in validation set
  display(trainer.evaluate())

  # Check how the model fares in our test set. Only the metrics are displayed:
  # the prediction output also carries the raw logits and labels of every test
  # example, which would flood the cell output.
  test_output = trainer.predict(test_dataset=tokenized_dataset["test"])
  display(test_output.metrics)

  # Save model for future use
  trainer.save_model('/content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/' + model_name)

#### Hyperparameter Tuning

In [None]:
def my_hp_space(trial):
  """Sample one hyperparameter configuration from a fixed categorical grid.

  Args:
      trial: Object exposing `suggest_categorical(name, choices)`.

  Returns:
      Dict mapping each hyperparameter name to the value suggested for it.
  """
  candidate_values = {
    "per_device_train_batch_size": [16, 32],
    "per_device_eval_batch_size": [16, 32],
    "weight_decay": [0, 0.01, 0.1],
    "learning_rate": [2e-6, 2e-5, 2e-4],
    "num_train_epochs": [3, 5],
  }
  # One suggestion per hyperparameter, requested in the order listed above.
  return {
    name: trial.suggest_categorical(name, choices)
    for name, choices in candidate_values.items()
  }

def my_objective(metrics):
  """Return the value the hyperparameter search optimises: the evaluation F1.

  Args:
      metrics: Mapping of evaluation metrics; must contain "eval_f1".

  Returns:
      The value stored under "eval_f1".
  """
  objective_value = metrics["eval_f1"]
  return objective_value

def get_trainer_hyper(model_init_, args_, dataset_, tokenizer_, data_collator_, compute_metrics_):
  """Assemble the plain `Trainer` used for the hyperparameter search.

  Args:
      model_init_: Callable that builds a fresh model.
      args_: Training arguments for the run.
      dataset_: Mapping providing the "train" and "validation" splits.
      tokenizer_: Tokenizer handed to the trainer.
      data_collator_: Collator used to build batches.
      compute_metrics_: Callable that turns predictions into metrics.

  Returns:
      The configured `Trainer`.
  """
  splits = {
      "train_dataset": dataset_["train"],
      "eval_dataset": dataset_["validation"],
  }
  return Trainer(
      model_init=model_init_,
      args=args_,
      tokenizer=tokenizer_,
      data_collator=data_collator_,
      compute_metrics=compute_metrics_,
      **splits
  )

def train_model_hyper(model_name):
  """Search hyperparameters for `model_name`, retrain with the best ones, and save.

  Runs 5 search trials over `my_hp_space`, maximising `eval_f1`. The best
  configuration is then applied to the trainer and the model is trained again
  from scratch with it, evaluated on the validation and test splits, and saved
  to Drive under models/baseline/hyper/<model_name>.

  Args:
      model_name: Hugging Face checkpoint identifier.
  """
  global tokenizer
  # `get_model` reads the module-level `model_name`, not this parameter. Keep the
  # two in sync so the loaded weights always match the tokenizer and save path.
  globals()["model_name"] = model_name
  tokenizer = get_tokenizer(model_name)
  tokenized_dataset = get_tokenized_data(train_valid_test_dataset)

  trainer = get_trainer_hyper(
    get_model,
    get_trainingArgs(),
    tokenized_dataset,
    tokenizer,
    DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics
    )

  best_run = trainer.hyperparameter_search(
      hp_space=my_hp_space,
      # `my_hp_space` uses the Optuna trial API (`suggest_categorical`), so the
      # backend is pinned rather than left to auto-detection (Ray Tune is
      # installed too).
      backend="optuna",
      n_trials=5,
      direction="maximize",
      compute_objective=my_objective
  )

  print("***** Hypertuning best run *****")
  display(best_run)

  # Set params
  for n, v in best_run.hyperparameters.items():
    setattr(trainer.args, n, v)

  # Retrain with the best hyperparameters. Without this step the trainer still
  # holds whatever model the last trial produced, and that is what would be saved.
  display(trainer.train())

  # Check performance in validation set
  display(trainer.evaluate())

  # Check how the model fares in our test set (metrics only, not raw logits)
  test_output = trainer.predict(test_dataset=tokenized_dataset["test"])
  display(test_output.metrics)

  # Save model for future use
  trainer.save_model('/content/drive/Shareddrives/PLN/Assignment 2/models/baseline/hyper/' + model_name)

### PT - [Geotrend/distilbert-base-pt-cased](https://huggingface.co/Geotrend/distilbert-base-pt-cased)

In [None]:
model_name = "Geotrend/distilbert-base-pt-cased"

In [None]:
train_model(model_name)

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/Geotrend/distilbert-base-pt-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/81477275f67d8ef9f4af68aeb6799256587efc91da21e00c93a447c4f529447f.6168459fbc8e0022c73def07f9e7976d25cbac85c752238f63549206aeba6abc
Model config DistilBertConfig {
  "_name_or_path": "Geotrend/distilbert-base-pt-cased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinuso

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.09142,0.526882,0.401252,0.503721,0.376302
2,1.161900,1.080899,0.554361,0.493484,0.555143,0.478184
3,0.958200,1.091655,0.55675,0.508679,0.541024,0.498284


***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-471
Configuration saved in ./results/checkpoint-471/config.json
Model weights saved in ./results/checkpoint-471/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-471/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-471/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-942
Configuration saved in ./results/checkpoint-942/config.json
Model weights saved in ./results/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-1413
Configuration saved in ./results/checkpoint-1413/config.json
Model we

TrainOutput(global_step=1413, training_loss=0.9993629536557855, metrics={'train_runtime': 161.8872, 'train_samples_per_second': 279.231, 'train_steps_per_second': 8.728, 'total_flos': 947379900386520.0, 'train_loss': 0.9993629536557855, 'epoch': 3.0})

***** Running Evaluation *****
  Num examples = 837
  Batch size = 32


{'epoch': 3.0,
 'eval_accuracy': 0.5567502986857825,
 'eval_f1': 0.5086788931624167,
 'eval_loss': 1.0916552543640137,
 'eval_precision': 0.5410243881818309,
 'eval_recall': 0.4982836960672012,
 'eval_runtime': 0.9421,
 'eval_samples_per_second': 888.455,
 'eval_steps_per_second': 28.66}

***** Running Prediction *****
  Num examples = 838
  Batch size = 32


PredictionOutput(predictions=array([[ 0.8022, -0.5796,  1.922 , -0.1488, -2.469 ],
       [ 2.906 , -1.33  ,  0.197 ,  0.2345, -2.705 ],
       [ 2.113 , -1.627 ,  1.341 ,  0.3484, -2.807 ],
       ...,
       [ 2.594 , -1.692 , -0.6704,  1.192 , -1.873 ],
       [ 0.4778, -1.39  ,  2.557 , -0.1533, -2.01  ],
       [ 2.281 , -1.909 ,  1.6   , -0.2612, -2.05  ]], dtype=float16), label_ids=array([0, 0, 0, 0, 2, 0, 0, 0, 3, 1, 3, 2, 3, 2, 3, 1, 0, 0, 2, 0, 0, 2,
       1, 0, 2, 0, 2, 2, 0, 0, 3, 0, 3, 0, 0, 0, 4, 2, 0, 0, 0, 3, 3, 0,
       0, 2, 0, 3, 3, 2, 2, 4, 0, 0, 1, 3, 0, 3, 0, 0, 3, 0, 0, 0, 3, 0,
       0, 0, 0, 0, 1, 2, 0, 0, 2, 4, 3, 4, 0, 0, 1, 0, 0, 0, 3, 0, 2, 0,
       0, 2, 1, 2, 3, 4, 0, 4, 0, 3, 3, 0, 0, 0, 0, 2, 0, 2, 3, 0, 0, 0,
       0, 0, 1, 3, 3, 0, 2, 0, 2, 2, 0, 1, 3, 0, 0, 0, 0, 0, 4, 0, 2, 0,
       3, 3, 0, 0, 0, 2, 1, 1, 0, 0, 0, 2, 0, 3, 3, 0, 0, 1, 0, 3, 0, 0,
       0, 3, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, 1, 3, 0, 1, 0, 0, 0, 2,
       0, 1, 0, 2, 3, 3,

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/Geotrend/distilbert-base-pt-cased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/Geotrend/distilbert-base-pt-cased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/Geotrend/distilbert-base-pt-cased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/Geotrend/distilbert-base-pt-cased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/Geotrend/distilbert-base-pt-cased/special_tokens_map.json


In [None]:
!rm -rf ./results/

#### Custom

In [None]:
train_model_custom(model_name)

loading configuration file https://huggingface.co/Geotrend/distilbert-base-pt-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/81477275f67d8ef9f4af68aeb6799256587efc91da21e00c93a447c4f529447f.6168459fbc8e0022c73def07f9e7976d25cbac85c752238f63549206aeba6abc
Model config DistilBertConfig {
  "_name_or_path": "Geotrend/distilbert-base-pt-cased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.19.2",
  "vocab_size": 25000
}

loading file https://huggingface.co/Geotrend/distilbert-base-pt-cased/resolve/main/vocab.txt from cache at /root/.cache/h

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/Geotrend/distilbert-base-pt-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/81477275f67d8ef9f4af68aeb6799256587efc91da21e00c93a447c4f529447f.6168459fbc8e0022c73def07f9e7976d25cbac85c752238f63549206aeba6abc
Model config DistilBertConfig {
  "_name_or_path": "Geotrend/distilbert-base-pt-cased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "label2id": {


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.157909,0.452808,0.460786,0.445467,0.558141
2,1.298000,1.115656,0.489845,0.483956,0.452715,0.569498
3,0.990600,1.124693,0.487455,0.483804,0.453093,0.577848


***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-471
Configuration saved in ./results/checkpoint-471/config.json
Model weights saved in ./results/checkpoint-471/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-471/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-471/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-942
Configuration saved in ./results/checkpoint-942/config.json
Model weights saved in ./results/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-1413
Configuration saved in ./results/checkpoint-1413/config.json
Model we

TrainOutput(global_step=1413, training_loss=1.0606480171219812, metrics={'train_runtime': 161.8942, 'train_samples_per_second': 279.219, 'train_steps_per_second': 8.728, 'total_flos': 947379900386520.0, 'train_loss': 1.0606480171219812, 'epoch': 3.0})

***** Running Evaluation *****
  Num examples = 837
  Batch size = 32


{'epoch': 3.0,
 'eval_accuracy': 0.48984468339307047,
 'eval_f1': 0.48395577705005427,
 'eval_loss': 1.1156558990478516,
 'eval_precision': 0.45271520149199035,
 'eval_recall': 0.569497627229586,
 'eval_runtime': 0.9517,
 'eval_samples_per_second': 879.488,
 'eval_steps_per_second': 28.371}

***** Running Prediction *****
  Num examples = 838
  Batch size = 32


PredictionOutput(predictions=array([[ 0.1675 , -0.8647 ,  2.055  , -0.1212 , -1.754  ],
       [ 1.467  , -0.874  ,  0.6045 ,  0.2031 , -1.951  ],
       [ 1.034  , -1.188  ,  1.406  ,  0.3562 , -2.14   ],
       ...,
       [ 1.409  , -1.322  , -0.05206,  1.258  , -1.819  ],
       [ 0.4797 , -1.363  ,  2.254  , -0.3884 , -1.489  ],
       [ 1.091  , -1.681  ,  1.879  ,  0.0966 , -1.884  ]], dtype=float16), label_ids=array([0, 0, 0, 0, 2, 0, 0, 0, 3, 1, 3, 2, 3, 2, 3, 1, 0, 0, 2, 0, 0, 2,
       1, 0, 2, 0, 2, 2, 0, 0, 3, 0, 3, 0, 0, 0, 4, 2, 0, 0, 0, 3, 3, 0,
       0, 2, 0, 3, 3, 2, 2, 4, 0, 0, 1, 3, 0, 3, 0, 0, 3, 0, 0, 0, 3, 0,
       0, 0, 0, 0, 1, 2, 0, 0, 2, 4, 3, 4, 0, 0, 1, 0, 0, 0, 3, 0, 2, 0,
       0, 2, 1, 2, 3, 4, 0, 4, 0, 3, 3, 0, 0, 0, 0, 2, 0, 2, 3, 0, 0, 0,
       0, 0, 1, 3, 3, 0, 2, 0, 2, 2, 0, 1, 3, 0, 0, 0, 0, 0, 4, 0, 2, 0,
       3, 3, 0, 0, 0, 2, 1, 1, 0, 0, 0, 2, 0, 3, 3, 0, 0, 1, 0, 3, 0, 0,
       0, 3, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, 1, 3, 0, 1, 0, 0, 

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/Geotrend/distilbert-base-pt-cased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/Geotrend/distilbert-base-pt-cased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/Geotrend/distilbert-base-pt-cased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/Geotrend/distilbert-base-pt-cased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/Geotrend/distilbert-base-pt-cased/special_tokens_map.json


In [None]:
!rm -rf ./results/

### PT - [Geotrend/bert-base-pt-cased](https://huggingface.co/Geotrend/bert-base-pt-cased)

In [None]:
model_name = "Geotrend/bert-base-pt-cased"

In [None]:
train_model(model_name)

https://huggingface.co/Geotrend/bert-base-pt-cased/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp0rcghb61


Downloading:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

storing https://huggingface.co/Geotrend/bert-base-pt-cased/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/29ca3cacb025b278614e903a0eca5e20b79457c33dcf2fa64250874c94b5a2c7.25d8d06fb0679146a3ed2a3463e3585380bff882fe6e1ebc497196e40dbbd7fa
creating metadata file for /root/.cache/huggingface/transformers/29ca3cacb025b278614e903a0eca5e20b79457c33dcf2fa64250874c94b5a2c7.25d8d06fb0679146a3ed2a3463e3585380bff882fe6e1ebc497196e40dbbd7fa
https://huggingface.co/Geotrend/bert-base-pt-cased/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpjhgcq87k


Downloading:   0%|          | 0.00/752 [00:00<?, ?B/s]

storing https://huggingface.co/Geotrend/bert-base-pt-cased/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/0ebe421d721676677f14dd2556297b76144988966ccb9ca2655253e71be546e1.49ec507c8c81787c05598347db28fe699ccea17f6f63e025aecca400cf4922f8
creating metadata file for /root/.cache/huggingface/transformers/0ebe421d721676677f14dd2556297b76144988966ccb9ca2655253e71be546e1.49ec507c8c81787c05598347db28fe699ccea17f6f63e025aecca400cf4922f8
loading configuration file https://huggingface.co/Geotrend/bert-base-pt-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/0ebe421d721676677f14dd2556297b76144988966ccb9ca2655253e71be546e1.49ec507c8c81787c05598347db28fe699ccea17f6f63e025aecca400cf4922f8
Model config BertConfig {
  "_name_or_path": "Geotrend/bert-base-pt-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "gradient_checkpointing": false,


Downloading:   0%|          | 0.00/158k [00:00<?, ?B/s]

storing https://huggingface.co/Geotrend/bert-base-pt-cased/resolve/main/vocab.txt in cache at /root/.cache/huggingface/transformers/6ea20dcc81530a9430480cfaa29ffcdb400a8a751aa5869a24b4a03a692c87fe.17ae322c57d9ccda798c1db9e312f6c1ea02b84d9a67150c1be5d3e147a5a1d4
creating metadata file for /root/.cache/huggingface/transformers/6ea20dcc81530a9430480cfaa29ffcdb400a8a751aa5869a24b4a03a692c87fe.17ae322c57d9ccda798c1db9e312f6c1ea02b84d9a67150c1be5d3e147a5a1d4
loading file https://huggingface.co/Geotrend/bert-base-pt-cased/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/6ea20dcc81530a9430480cfaa29ffcdb400a8a751aa5869a24b4a03a692c87fe.17ae322c57d9ccda798c1db9e312f6c1ea02b84d9a67150c1be5d3e147a5a1d4
loading file https://huggingface.co/Geotrend/bert-base-pt-cased/resolve/main/tokenizer.json from cache at None
loading file https://huggingface.co/Geotrend/bert-base-pt-cased/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/Geotrend/bert

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/Geotrend/bert-base-pt-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/0ebe421d721676677f14dd2556297b76144988966ccb9ca2655253e71be546e1.49ec507c8c81787c05598347db28fe699ccea17f6f63e025aecca400cf4922f8
Model config BertConfig {
  "_name_or_path": "Geotrend/bert-base-pt-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "L

Downloading:   0%|          | 0.00/402M [00:00<?, ?B/s]

storing https://huggingface.co/Geotrend/bert-base-pt-cased/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/61d2e20c36c15f6e61ab533914b2c5b57e0cae6082ada8322a8043ec6097ac76.b1c0f7b33f2434732d4bfb30a7f2c36d34a5c3b2730d01566ad10597bcaddaea
creating metadata file for /root/.cache/huggingface/transformers/61d2e20c36c15f6e61ab533914b2c5b57e0cae6082ada8322a8043ec6097ac76.b1c0f7b33f2434732d4bfb30a7f2c36d34a5c3b2730d01566ad10597bcaddaea
loading weights file https://huggingface.co/Geotrend/bert-base-pt-cased/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/61d2e20c36c15f6e61ab533914b2c5b57e0cae6082ada8322a8043ec6097ac76.b1c0f7b33f2434732d4bfb30a7f2c36d34a5c3b2730d01566ad10597bcaddaea
Some weights of the model checkpoint at Geotrend/bert-base-pt-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predict

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.076494,0.540024,0.444829,0.530485,0.431048
2,1.130800,1.024575,0.567503,0.527293,0.564836,0.51421
3,0.904900,1.04548,0.585424,0.562684,0.575958,0.561199


***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-471
Configuration saved in ./results/checkpoint-471/config.json
Model weights saved in ./results/checkpoint-471/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-471/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-471/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-942
Configuration saved in ./results/checkpoint-942/config.json
Model weights saved in ./results/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-1413
Configuration saved in ./results/checkpoint-1413/config.json
Model we

TrainOutput(global_step=1413, training_loss=0.9477275786295227, metrics={'train_runtime': 309.2618, 'train_samples_per_second': 146.167, 'train_steps_per_second': 4.569, 'total_flos': 1881666784115928.0, 'train_loss': 0.9477275786295227, 'epoch': 3.0})

***** Running Evaluation *****
  Num examples = 837
  Batch size = 32


{'epoch': 3.0,
 'eval_accuracy': 0.5854241338112306,
 'eval_f1': 0.5626841686842015,
 'eval_loss': 1.0454798936843872,
 'eval_precision': 0.5759576349042975,
 'eval_recall': 0.561198658157421,
 'eval_runtime': 1.6651,
 'eval_samples_per_second': 502.668,
 'eval_steps_per_second': 16.215}

***** Running Prediction *****
  Num examples = 838
  Batch size = 32


PredictionOutput(predictions=array([[ 1.467  , -0.7935 ,  0.554  ,  0.287  , -2.082  ],
       [ 2.943  , -1.81   , -0.349  ,  1.024  , -2.637  ],
       [ 1.874  , -1.769  ,  2.326  , -0.831  , -1.694  ],
       ...,
       [ 2.568  , -2.459  ,  0.3594 ,  0.68   , -1.996  ],
       [-0.06494, -1.254  ,  3.256  , -0.1514 , -1.845  ],
       [ 2.613  , -1.986  ,  1.412  , -0.10223, -2.629  ]], dtype=float16), label_ids=array([0, 0, 0, 0, 2, 0, 0, 0, 3, 1, 3, 2, 3, 2, 3, 1, 0, 0, 2, 0, 0, 2,
       1, 0, 2, 0, 2, 2, 0, 0, 3, 0, 3, 0, 0, 0, 4, 2, 0, 0, 0, 3, 3, 0,
       0, 2, 0, 3, 3, 2, 2, 4, 0, 0, 1, 3, 0, 3, 0, 0, 3, 0, 0, 0, 3, 0,
       0, 0, 0, 0, 1, 2, 0, 0, 2, 4, 3, 4, 0, 0, 1, 0, 0, 0, 3, 0, 2, 0,
       0, 2, 1, 2, 3, 4, 0, 4, 0, 3, 3, 0, 0, 0, 0, 2, 0, 2, 3, 0, 0, 0,
       0, 0, 1, 3, 3, 0, 2, 0, 2, 2, 0, 1, 3, 0, 0, 0, 0, 0, 4, 0, 2, 0,
       3, 3, 0, 0, 0, 2, 1, 1, 0, 0, 0, 2, 0, 3, 3, 0, 0, 1, 0, 3, 0, 0,
       0, 3, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, 1, 3, 0, 1, 0, 0, 

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/Geotrend/bert-base-pt-cased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/Geotrend/bert-base-pt-cased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/Geotrend/bert-base-pt-cased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/Geotrend/bert-base-pt-cased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/Geotrend/bert-base-pt-cased/special_tokens_map.json


In [None]:
!rm -rf ./results/

#### Custom

In [None]:
train_model_custom(model_name)

loading configuration file https://huggingface.co/Geotrend/bert-base-pt-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/0ebe421d721676677f14dd2556297b76144988966ccb9ca2655253e71be546e1.49ec507c8c81787c05598347db28fe699ccea17f6f63e025aecca400cf4922f8
Model config BertConfig {
  "_name_or_path": "Geotrend/bert-base-pt-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",


  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/Geotrend/bert-base-pt-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/0ebe421d721676677f14dd2556297b76144988966ccb9ca2655253e71be546e1.49ec507c8c81787c05598347db28fe699ccea17f6f63e025aecca400cf4922f8
Model config BertConfig {
  "_name_or_path": "Geotrend/bert-base-pt-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "L

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.147814,0.471924,0.46882,0.450708,0.567563
2,1.262900,1.055248,0.525687,0.521591,0.490529,0.627436
3,0.925700,1.092987,0.528076,0.536268,0.501378,0.632357


***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-471
Configuration saved in ./results/checkpoint-471/config.json
Model weights saved in ./results/checkpoint-471/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-471/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-471/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-942
Configuration saved in ./results/checkpoint-942/config.json
Model weights saved in ./results/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-1413
Configuration saved in ./results/checkpoint-1413/config.json
Model we

TrainOutput(global_step=1413, training_loss=0.9954000096412221, metrics={'train_runtime': 304.7673, 'train_samples_per_second': 148.323, 'train_steps_per_second': 4.636, 'total_flos': 1881666784115928.0, 'train_loss': 0.9954000096412221, 'epoch': 3.0})

***** Running Evaluation *****
  Num examples = 837
  Batch size = 32


{'epoch': 3.0,
 'eval_accuracy': 0.5280764635603346,
 'eval_f1': 0.5362681377085261,
 'eval_loss': 1.0929874181747437,
 'eval_precision': 0.5013780373859207,
 'eval_recall': 0.6323572246768123,
 'eval_runtime': 1.6742,
 'eval_samples_per_second': 499.941,
 'eval_steps_per_second': 16.127}

***** Running Prediction *****
  Num examples = 838
  Batch size = 32


PredictionOutput(predictions=array([[ 0.1782, -0.996 ,  2.299 , -0.2695, -1.591 ],
       [ 2.006 , -1.347 ,  1.023 ,  1.021 , -3.203 ],
       [ 1.732 , -1.834 ,  2.197 , -0.4438, -1.445 ],
       ...,
       [ 1.5625, -2.316 ,  1.687 ,  0.85  , -2.012 ],
       [ 0.1598, -1.537 ,  3.578 , -0.4612, -1.6875],
       [ 1.787 , -1.585 ,  2.174 ,  0.641 , -3.18  ]], dtype=float16), label_ids=array([0, 0, 0, 0, 2, 0, 0, 0, 3, 1, 3, 2, 3, 2, 3, 1, 0, 0, 2, 0, 0, 2,
       1, 0, 2, 0, 2, 2, 0, 0, 3, 0, 3, 0, 0, 0, 4, 2, 0, 0, 0, 3, 3, 0,
       0, 2, 0, 3, 3, 2, 2, 4, 0, 0, 1, 3, 0, 3, 0, 0, 3, 0, 0, 0, 3, 0,
       0, 0, 0, 0, 1, 2, 0, 0, 2, 4, 3, 4, 0, 0, 1, 0, 0, 0, 3, 0, 2, 0,
       0, 2, 1, 2, 3, 4, 0, 4, 0, 3, 3, 0, 0, 0, 0, 2, 0, 2, 3, 0, 0, 0,
       0, 0, 1, 3, 3, 0, 2, 0, 2, 2, 0, 1, 3, 0, 0, 0, 0, 0, 4, 0, 2, 0,
       3, 3, 0, 0, 0, 2, 1, 1, 0, 0, 0, 2, 0, 3, 3, 0, 0, 1, 0, 3, 0, 0,
       0, 3, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, 1, 3, 0, 1, 0, 0, 0, 2,
       0, 1, 0, 2, 3, 3,

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/Geotrend/bert-base-pt-cased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/Geotrend/bert-base-pt-cased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/Geotrend/bert-base-pt-cased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/Geotrend/bert-base-pt-cased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/Geotrend/bert-base-pt-cased/special_tokens_map.json


In [None]:
# Remove the intermediate checkpoints left in ./results/ by the run above
# (checkpoint-471, -942, -1413 in the log); the final model was already
# saved to the shared Drive.
!rm -rf ./results/

### PT - [neuralmind/bert-base-portuguese-cased](https://huggingface.co/neuralmind/bert-base-portuguese-cased)

In [None]:
# Hugging Face Hub model id for the experiments in this section; the training
# helpers called below take it as their only argument.
model_name = "neuralmind/bert-base-portuguese-cased"

In [None]:
# Baseline fine-tuning run for `model_name` (train_model is defined earlier in
# the notebook). Per the recorded output it downloads the tokenizer/weights,
# trains for 3 epochs with an evaluation after each, runs prediction on an
# 838-example set (presumably the test split), and saves the model on the
# shared Drive under models/baseline/<model_name>.
train_model(model_name)

https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp82lewq6n


Downloading:   0%|          | 0.00/43.0 [00:00<?, ?B/s]

storing https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/f1a9ba41d40e8c6f5ba4988aa2f7702c3b43768183e4b82483e04f2848841ecf.a6c00251b9344c189e2419373d6033016d0cd3d87ea59f6c86069046ac81956d
creating metadata file for /root/.cache/huggingface/transformers/f1a9ba41d40e8c6f5ba4988aa2f7702c3b43768183e4b82483e04f2848841ecf.a6c00251b9344c189e2419373d6033016d0cd3d87ea59f6c86069046ac81956d
https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp07j5k7k0


Downloading:   0%|          | 0.00/647 [00:00<?, ?B/s]

storing https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/e716e2151985ba669e7197b64cdde2552acee146494d40ffaf0688a3f152e6ed.18a0b8b86f3ebd4c8a1d8d6199178feae9971ff5420f1d12f0ed8326ffdff716
creating metadata file for /root/.cache/huggingface/transformers/e716e2151985ba669e7197b64cdde2552acee146494d40ffaf0688a3f152e6ed.18a0b8b86f3ebd4c8a1d8d6199178feae9971ff5420f1d12f0ed8326ffdff716
loading configuration file https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/e716e2151985ba669e7197b64cdde2552acee146494d40ffaf0688a3f152e6ed.18a0b8b86f3ebd4c8a1d8d6199178feae9971ff5420f1d12f0ed8326ffdff716
Model config BertConfig {
  "_name_or_path": "neuralmind/bert-base-portuguese-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hi

Downloading:   0%|          | 0.00/205k [00:00<?, ?B/s]

storing https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/vocab.txt in cache at /root/.cache/huggingface/transformers/aa6d50227b77416b26162efcf0cc9e9a702d13920840322060a2b41a44a8aff4.af25fb1e29ad0175300146695fd80069be69b211c52fa5486fa8aae2754cc814
creating metadata file for /root/.cache/huggingface/transformers/aa6d50227b77416b26162efcf0cc9e9a702d13920840322060a2b41a44a8aff4.af25fb1e29ad0175300146695fd80069be69b211c52fa5486fa8aae2754cc814
https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/added_tokens.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpfz9p5h3u


Downloading:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

storing https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/added_tokens.json in cache at /root/.cache/huggingface/transformers/9188d297517828a862f4e0b0700968574ca7ad38fbc0832c409bf7a9e5576b74.5cc6e825eb228a7a5cfd27cb4d7151e97a79fb962b31aaf1813aa102e746584b
creating metadata file for /root/.cache/huggingface/transformers/9188d297517828a862f4e0b0700968574ca7ad38fbc0832c409bf7a9e5576b74.5cc6e825eb228a7a5cfd27cb4d7151e97a79fb962b31aaf1813aa102e746584b
https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/special_tokens_map.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpohpcansd


Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

storing https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/special_tokens_map.json in cache at /root/.cache/huggingface/transformers/eecc45187d085a1169eed91017d358cc0e9cbdd5dc236bcd710059dbf0a2f816.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d
creating metadata file for /root/.cache/huggingface/transformers/eecc45187d085a1169eed91017d358cc0e9cbdd5dc236bcd710059dbf0a2f816.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d
loading file https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/aa6d50227b77416b26162efcf0cc9e9a702d13920840322060a2b41a44a8aff4.af25fb1e29ad0175300146695fd80069be69b211c52fa5486fa8aae2754cc814
loading file https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/tokenizer.json from cache at None
loading file https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/added_tokens.json from cache at 

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/e716e2151985ba669e7197b64cdde2552acee146494d40ffaf0688a3f152e6ed.18a0b8b86f3ebd4c8a1d8d6199178feae9971ff5420f1d12f0ed8326ffdff716
Model config BertConfig {
  "_name_or_path": "neuralmind/bert-base-portuguese-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4

Downloading:   0%|          | 0.00/418M [00:00<?, ?B/s]

storing https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/1e42c907c340c902923496246dae63e33f64955c529720991b7ec5543a98e442.fa492fca6dcee85bef053cc60912a211feb1f7173129e4eb1a5164e817f2f5f2
creating metadata file for /root/.cache/huggingface/transformers/1e42c907c340c902923496246dae63e33f64955c529720991b7ec5543a98e442.fa492fca6dcee85bef053cc60912a211feb1f7173129e4eb1a5164e817f2f5f2
loading weights file https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/1e42c907c340c902923496246dae63e33f64955c529720991b7ec5543a98e442.fa492fca6dcee85bef053cc60912a211feb1f7173129e4eb1a5164e817f2f5f2
Some weights of the model checkpoint at neuralmind/bert-base-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.trans

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.942205,0.605735,0.562372,0.622416,0.529411
2,1.041300,0.957099,0.605735,0.593564,0.600243,0.603709
3,0.791400,0.995687,0.608124,0.593587,0.585602,0.611254


***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-471
Configuration saved in ./results/checkpoint-471/config.json
Model weights saved in ./results/checkpoint-471/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-471/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-471/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-942
Configuration saved in ./results/checkpoint-942/config.json
Model weights saved in ./results/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-1413
Configuration saved in ./results/checkpoint-1413/config.json
Model we

TrainOutput(global_step=1413, training_loss=0.8501922974583113, metrics={'train_runtime': 306.6928, 'train_samples_per_second': 147.392, 'train_steps_per_second': 4.607, 'total_flos': 1881666784115928.0, 'train_loss': 0.8501922974583113, 'epoch': 3.0})

***** Running Evaluation *****
  Num examples = 837
  Batch size = 32


{'epoch': 3.0,
 'eval_accuracy': 0.6081242532855436,
 'eval_f1': 0.5935870215616074,
 'eval_loss': 0.9956874847412109,
 'eval_precision': 0.5856018554046334,
 'eval_recall': 0.6112540228004144,
 'eval_runtime': 1.6663,
 'eval_samples_per_second': 502.297,
 'eval_steps_per_second': 16.203}

***** Running Prediction *****
  Num examples = 838
  Batch size = 32


PredictionOutput(predictions=array([[ 1.265e+00,  2.188e+00, -2.840e+00, -8.140e-01,  1.819e-01],
       [ 3.586e+00, -1.773e+00,  2.740e-02,  7.588e-01, -2.875e+00],
       [ 3.287e+00, -2.150e+00,  2.739e-03,  5.830e-01, -2.027e+00],
       ...,
       [ 3.029e+00, -2.287e+00,  8.232e-01,  1.601e+00, -3.428e+00],
       [ 5.996e-01, -2.311e+00,  3.338e+00,  5.654e-01, -2.354e+00],
       [ 1.851e+00, -2.674e+00,  2.270e+00,  1.332e+00, -3.076e+00]],
      dtype=float16), label_ids=array([0, 0, 0, 0, 2, 0, 0, 0, 3, 1, 3, 2, 3, 2, 3, 1, 0, 0, 2, 0, 0, 2,
       1, 0, 2, 0, 2, 2, 0, 0, 3, 0, 3, 0, 0, 0, 4, 2, 0, 0, 0, 3, 3, 0,
       0, 2, 0, 3, 3, 2, 2, 4, 0, 0, 1, 3, 0, 3, 0, 0, 3, 0, 0, 0, 3, 0,
       0, 0, 0, 0, 1, 2, 0, 0, 2, 4, 3, 4, 0, 0, 1, 0, 0, 0, 3, 0, 2, 0,
       0, 2, 1, 2, 3, 4, 0, 4, 0, 3, 3, 0, 0, 0, 0, 2, 0, 2, 3, 0, 0, 0,
       0, 0, 1, 3, 3, 0, 2, 0, 2, 2, 0, 1, 3, 0, 0, 0, 0, 0, 4, 0, 2, 0,
       3, 3, 0, 0, 0, 2, 1, 1, 0, 0, 0, 2, 0, 3, 3, 0, 0, 1, 0, 3, 0, 0,
 

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/neuralmind/bert-base-portuguese-cased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/neuralmind/bert-base-portuguese-cased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/neuralmind/bert-base-portuguese-cased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/neuralmind/bert-base-portuguese-cased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/neuralmind/bert-base-portuguese-cased/special_tokens_map.json


In [None]:
# Remove the intermediate checkpoints left in ./results/ by the baseline run
# above before starting the next experiment.
!rm -rf ./results/

#### Custom

In [None]:
# Repeat the fine-tuning for the same `model_name` through train_model_custom
# (defined earlier in the notebook; what it customizes is not visible in this
# cell). The recorded output shows the result saved on the shared Drive under
# models/baseline/custom/<model_name>.
train_model_custom(model_name)

loading configuration file https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/e716e2151985ba669e7197b64cdde2552acee146494d40ffaf0688a3f152e6ed.18a0b8b86f3ebd4c8a1d8d6199178feae9971ff5420f1d12f0ed8326ffdff716
Model config BertConfig {
  "_name_or_path": "neuralmind/bert-base-portuguese-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_tran

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/e716e2151985ba669e7197b64cdde2552acee146494d40ffaf0688a3f152e6ed.18a0b8b86f3ebd4c8a1d8d6199178feae9971ff5420f1d12f0ed8326ffdff716
Model config BertConfig {
  "_name_or_path": "neuralmind/bert-base-portuguese-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.930533,0.51135,0.519888,0.490957,0.643124
2,1.086400,0.941561,0.563919,0.563991,0.530722,0.678912
3,0.757800,0.982193,0.573477,0.578637,0.540798,0.679506


***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-471
Configuration saved in ./results/checkpoint-471/config.json
Model weights saved in ./results/checkpoint-471/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-471/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-471/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-942
Configuration saved in ./results/checkpoint-942/config.json
Model weights saved in ./results/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-1413
Configuration saved in ./results/checkpoint-1413/config.json
Model we

TrainOutput(global_step=1413, training_loss=0.840694613770568, metrics={'train_runtime': 309.4544, 'train_samples_per_second': 146.076, 'train_steps_per_second': 4.566, 'total_flos': 1881666784115928.0, 'train_loss': 0.840694613770568, 'epoch': 3.0})

***** Running Evaluation *****
  Num examples = 837
  Batch size = 32


{'epoch': 3.0,
 'eval_accuracy': 0.5734767025089605,
 'eval_f1': 0.5786372051933095,
 'eval_loss': 0.9821927547454834,
 'eval_precision': 0.5407975495462425,
 'eval_recall': 0.6795055364642993,
 'eval_runtime': 1.7349,
 'eval_samples_per_second': 482.456,
 'eval_steps_per_second': 15.563}

***** Running Prediction *****
  Num examples = 838
  Batch size = 32


PredictionOutput(predictions=array([[ 0.3708,  2.951 , -2.82  , -0.389 , -0.2441],
       [ 2.482 , -1.916 ,  1.772 ,  1.009 , -3.129 ],
       [ 2.662 , -1.683 ,  0.561 ,  0.906 , -2.344 ],
       ...,
       [ 2.463 , -2.158 ,  1.035 ,  1.905 , -3.15  ],
       [ 0.8296, -2.41  ,  3.56  ,  0.68  , -2.459 ],
       [ 1.517 , -2.594 ,  2.467 ,  1.728 , -2.885 ]], dtype=float16), label_ids=array([0, 0, 0, 0, 2, 0, 0, 0, 3, 1, 3, 2, 3, 2, 3, 1, 0, 0, 2, 0, 0, 2,
       1, 0, 2, 0, 2, 2, 0, 0, 3, 0, 3, 0, 0, 0, 4, 2, 0, 0, 0, 3, 3, 0,
       0, 2, 0, 3, 3, 2, 2, 4, 0, 0, 1, 3, 0, 3, 0, 0, 3, 0, 0, 0, 3, 0,
       0, 0, 0, 0, 1, 2, 0, 0, 2, 4, 3, 4, 0, 0, 1, 0, 0, 0, 3, 0, 2, 0,
       0, 2, 1, 2, 3, 4, 0, 4, 0, 3, 3, 0, 0, 0, 0, 2, 0, 2, 3, 0, 0, 0,
       0, 0, 1, 3, 3, 0, 2, 0, 2, 2, 0, 1, 3, 0, 0, 0, 0, 0, 4, 0, 2, 0,
       3, 3, 0, 0, 0, 2, 1, 1, 0, 0, 0, 2, 0, 3, 3, 0, 0, 1, 0, 3, 0, 0,
       0, 3, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, 1, 3, 0, 1, 0, 0, 0, 2,
       0, 1, 0, 2, 3, 3,

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/neuralmind/bert-base-portuguese-cased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/neuralmind/bert-base-portuguese-cased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/neuralmind/bert-base-portuguese-cased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/neuralmind/bert-base-portuguese-cased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/neuralmind/bert-base-portuguese-cased/special_tokens_map.json


In [None]:
# Clear ./results/ so the hyperparameter search below starts with an empty
# checkpoint directory (its trials write to ./results/run-N/).
!rm -rf ./results/

#### Hyperparameter search

In [None]:
# Hyperparameter search for `model_name` (train_model_hyper is defined earlier
# in the notebook). The recorded output shows five trials (run-0 .. run-4),
# each checkpointing under ./results/run-N/, followed by the selected BestRun
# and the start of a further training pass.
# NOTE(review): the last training table in the saved output is empty, so the
# final pass may not have completed when the notebook was saved -- confirm.
train_model_hyper(model_name)

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/e716e2151985ba669e7197b64cdde2552acee146494d40ffaf0688a3f152e6ed.18a0b8b86f3ebd4c8a1d8d6199178feae9971ff5420f1d12f0ed8326ffdff716
Model config BertConfig {
  "_name_or_path": "neuralmind/bert-base-portuguese-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.0848,0.966433,0.594982,0.542655,0.622347,0.506537
2,0.7648,0.996963,0.597372,0.559421,0.584921,0.556668
3,0.6086,1.092008,0.608124,0.598246,0.584694,0.620953
4,0.5008,1.239289,0.592593,0.582328,0.576989,0.598212
5,0.4223,1.33816,0.57945,0.579239,0.564222,0.601224


***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/run-0/checkpoint-942
Configuration saved in ./results/run-0/checkpoint-942/config.json
Model weights saved in ./results/run-0/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/run-0/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/run-0/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/run-0/checkpoint-1884
Configuration saved in ./results/run-0/checkpoint-1884/config.json
Model weights saved in ./results/run-0/checkpoint-1884/pytorch_model.bin
tokenizer config file saved in ./results/run-0/checkpoint-1884/tokenizer_config.json
Special tokens file saved in ./results/run-0/checkpoint-1884/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/run-0/checkpoint-282

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.347,1.34047,0.46356,0.126694,0.092712,0.2
2,1.3311,1.331116,0.46356,0.126694,0.092712,0.2
3,1.32,1.28801,0.489845,0.200371,0.189506,0.238431


***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ./results/run-1/checkpoint-942
Configuration saved in ./results/run-1/checkpoint-942/config.json
Model weights saved in ./results/run-1/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/run-1/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/run-1/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ./results/run-1/checkpoint-1884
Configuration saved in ./results/run-1/checkpoint-1884/config.json
Model weights saved in ./results/run-1/checkpoint-1884/pytorch_model.bin
tokenizer config file saved in ./results/run-1/checkpoint-1884/tokenizer_config.json
Special tokens file saved in ./results/run-1/checkpoint-1884/special_tokens_map.json
***** Running Eva

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.3007,1.339401,0.46356,0.126694,0.092712,0.2
2,1.3326,1.33002,0.46356,0.126694,0.092712,0.2
3,1.3288,1.325716,0.46356,0.126694,0.092712,0.2
4,1.3326,1.327507,0.46356,0.126694,0.092712,0.2
5,1.3273,1.325111,0.46356,0.126694,0.092712,0.2


***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ./results/run-2/checkpoint-942
Configuration saved in ./results/run-2/checkpoint-942/config.json
Model weights saved in ./results/run-2/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/run-2/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/run-2/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ./results/run-2/checkpoint-1884
Configuration saved in ./results/run-2/checkpoint-1884/config.json
Model weights saved in ./results/run-2/checkpoint-1884/pytorch_model.bin
tokenizer config file saved in ./results/run-2/checkpoint-1884/tokenizer_config.json
Special tokens file saved in ./results/run-2/checkpoint-1884/special_tokens_map.json
***** Running Eva

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.3477,1.334645,0.46356,0.126694,0.092712,0.2
2,1.3308,1.331655,0.46356,0.126694,0.092712,0.2
3,1.3273,1.32406,0.46356,0.126694,0.092712,0.2


***** Running Evaluation *****
  Num examples = 837
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ./results/run-3/checkpoint-942
Configuration saved in ./results/run-3/checkpoint-942/config.json
Model weights saved in ./results/run-3/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/run-3/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/run-3/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ./results/run-3/checkpoint-1884
Configuration saved in ./results/run-3/checkpoint-1884/config.json
Model weights saved in ./results/run-3/checkpoint-1884/pytorch_model.bin
tokenizer config file saved in ./results/run-3/checkpoint-1884/tokenizer_config.json
Special tokens file saved in ./results/run-3/checkpoint-1884/special_tokens_map.json
***** Running Eva

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.0837,0.971992,0.599761,0.543177,0.640419,0.49922
2,0.7653,0.998559,0.597372,0.559532,0.580702,0.559315
3,0.6102,1.090947,0.60454,0.593929,0.582201,0.612343
4,0.4997,1.244229,0.592593,0.580683,0.573272,0.599535
5,0.4212,1.345027,0.578256,0.572193,0.555721,0.595378


***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/run-4/checkpoint-942
Configuration saved in ./results/run-4/checkpoint-942/config.json
Model weights saved in ./results/run-4/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/run-4/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/run-4/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/run-4/checkpoint-1884
Configuration saved in ./results/run-4/checkpoint-1884/config.json
Model weights saved in ./results/run-4/checkpoint-1884/pytorch_model.bin
tokenizer config file saved in ./results/run-4/checkpoint-1884/tokenizer_config.json
Special tokens file saved in ./results/run-4/checkpoint-1884/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/run-4/checkpoint-282

***** Hypertuning best run *****


BestRun(run_id='0', objective=0.5792388485370905, hyperparameters={'per_device_train_batch_size': 32, 'per_device_eval_batch_size': 32, 'weight_decay': 0.01, 'learning_rate': 2e-05, 'num_train_epochs': 5})

loading configuration file https://huggingface.co/neuralmind/bert-base-portuguese-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/e716e2151985ba669e7197b64cdde2552acee146494d40ffaf0688a3f152e6ed.18a0b8b86f3ebd4c8a1d8d6199178feae9971ff5420f1d12f0ed8326ffdff716
Model config BertConfig {
  "_name_or_path": "neuralmind/bert-base-portuguese-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 

Epoch,Training Loss,Validation Loss


In [None]:
# Remove the per-trial checkpoints (./results/run-N/checkpoint-*) written by
# the hyperparameter search above; each trial saves full model weights.
!rm -rf ./results/

### PT - [neuralmind/bert-large-portuguese-cased](https://huggingface.co/neuralmind/bert-large-portuguese-cased)


In [None]:
# Switch the experiments to the large variant of the same Portuguese BERT;
# the training helper called below takes this Hub model id as its argument.
model_name = "neuralmind/bert-large-portuguese-cased"

In [None]:
# Baseline fine-tuning run for the large model selected in `model_name`
# (train_model is defined earlier in the notebook).
train_model(model_name)

https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpwolmyslh


Downloading:   0%|          | 0.00/155 [00:00<?, ?B/s]

storing https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/3a44fa9a74e90f509368a7f2789df38e1fedd153a52c62ef5cc5f4b0f5c99c2a.d61b68f744aef2741575c270d4ba0228cd35693bfa15d8babfb5c1079062d5d7
creating metadata file for /root/.cache/huggingface/transformers/3a44fa9a74e90f509368a7f2789df38e1fedd153a52c62ef5cc5f4b0f5c99c2a.d61b68f744aef2741575c270d4ba0228cd35693bfa15d8babfb5c1079062d5d7
https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpviajokxs


Downloading:   0%|          | 0.00/648 [00:00<?, ?B/s]

storing https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/c534071830642050813fa94003dbf1234413b3f1d5dc66d259fbc82ff7d5fd59.c8340a82acfbbcd2dd960b86d2886ee120b21896ef0294150f0391918ae6ced5
creating metadata file for /root/.cache/huggingface/transformers/c534071830642050813fa94003dbf1234413b3f1d5dc66d259fbc82ff7d5fd59.c8340a82acfbbcd2dd960b86d2886ee120b21896ef0294150f0391918ae6ced5
loading configuration file https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/c534071830642050813fa94003dbf1234413b3f1d5dc66d259fbc82ff7d5fd59.c8340a82acfbbcd2dd960b86d2886ee120b21896ef0294150f0391918ae6ced5
Model config BertConfig {
  "_name_or_path": "neuralmind/bert-large-portuguese-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  

Downloading:   0%|          | 0.00/205k [00:00<?, ?B/s]

storing https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/vocab.txt in cache at /root/.cache/huggingface/transformers/9cfcd25de0a333b1b5f4a3db227e93a806cfb041d93a49221eeaee6773eaa41c.af25fb1e29ad0175300146695fd80069be69b211c52fa5486fa8aae2754cc814
creating metadata file for /root/.cache/huggingface/transformers/9cfcd25de0a333b1b5f4a3db227e93a806cfb041d93a49221eeaee6773eaa41c.af25fb1e29ad0175300146695fd80069be69b211c52fa5486fa8aae2754cc814
https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/added_tokens.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpsgfk7vad


Downloading:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

storing https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/added_tokens.json in cache at /root/.cache/huggingface/transformers/6a3aa038873b8f0d0ab3a4de0a658f063b89e3afd815920a5f393c0e4ae84259.5cc6e825eb228a7a5cfd27cb4d7151e97a79fb962b31aaf1813aa102e746584b
creating metadata file for /root/.cache/huggingface/transformers/6a3aa038873b8f0d0ab3a4de0a658f063b89e3afd815920a5f393c0e4ae84259.5cc6e825eb228a7a5cfd27cb4d7151e97a79fb962b31aaf1813aa102e746584b
https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/special_tokens_map.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpvxy9owa4


Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

storing https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/special_tokens_map.json in cache at /root/.cache/huggingface/transformers/d5b721c156180bbbcc4a1017e8c72a18f8f96cdc178acec5ddcd45905712b4cf.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d
creating metadata file for /root/.cache/huggingface/transformers/d5b721c156180bbbcc4a1017e8c72a18f8f96cdc178acec5ddcd45905712b4cf.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d
loading file https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/9cfcd25de0a333b1b5f4a3db227e93a806cfb041d93a49221eeaee6773eaa41c.af25fb1e29ad0175300146695fd80069be69b211c52fa5486fa8aae2754cc814
loading file https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/tokenizer.json from cache at None
loading file https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/added_tokens.json from cache

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/c534071830642050813fa94003dbf1234413b3f1d5dc66d259fbc82ff7d5fd59.c8340a82acfbbcd2dd960b86d2886ee120b21896ef0294150f0391918ae6ced5
Model config BertConfig {
  "_name_or_path": "neuralmind/bert-large-portuguese-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
   

Downloading:   0%|          | 0.00/1.25G [00:00<?, ?B/s]

storing https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/016fb7702039667c9fb9dd2ceffaf04027b13e525a6248cda2a4a87dbb8687af.881d7200bce807f871637ac9d552c541b2d4b00146a0bf1ab0360f3640031273
creating metadata file for /root/.cache/huggingface/transformers/016fb7702039667c9fb9dd2ceffaf04027b13e525a6248cda2a4a87dbb8687af.881d7200bce807f871637ac9d552c541b2d4b00146a0bf1ab0360f3640031273
loading weights file https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/016fb7702039667c9fb9dd2ceffaf04027b13e525a6248cda2a4a87dbb8687af.881d7200bce807f871637ac9d552c541b2d4b00146a0bf1ab0360f3640031273
Some weights of the model checkpoint at neuralmind/bert-large-portuguese-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.93314,0.609319,0.530659,0.661965,0.479169
2,1.018900,0.958596,0.620072,0.604083,0.618109,0.605055
3,0.741300,1.023891,0.616487,0.604026,0.597123,0.621664


***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-471
Configuration saved in ./results/checkpoint-471/config.json
Model weights saved in ./results/checkpoint-471/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-471/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-471/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-942
Configuration saved in ./results/checkpoint-942/config.json
Model weights saved in ./results/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-1413
Configuration saved in ./results/checkpoint-1413/config.json
Model we

TrainOutput(global_step=1413, training_loss=0.7978428910154868, metrics={'train_runtime': 908.4605, 'train_samples_per_second': 49.759, 'train_steps_per_second': 1.555, 'total_flos': 6664694612106456.0, 'train_loss': 0.7978428910154868, 'epoch': 3.0})

***** Running Evaluation *****
  Num examples = 837
  Batch size = 32


{'epoch': 3.0,
 'eval_accuracy': 0.6200716845878136,
 'eval_f1': 0.6040834571578513,
 'eval_loss': 0.9585959911346436,
 'eval_precision': 0.6181089324618736,
 'eval_recall': 0.6050553646429935,
 'eval_runtime': 4.3711,
 'eval_samples_per_second': 191.483,
 'eval_steps_per_second': 6.177}

***** Running Prediction *****
  Num examples = 838
  Batch size = 32


PredictionOutput(predictions=array([[ 1.42   ,  1.717  , -2.535  , -0.5234 ,  0.01865],
       [ 3.045  , -1.982  , -0.3286 ,  1.792  , -3.297  ],
       [ 3.377  , -2.8    ,  0.811  ,  0.817  , -3.129  ],
       ...,
       [ 2.947  , -2.887  ,  0.612  ,  1.703  , -3.05   ],
       [ 1.689  , -3.295  ,  3.098  ,  0.6255 , -2.76   ],
       [ 2.512  , -3.197  ,  2.16   ,  1.627  , -3.541  ]], dtype=float16), label_ids=array([0, 0, 0, 0, 2, 0, 0, 0, 3, 1, 3, 2, 3, 2, 3, 1, 0, 0, 2, 0, 0, 2,
       1, 0, 2, 0, 2, 2, 0, 0, 3, 0, 3, 0, 0, 0, 4, 2, 0, 0, 0, 3, 3, 0,
       0, 2, 0, 3, 3, 2, 2, 4, 0, 0, 1, 3, 0, 3, 0, 0, 3, 0, 0, 0, 3, 0,
       0, 0, 0, 0, 1, 2, 0, 0, 2, 4, 3, 4, 0, 0, 1, 0, 0, 0, 3, 0, 2, 0,
       0, 2, 1, 2, 3, 4, 0, 4, 0, 3, 3, 0, 0, 0, 0, 2, 0, 2, 3, 0, 0, 0,
       0, 0, 1, 3, 3, 0, 2, 0, 2, 2, 0, 1, 3, 0, 0, 0, 0, 0, 4, 0, 2, 0,
       3, 3, 0, 0, 0, 2, 1, 1, 0, 0, 0, 2, 0, 3, 3, 0, 0, 1, 0, 3, 0, 0,
       0, 3, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, 1, 3, 0, 1, 0, 0, 

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/neuralmind/bert-large-portuguese-cased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/neuralmind/bert-large-portuguese-cased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/neuralmind/bert-large-portuguese-cased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/neuralmind/bert-large-portuguese-cased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/neuralmind/bert-large-portuguese-cased/special_tokens_map.json


In [None]:
# Delete the intermediate Trainer checkpoints (./results/checkpoint-*) left by the run above;
# the log above shows the final model was already saved to the shared Drive folder.
!rm -rf ./results/

#### Custom

In [None]:
# Re-run the same checkpoint (model_name is unchanged) through train_model_custom,
# which is defined earlier in the notebook; the log shows this variant is saved under models/baseline/custom/.
train_model_custom(model_name)

loading configuration file https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/c534071830642050813fa94003dbf1234413b3f1d5dc66d259fbc82ff7d5fd59.c8340a82acfbbcd2dd960b86d2886ee120b21896ef0294150f0391918ae6ced5
Model config BertConfig {
  "_name_or_path": "neuralmind/bert-large-portuguese-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "output_past": true,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_t

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/neuralmind/bert-large-portuguese-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/c534071830642050813fa94003dbf1234413b3f1d5dc66d259fbc82ff7d5fd59.c8340a82acfbbcd2dd960b86d2886ee120b21896ef0294150f0391918ae6ced5
Model config BertConfig {
  "_name_or_path": "neuralmind/bert-large-portuguese-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
   

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.883668,0.593787,0.5923,0.553201,0.679242
2,1.030100,0.926085,0.57945,0.588638,0.550941,0.677452
3,0.686000,0.99719,0.596177,0.605407,0.569405,0.683026


***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-471
Configuration saved in ./results/checkpoint-471/config.json
Model weights saved in ./results/checkpoint-471/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-471/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-471/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-942
Configuration saved in ./results/checkpoint-942/config.json
Model weights saved in ./results/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-1413
Configuration saved in ./results/checkpoint-1413/config.json
Model we

TrainOutput(global_step=1413, training_loss=0.7642106409201113, metrics={'train_runtime': 908.7137, 'train_samples_per_second': 49.745, 'train_steps_per_second': 1.555, 'total_flos': 6664694612106456.0, 'train_loss': 0.7642106409201113, 'epoch': 3.0})

***** Running Evaluation *****
  Num examples = 837
  Batch size = 32


{'epoch': 3.0,
 'eval_accuracy': 0.5961768219832736,
 'eval_f1': 0.6054071776170294,
 'eval_loss': 0.9971903562545776,
 'eval_precision': 0.5694053159109427,
 'eval_recall': 0.6830262368406699,
 'eval_runtime': 4.4413,
 'eval_samples_per_second': 188.456,
 'eval_steps_per_second': 6.079}

***** Running Prediction *****
  Num examples = 838
  Batch size = 32


PredictionOutput(predictions=array([[ 0.8657,  3.812 , -2.768 , -0.3152, -1.829 ],
       [ 3.021 , -2.182 , -0.3164,  2.09  , -3.418 ],
       [ 3.008 , -3.111 ,  1.633 ,  1.261 , -3.318 ],
       ...,
       [ 2.314 , -3.12  ,  1.641 ,  1.635 , -3.033 ],
       [ 0.6284, -3.104 ,  4.16  ,  1.027 , -2.602 ],
       [ 1.993 , -3.428 ,  3.127 ,  1.451 , -3.361 ]], dtype=float16), label_ids=array([0, 0, 0, 0, 2, 0, 0, 0, 3, 1, 3, 2, 3, 2, 3, 1, 0, 0, 2, 0, 0, 2,
       1, 0, 2, 0, 2, 2, 0, 0, 3, 0, 3, 0, 0, 0, 4, 2, 0, 0, 0, 3, 3, 0,
       0, 2, 0, 3, 3, 2, 2, 4, 0, 0, 1, 3, 0, 3, 0, 0, 3, 0, 0, 0, 3, 0,
       0, 0, 0, 0, 1, 2, 0, 0, 2, 4, 3, 4, 0, 0, 1, 0, 0, 0, 3, 0, 2, 0,
       0, 2, 1, 2, 3, 4, 0, 4, 0, 3, 3, 0, 0, 0, 0, 2, 0, 2, 3, 0, 0, 0,
       0, 0, 1, 3, 3, 0, 2, 0, 2, 2, 0, 1, 3, 0, 0, 0, 0, 0, 4, 0, 2, 0,
       3, 3, 0, 0, 0, 2, 1, 1, 0, 0, 0, 2, 0, 3, 3, 0, 0, 1, 0, 3, 0, 0,
       0, 3, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, 1, 3, 0, 1, 0, 0, 0, 2,
       0, 1, 0, 2, 3, 3,

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/neuralmind/bert-large-portuguese-cased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/neuralmind/bert-large-portuguese-cased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/neuralmind/bert-large-portuguese-cased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/neuralmind/bert-large-portuguese-cased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/neuralmind/bert-large-portuguese-cased/special_tokens_map.json


In [None]:
# Delete the intermediate Trainer checkpoints (./results/checkpoint-*) left by the custom run above.
!rm -rf ./results/

### PT - [monilouise/ner_news_portuguese](https://huggingface.co/monilouise/ner_news_portuguese)

In [None]:
# Hugging Face Hub id of the next checkpoint to fine-tune.
# NOTE(review): the hub config logged below lists BertForTokenClassification with 13 NER labels,
# and the load log reports newly initialized sequence-classification weights.
model_name = "monilouise/ner_news_portuguese"

In [None]:
# Fine-tune and evaluate this checkpoint with the baseline train_model helper (defined earlier in the notebook);
# the log shows the result is saved under models/baseline/<model_name>.
train_model(model_name)

Downloading:   0%|          | 0.00/529 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/205k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/monilouise/ner_news_portuguese/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ee9b506d42deb9a1fde19bb7f40f64da74a2b871a58bfbfa55ccf29281aa9476.08868e7914995036411ed3728b7e905ee94d69938262ad30ffe4e5dac473f090
Model config BertConfig {
  "_name_or_path": "monilouise/ner_news_portuguese",
  "_num_labels": 13,
  "architectures": [
    "BertForTokenClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 5

Downloading:   0%|          | 0.00/416M [00:00<?, ?B/s]

storing https://huggingface.co/monilouise/ner_news_portuguese/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/fb62c9a968a0d836c5a20bed42a138f9dab28c916e46281077cb009748c472b7.d38ca2e5cf614b94008f1c5e374f16ff65c9317c3b981e2cafe0390b585cf7b2
creating metadata file for /root/.cache/huggingface/transformers/fb62c9a968a0d836c5a20bed42a138f9dab28c916e46281077cb009748c472b7.d38ca2e5cf614b94008f1c5e374f16ff65c9317c3b981e2cafe0390b585cf7b2
loading weights file https://huggingface.co/monilouise/ner_news_portuguese/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/fb62c9a968a0d836c5a20bed42a138f9dab28c916e46281077cb009748c472b7.d38ca2e5cf614b94008f1c5e374f16ff65c9317c3b981e2cafe0390b585cf7b2
All model checkpoint weights were used when initializing BertForSequenceClassification.

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at monilouise/ner_news_portuguese and are newly initialized be

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.949752,0.603345,0.554669,0.626173,0.517015
2,1.044100,0.963784,0.60693,0.599824,0.612758,0.600058
3,0.803500,0.996435,0.60454,0.596391,0.594946,0.607471


***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-471
Configuration saved in ./results/checkpoint-471/config.json
Model weights saved in ./results/checkpoint-471/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-471/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-471/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-942
Configuration saved in ./results/checkpoint-942/config.json
Model weights saved in ./results/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-1413
Configuration saved in ./results/checkpoint-1413/config.json
Model we

TrainOutput(global_step=1413, training_loss=0.8579645177108214, metrics={'train_runtime': 300.3196, 'train_samples_per_second': 150.52, 'train_steps_per_second': 4.705, 'total_flos': 1881666784115928.0, 'train_loss': 0.8579645177108214, 'epoch': 3.0})

***** Running Evaluation *****
  Num examples = 837
  Batch size = 32


{'epoch': 3.0,
 'eval_accuracy': 0.6069295101553166,
 'eval_f1': 0.5998235101021614,
 'eval_loss': 0.9637839198112488,
 'eval_precision': 0.6127578604216987,
 'eval_recall': 0.600057819233077,
 'eval_runtime': 1.6364,
 'eval_samples_per_second': 511.49,
 'eval_steps_per_second': 16.5}

***** Running Prediction *****
  Num examples = 838
  Batch size = 32


PredictionOutput(predictions=array([[ 1.531  ,  2.014  , -2.588  , -0.5044 , -0.635  ],
       [ 3.25   , -1.405  ,  0.09534,  0.683  , -2.826  ],
       [ 3.238  , -1.967  ,  0.798  ,  0.3672 , -2.3    ],
       ...,
       [ 3.166  , -1.813  ,  0.238  ,  1.151  , -2.86   ],
       [ 0.7837 , -2.408  ,  2.787  ,  0.5234 , -2.393  ],
       [ 2.486  , -2.3    ,  1.229  ,  1.451  , -3.18   ]], dtype=float16), label_ids=array([0, 0, 0, 0, 2, 0, 0, 0, 3, 1, 3, 2, 3, 2, 3, 1, 0, 0, 2, 0, 0, 2,
       1, 0, 2, 0, 2, 2, 0, 0, 3, 0, 3, 0, 0, 0, 4, 2, 0, 0, 0, 3, 3, 0,
       0, 2, 0, 3, 3, 2, 2, 4, 0, 0, 1, 3, 0, 3, 0, 0, 3, 0, 0, 0, 3, 0,
       0, 0, 0, 0, 1, 2, 0, 0, 2, 4, 3, 4, 0, 0, 1, 0, 0, 0, 3, 0, 2, 0,
       0, 2, 1, 2, 3, 4, 0, 4, 0, 3, 3, 0, 0, 0, 0, 2, 0, 2, 3, 0, 0, 0,
       0, 0, 1, 3, 3, 0, 2, 0, 2, 2, 0, 1, 3, 0, 0, 0, 0, 0, 4, 0, 2, 0,
       3, 3, 0, 0, 0, 2, 1, 1, 0, 0, 0, 2, 0, 3, 3, 0, 0, 1, 0, 3, 0, 0,
       0, 3, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, 1, 3, 0, 1, 0, 0, 

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/monilouise/ner_news_portuguese
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/monilouise/ner_news_portuguese/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/monilouise/ner_news_portuguese/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/monilouise/ner_news_portuguese/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/monilouise/ner_news_portuguese/special_tokens_map.json


In [None]:
# Delete the intermediate Trainer checkpoints (./results/checkpoint-*) left by the run above.
!rm -rf ./results/

#### Custom

In [None]:
# Same checkpoint, trained with the train_model_custom helper (defined earlier in the notebook);
# the log shows the result is saved under models/baseline/custom/<model_name>.
train_model_custom(model_name)

loading configuration file https://huggingface.co/monilouise/ner_news_portuguese/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ee9b506d42deb9a1fde19bb7f40f64da74a2b871a58bfbfa55ccf29281aa9476.08868e7914995036411ed3728b7e905ee94d69938262ad30ffe4e5dac473f090
Model config BertConfig {
  "_name_or_path": "monilouise/ner_news_portuguese",
  "_num_labels": 13,
  "architectures": [
    "BertForTokenClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "L-ORG",
    "1": "O",
    "2": "I-PESSOA",
    "3": "I-ORG",
    "4": "B-PUB",
    "5": "L-PUB",
    "6": "I-LOC",
    "7": "B-PESSOA",
    "8": "L-LOC",
    "9": "B-LOC",
    "10": "B-ORG",
    "11": "L-PESSOA",
    "12": "I-PUB"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "B-LO

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/monilouise/ner_news_portuguese/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ee9b506d42deb9a1fde19bb7f40f64da74a2b871a58bfbfa55ccf29281aa9476.08868e7914995036411ed3728b7e905ee94d69938262ad30ffe4e5dac473f090
Model config BertConfig {
  "_name_or_path": "monilouise/ner_news_portuguese",
  "_num_labels": 13,
  "architectures": [
    "BertForTokenClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LAB

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.956204,0.516129,0.521396,0.493274,0.641177
2,1.094300,0.941932,0.557945,0.562568,0.526983,0.676984
3,0.761900,1.00043,0.571087,0.576508,0.538739,0.673989


***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-471
Configuration saved in ./results/checkpoint-471/config.json
Model weights saved in ./results/checkpoint-471/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-471/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-471/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-942
Configuration saved in ./results/checkpoint-942/config.json
Model weights saved in ./results/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-1413
Configuration saved in ./results/checkpoint-1413/config.json
Model we

TrainOutput(global_step=1413, training_loss=0.8451679551002411, metrics={'train_runtime': 303.8545, 'train_samples_per_second': 148.769, 'train_steps_per_second': 4.65, 'total_flos': 1881666784115928.0, 'train_loss': 0.8451679551002411, 'epoch': 3.0})

***** Running Evaluation *****
  Num examples = 837
  Batch size = 32


{'epoch': 3.0,
 'eval_accuracy': 0.5710872162485066,
 'eval_f1': 0.5765075735510389,
 'eval_loss': 1.0004297494888306,
 'eval_precision': 0.5387391221172878,
 'eval_recall': 0.6739892543500792,
 'eval_runtime': 1.6434,
 'eval_samples_per_second': 509.322,
 'eval_steps_per_second': 16.43}

***** Running Prediction *****
  Num examples = 838
  Batch size = 32


PredictionOutput(predictions=array([[ 0.4407,  3.342 , -2.875 ,  0.4666, -1.863 ],
       [ 2.213 , -2.145 ,  2.215 ,  0.8574, -3.184 ],
       [ 2.713 , -1.936 ,  0.7695,  1.063 , -2.559 ],
       ...,
       [ 2.426 , -1.882 ,  0.551 ,  1.99  , -2.928 ],
       [ 0.7124, -2.414 ,  3.38  ,  0.737 , -2.824 ],
       [ 1.394 , -2.662 ,  2.398 ,  1.911 , -3.217 ]], dtype=float16), label_ids=array([0, 0, 0, 0, 2, 0, 0, 0, 3, 1, 3, 2, 3, 2, 3, 1, 0, 0, 2, 0, 0, 2,
       1, 0, 2, 0, 2, 2, 0, 0, 3, 0, 3, 0, 0, 0, 4, 2, 0, 0, 0, 3, 3, 0,
       0, 2, 0, 3, 3, 2, 2, 4, 0, 0, 1, 3, 0, 3, 0, 0, 3, 0, 0, 0, 3, 0,
       0, 0, 0, 0, 1, 2, 0, 0, 2, 4, 3, 4, 0, 0, 1, 0, 0, 0, 3, 0, 2, 0,
       0, 2, 1, 2, 3, 4, 0, 4, 0, 3, 3, 0, 0, 0, 0, 2, 0, 2, 3, 0, 0, 0,
       0, 0, 1, 3, 3, 0, 2, 0, 2, 2, 0, 1, 3, 0, 0, 0, 0, 0, 4, 0, 2, 0,
       3, 3, 0, 0, 0, 2, 1, 1, 0, 0, 0, 2, 0, 3, 3, 0, 0, 1, 0, 3, 0, 0,
       0, 3, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, 1, 3, 0, 1, 0, 0, 0, 2,
       0, 1, 0, 2, 3, 3,

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/monilouise/ner_news_portuguese
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/monilouise/ner_news_portuguese/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/monilouise/ner_news_portuguese/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/monilouise/ner_news_portuguese/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/monilouise/ner_news_portuguese/special_tokens_map.json


In [None]:
# Delete the intermediate Trainer checkpoints (./results/checkpoint-*) left by the custom run above.
!rm -rf ./results/

### Multi - [bert-base-multilingual-cased](https://huggingface.co/bert-base-multilingual-cased)

In [None]:
# Hugging Face Hub id of the multilingual BERT checkpoint to fine-tune next.
model_name = "bert-base-multilingual-cased"

In [None]:
# Fine-tune and evaluate the multilingual checkpoint with the baseline train_model helper
# (defined earlier in the notebook).
train_model(model_name)

https://huggingface.co/bert-base-multilingual-cased/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp2al83lmm


Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

storing https://huggingface.co/bert-base-multilingual-cased/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/f55e7a2ad4f8d0fff2733b3f79777e1e99247f2e4583703e92ce74453af8c235.ec5c189f89475aac7d8cbd243960a0655cfadc3d0474da8ff2ed0bf1699c2a5f
creating metadata file for /root/.cache/huggingface/transformers/f55e7a2ad4f8d0fff2733b3f79777e1e99247f2e4583703e92ce74453af8c235.ec5c189f89475aac7d8cbd243960a0655cfadc3d0474da8ff2ed0bf1699c2a5f
https://huggingface.co/bert-base-multilingual-cased/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpfchbbi08


Downloading:   0%|          | 0.00/625 [00:00<?, ?B/s]

storing https://huggingface.co/bert-base-multilingual-cased/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/6c4a5d81a58c9791cdf76a09bce1b5abfb9cf958aebada51200f4515403e5d08.0fe59f3f4f1335dadeb4bce8b8146199d9083512b50d07323c1c319f96df450c
creating metadata file for /root/.cache/huggingface/transformers/6c4a5d81a58c9791cdf76a09bce1b5abfb9cf958aebada51200f4515403e5d08.0fe59f3f4f1335dadeb4bce8b8146199d9083512b50d07323c1c319f96df450c
loading configuration file https://huggingface.co/bert-base-multilingual-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/6c4a5d81a58c9791cdf76a09bce1b5abfb9cf958aebada51200f4515403e5d08.0fe59f3f4f1335dadeb4bce8b8146199d9083512b50d07323c1c319f96df450c
Model config BertConfig {
  "_name_or_path": "bert-base-multilingual-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidde

Downloading:   0%|          | 0.00/972k [00:00<?, ?B/s]

storing https://huggingface.co/bert-base-multilingual-cased/resolve/main/vocab.txt in cache at /root/.cache/huggingface/transformers/eff018e45de5364a8368df1f2df3461d506e2a111e9dd50af1fae061cd460ead.6c5b6600e968f4b5e08c86d8891ea99e51537fc2bf251435fb46922e8f7a7b29
creating metadata file for /root/.cache/huggingface/transformers/eff018e45de5364a8368df1f2df3461d506e2a111e9dd50af1fae061cd460ead.6c5b6600e968f4b5e08c86d8891ea99e51537fc2bf251435fb46922e8f7a7b29
https://huggingface.co/bert-base-multilingual-cased/resolve/main/tokenizer.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpusfjny8y


Downloading:   0%|          | 0.00/1.87M [00:00<?, ?B/s]

storing https://huggingface.co/bert-base-multilingual-cased/resolve/main/tokenizer.json in cache at /root/.cache/huggingface/transformers/46880f3b0081fda494a4e15b05787692aa4c1e21e0ff2428ba8b14d4eda0784d.b33e51591f94f17c238ee9b1fac75b96ff2678cbaed6e108feadb3449d18dc24
creating metadata file for /root/.cache/huggingface/transformers/46880f3b0081fda494a4e15b05787692aa4c1e21e0ff2428ba8b14d4eda0784d.b33e51591f94f17c238ee9b1fac75b96ff2678cbaed6e108feadb3449d18dc24
loading file https://huggingface.co/bert-base-multilingual-cased/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/eff018e45de5364a8368df1f2df3461d506e2a111e9dd50af1fae061cd460ead.6c5b6600e968f4b5e08c86d8891ea99e51537fc2bf251435fb46922e8f7a7b29
loading file https://huggingface.co/bert-base-multilingual-cased/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/46880f3b0081fda494a4e15b05787692aa4c1e21e0ff2428ba8b14d4eda0784d.b33e51591f94f17c238ee9b1fac75b96ff2678cbaed6e108feadb3449

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-multilingual-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/6c4a5d81a58c9791cdf76a09bce1b5abfb9cf958aebada51200f4515403e5d08.0fe59f3f4f1335dadeb4bce8b8146199d9083512b50d07323c1c319f96df450c
Model config BertConfig {
  "_name_or_path": "bert-base-multilingual-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },


Downloading:   0%|          | 0.00/681M [00:00<?, ?B/s]

storing https://huggingface.co/bert-base-multilingual-cased/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/0a3fd51713dcbb4def175c7f85bddc995d5976ce1dde327f99104e4d33069f17.aa7be4c79d76f4066d9b354496ea477c9ee39c5d889156dd1efb680643c2b052
creating metadata file for /root/.cache/huggingface/transformers/0a3fd51713dcbb4def175c7f85bddc995d5976ce1dde327f99104e4d33069f17.aa7be4c79d76f4066d9b354496ea477c9ee39c5d889156dd1efb680643c2b052
loading weights file https://huggingface.co/bert-base-multilingual-cased/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/0a3fd51713dcbb4def175c7f85bddc995d5976ce1dde327f99104e4d33069f17.aa7be4c79d76f4066d9b354496ea477c9ee39c5d889156dd1efb680643c2b052
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.pred

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.081,0.543608,0.439816,0.537474,0.432329
2,1.143400,1.02547,0.585424,0.52431,0.583514,0.505393
3,0.918000,1.050461,0.574671,0.542957,0.551658,0.543422


***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-471
Configuration saved in ./results/checkpoint-471/config.json
Model weights saved in ./results/checkpoint-471/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-471/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-471/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-942
Configuration saved in ./results/checkpoint-942/config.json
Model weights saved in ./results/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-1413
Configuration saved in ./results/checkpoint-1413/config.json
Model we

TrainOutput(global_step=1413, training_loss=0.9594327652614671, metrics={'train_runtime': 360.0145, 'train_samples_per_second': 125.562, 'train_steps_per_second': 3.925, 'total_flos': 1881666784115928.0, 'train_loss': 0.9594327652614671, 'epoch': 3.0})

***** Running Evaluation *****
  Num examples = 837
  Batch size = 32


{'epoch': 3.0,
 'eval_accuracy': 0.5746714456391876,
 'eval_f1': 0.5429565515611482,
 'eval_loss': 1.0504605770111084,
 'eval_precision': 0.5516580674799236,
 'eval_recall': 0.5434216985763377,
 'eval_runtime': 1.6511,
 'eval_samples_per_second': 506.935,
 'eval_steps_per_second': 16.353}

***** Running Prediction *****
  Num examples = 838
  Batch size = 32


PredictionOutput(predictions=array([[ 1.732 , -0.18  ,  0.839 ,  1.183 , -2.44  ],
       [ 3.217 , -0.954 ,  0.1229,  0.5415, -2.51  ],
       [ 2.777 , -1.399 ,  1.224 , -0.721 , -1.147 ],
       ...,
       [ 2.66  , -1.473 , -0.0625,  1.548 , -2.26  ],
       [ 0.4626, -1.438 ,  3.56  , -0.3323, -1.701 ],
       [ 2.79  , -1.834 ,  1.816 ,  0.645 , -2.875 ]], dtype=float16), label_ids=array([0, 0, 0, 0, 2, 0, 0, 0, 3, 1, 3, 2, 3, 2, 3, 1, 0, 0, 2, 0, 0, 2,
       1, 0, 2, 0, 2, 2, 0, 0, 3, 0, 3, 0, 0, 0, 4, 2, 0, 0, 0, 3, 3, 0,
       0, 2, 0, 3, 3, 2, 2, 4, 0, 0, 1, 3, 0, 3, 0, 0, 3, 0, 0, 0, 3, 0,
       0, 0, 0, 0, 1, 2, 0, 0, 2, 4, 3, 4, 0, 0, 1, 0, 0, 0, 3, 0, 2, 0,
       0, 2, 1, 2, 3, 4, 0, 4, 0, 3, 3, 0, 0, 0, 0, 2, 0, 2, 3, 0, 0, 0,
       0, 0, 1, 3, 3, 0, 2, 0, 2, 2, 0, 1, 3, 0, 0, 0, 0, 0, 4, 0, 2, 0,
       3, 3, 0, 0, 0, 2, 1, 1, 0, 0, 0, 2, 0, 3, 3, 0, 0, 1, 0, 3, 0, 0,
       0, 3, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, 1, 3, 0, 1, 0, 0, 0, 2,
       0, 1, 0, 2, 3, 3,

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/bert-base-multilingual-cased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/bert-base-multilingual-cased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/bert-base-multilingual-cased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/bert-base-multilingual-cased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/bert-base-multilingual-cased/special_tokens_map.json


In [None]:
# Remove the local ./results/ directory, where the run above wrote its
# per-epoch checkpoints (checkpoint-471/942/1413). The final model was already
# saved to Drive, so these are presumably only deleted to free local disk.
!rm -rf ./results/

#### Custom

In [None]:
# Second run on the same checkpoint, this time through train_model_custom
# (defined outside this section; see its definition for what "custom" changes).
# NOTE: relies on `model_name` still holding the value set in an earlier cell
# (bert-base-multilingual-cased, per the log below).
train_model_custom(model_name)

loading configuration file https://huggingface.co/bert-base-multilingual-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/6c4a5d81a58c9791cdf76a09bce1b5abfb9cf958aebada51200f4515403e5d08.0fe59f3f4f1335dadeb4bce8b8146199d9083512b50d07323c1c319f96df450c
Model config BertConfig {
  "_name_or_path": "bert-base-multilingual-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "abs

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-multilingual-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/6c4a5d81a58c9791cdf76a09bce1b5abfb9cf958aebada51200f4515403e5d08.0fe59f3f4f1335dadeb4bce8b8146199d9083512b50d07323c1c319f96df450c
Model config BertConfig {
  "_name_or_path": "bert-base-multilingual-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.113998,0.489845,0.479083,0.449796,0.557282
2,1.268700,1.084257,0.508961,0.501016,0.470322,0.600913
3,0.935800,1.110462,0.517324,0.518776,0.484929,0.60577


***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-471
Configuration saved in ./results/checkpoint-471/config.json
Model weights saved in ./results/checkpoint-471/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-471/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-471/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-942
Configuration saved in ./results/checkpoint-942/config.json
Model weights saved in ./results/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-1413
Configuration saved in ./results/checkpoint-1413/config.json
Model we

TrainOutput(global_step=1413, training_loss=1.0047426648959992, metrics={'train_runtime': 361.4819, 'train_samples_per_second': 125.052, 'train_steps_per_second': 3.909, 'total_flos': 1881666784115928.0, 'train_loss': 1.0047426648959992, 'epoch': 3.0})

***** Running Evaluation *****
  Num examples = 837
  Batch size = 32


{'epoch': 3.0,
 'eval_accuracy': 0.5173237753882916,
 'eval_f1': 0.518776468272993,
 'eval_loss': 1.1104624271392822,
 'eval_precision': 0.4849293392693143,
 'eval_recall': 0.6057699230895107,
 'eval_runtime': 1.6164,
 'eval_samples_per_second': 517.819,
 'eval_steps_per_second': 16.704}

***** Running Prediction *****
  Num examples = 838
  Batch size = 32


PredictionOutput(predictions=array([[ 0.761  ,  0.0902 ,  1.144  , -0.639  , -1.148  ],
       [ 2.217  ,  0.5137 , -0.593  ,  0.8994 , -2.08   ],
       [ 1.788  , -1.356  ,  1.9375 ,  0.1042 , -2.115  ],
       ...,
       [ 0.5757 , -1.904  ,  2.857  ,  0.9937 , -2.287  ],
       [ 0.3494 , -1.272  ,  3.484  ,  0.01202, -1.701  ],
       [ 1.732  , -1.469  ,  1.455  ,  1.344  , -2.695  ]], dtype=float16), label_ids=array([0, 0, 0, 0, 2, 0, 0, 0, 3, 1, 3, 2, 3, 2, 3, 1, 0, 0, 2, 0, 0, 2,
       1, 0, 2, 0, 2, 2, 0, 0, 3, 0, 3, 0, 0, 0, 4, 2, 0, 0, 0, 3, 3, 0,
       0, 2, 0, 3, 3, 2, 2, 4, 0, 0, 1, 3, 0, 3, 0, 0, 3, 0, 0, 0, 3, 0,
       0, 0, 0, 0, 1, 2, 0, 0, 2, 4, 3, 4, 0, 0, 1, 0, 0, 0, 3, 0, 2, 0,
       0, 2, 1, 2, 3, 4, 0, 4, 0, 3, 3, 0, 0, 0, 0, 2, 0, 2, 3, 0, 0, 0,
       0, 0, 1, 3, 3, 0, 2, 0, 2, 2, 0, 1, 3, 0, 0, 0, 0, 0, 4, 0, 2, 0,
       3, 3, 0, 0, 0, 2, 1, 1, 0, 0, 0, 2, 0, 3, 3, 0, 0, 1, 0, 3, 0, 0,
       0, 3, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, 1, 3, 0, 1, 0, 0, 

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/bert-base-multilingual-cased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/bert-base-multilingual-cased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/bert-base-multilingual-cased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/bert-base-multilingual-cased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/bert-base-multilingual-cased/special_tokens_map.json


In [None]:
# Remove the local ./results/ checkpoints written by the custom run above;
# its final model was already saved under models/baseline/custom/ on Drive.
!rm -rf ./results/

### Multi - [bert-base-multilingual-uncased](https://huggingface.co/bert-base-multilingual-uncased)

In [None]:
# Hugging Face Hub checkpoint id used for this section; it is passed to both
# train_model and train_model_custom in the cells below.
model_name = "bert-base-multilingual-uncased"

In [None]:
# Baseline fine-tuning run for `model_name` (train_model is defined outside
# this section). Per the log below it trains for 3 epochs, evaluates on 837
# examples, predicts on 838 examples and saves the model under
# models/baseline/ on Drive.
train_model(model_name)

https://huggingface.co/bert-base-multilingual-uncased/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp1pl8vt4i


Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

storing https://huggingface.co/bert-base-multilingual-uncased/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/1b935b135ddb021a7d836c00f5702b80d11d348fd5c5a42cbd933d8ed1f55be9.20430bd8e10ef77a7d2977accefe796051e01bc2fc4aa146bc862997a1a15e79
creating metadata file for /root/.cache/huggingface/transformers/1b935b135ddb021a7d836c00f5702b80d11d348fd5c5a42cbd933d8ed1f55be9.20430bd8e10ef77a7d2977accefe796051e01bc2fc4aa146bc862997a1a15e79
https://huggingface.co/bert-base-multilingual-uncased/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpodolx7gr


Downloading:   0%|          | 0.00/625 [00:00<?, ?B/s]

storing https://huggingface.co/bert-base-multilingual-uncased/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/af4e101d208f361f141144dca21e9c4148aaf0e85441c2e335743d10829c6cad.d63adade93e44e64bedd306ec82ffd33eedabaf0ff08aabe581acaa48616a508
creating metadata file for /root/.cache/huggingface/transformers/af4e101d208f361f141144dca21e9c4148aaf0e85441c2e335743d10829c6cad.d63adade93e44e64bedd306ec82ffd33eedabaf0ff08aabe581acaa48616a508
loading configuration file https://huggingface.co/bert-base-multilingual-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/af4e101d208f361f141144dca21e9c4148aaf0e85441c2e335743d10829c6cad.d63adade93e44e64bedd306ec82ffd33eedabaf0ff08aabe581acaa48616a508
Model config BertConfig {
  "_name_or_path": "bert-base-multilingual-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  

Downloading:   0%|          | 0.00/851k [00:00<?, ?B/s]

storing https://huggingface.co/bert-base-multilingual-uncased/resolve/main/vocab.txt in cache at /root/.cache/huggingface/transformers/269f2943d168a4cd2ddf3864cee89d7f7d78873b3d14a1229174d37212981a38.92022aa29ab6663b0b4254744f28ab43e6adf4deebe0f26651e6c61f28f69d8b
creating metadata file for /root/.cache/huggingface/transformers/269f2943d168a4cd2ddf3864cee89d7f7d78873b3d14a1229174d37212981a38.92022aa29ab6663b0b4254744f28ab43e6adf4deebe0f26651e6c61f28f69d8b
https://huggingface.co/bert-base-multilingual-uncased/resolve/main/tokenizer.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpios4keps


Downloading:   0%|          | 0.00/1.64M [00:00<?, ?B/s]

storing https://huggingface.co/bert-base-multilingual-uncased/resolve/main/tokenizer.json in cache at /root/.cache/huggingface/transformers/857db185d48b92f3e6141ef5092d8d5dbebab7eef1bacc6c9eaf85cf23807641.73ad1f9fd9f94089672128003fb4a687b64b73b2bfb8d08766bbc71feec8cd96
creating metadata file for /root/.cache/huggingface/transformers/857db185d48b92f3e6141ef5092d8d5dbebab7eef1bacc6c9eaf85cf23807641.73ad1f9fd9f94089672128003fb4a687b64b73b2bfb8d08766bbc71feec8cd96
loading file https://huggingface.co/bert-base-multilingual-uncased/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/269f2943d168a4cd2ddf3864cee89d7f7d78873b3d14a1229174d37212981a38.92022aa29ab6663b0b4254744f28ab43e6adf4deebe0f26651e6c61f28f69d8b
loading file https://huggingface.co/bert-base-multilingual-uncased/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/857db185d48b92f3e6141ef5092d8d5dbebab7eef1bacc6c9eaf85cf23807641.73ad1f9fd9f94089672128003fb4a687b64b73b2bfb8d08766b

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-multilingual-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/af4e101d208f361f141144dca21e9c4148aaf0e85441c2e335743d10829c6cad.d63adade93e44e64bedd306ec82ffd33eedabaf0ff08aabe581acaa48616a508
Model config BertConfig {
  "_name_or_path": "bert-base-multilingual-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
 

Downloading:   0%|          | 0.00/641M [00:00<?, ?B/s]

storing https://huggingface.co/bert-base-multilingual-uncased/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/37f730c9dc4fc13ab6bf412fdc0ad936241a39a70628c2d4a85a607ea775b865.a458b2dad7b293099dd815628e032e6c22519889d75f13d6f244dbe068525a56
creating metadata file for /root/.cache/huggingface/transformers/37f730c9dc4fc13ab6bf412fdc0ad936241a39a70628c2d4a85a607ea775b865.a458b2dad7b293099dd815628e032e6c22519889d75f13d6f244dbe068525a56
loading weights file https://huggingface.co/bert-base-multilingual-uncased/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/37f730c9dc4fc13ab6bf412fdc0ad936241a39a70628c2d4a85a607ea775b865.a458b2dad7b293099dd815628e032e6c22519889d75f13d6f244dbe068525a56
Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cl

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.038872,0.562724,0.486626,0.557846,0.462928
2,1.127300,1.018981,0.591398,0.544229,0.585241,0.530421
3,0.899400,1.030838,0.602151,0.578111,0.588651,0.576859


***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-471
Configuration saved in ./results/checkpoint-471/config.json
Model weights saved in ./results/checkpoint-471/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-471/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-471/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-942
Configuration saved in ./results/checkpoint-942/config.json
Model weights saved in ./results/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-1413
Configuration saved in ./results/checkpoint-1413/config.json
Model we

TrainOutput(global_step=1413, training_loss=0.9431354498407644, metrics={'train_runtime': 350.8902, 'train_samples_per_second': 128.827, 'train_steps_per_second': 4.027, 'total_flos': 1881666784115928.0, 'train_loss': 0.9431354498407644, 'epoch': 3.0})

***** Running Evaluation *****
  Num examples = 837
  Batch size = 32


{'epoch': 3.0,
 'eval_accuracy': 0.6021505376344086,
 'eval_f1': 0.5781114357419681,
 'eval_loss': 1.0308383703231812,
 'eval_precision': 0.5886512827982993,
 'eval_recall': 0.5768592156221023,
 'eval_runtime': 1.6019,
 'eval_samples_per_second': 522.516,
 'eval_steps_per_second': 16.855}

***** Running Prediction *****
  Num examples = 838
  Batch size = 32


PredictionOutput(predictions=array([[ 1.361  ,  1.08   , -1.794  , -1.087  ,  0.084  ],
       [ 3.03   , -1.139  , -0.4194 , -0.02048, -2.172  ],
       [ 1.711  , -1.693  ,  2.76   , -0.1273 , -2.516  ],
       ...,
       [ 2.207  , -2.424  ,  1.112  ,  0.5923 , -2.213  ],
       [ 0.6587 , -1.643  ,  2.96   ,  0.10114, -2.295  ],
       [ 2.465  , -1.575  ,  0.5103 ,  1.292  , -3.406  ]], dtype=float16), label_ids=array([0, 0, 0, 0, 2, 0, 0, 0, 3, 1, 3, 2, 3, 2, 3, 1, 0, 0, 2, 0, 0, 2,
       1, 0, 2, 0, 2, 2, 0, 0, 3, 0, 3, 0, 0, 0, 4, 2, 0, 0, 0, 3, 3, 0,
       0, 2, 0, 3, 3, 2, 2, 4, 0, 0, 1, 3, 0, 3, 0, 0, 3, 0, 0, 0, 3, 0,
       0, 0, 0, 0, 1, 2, 0, 0, 2, 4, 3, 4, 0, 0, 1, 0, 0, 0, 3, 0, 2, 0,
       0, 2, 1, 2, 3, 4, 0, 4, 0, 3, 3, 0, 0, 0, 0, 2, 0, 2, 3, 0, 0, 0,
       0, 0, 1, 3, 3, 0, 2, 0, 2, 2, 0, 1, 3, 0, 0, 0, 0, 0, 4, 0, 2, 0,
       3, 3, 0, 0, 0, 2, 1, 1, 0, 0, 0, 2, 0, 3, 3, 0, 0, 1, 0, 3, 0, 0,
       0, 3, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, 1, 3, 0, 1, 0, 0, 

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/bert-base-multilingual-uncased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/bert-base-multilingual-uncased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/bert-base-multilingual-uncased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/bert-base-multilingual-uncased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/bert-base-multilingual-uncased/special_tokens_map.json


In [None]:
# Remove the local ./results/ checkpoints from the baseline run above before
# starting the next run; the final model was already saved to Drive.
!rm -rf ./results/

#### Custom

In [None]:
# Same checkpoint as the baseline above, trained through train_model_custom
# (defined outside this section). Per the log below, the resulting model is
# saved under models/baseline/custom/ on Drive.
train_model_custom(model_name)

loading configuration file https://huggingface.co/bert-base-multilingual-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/af4e101d208f361f141144dca21e9c4148aaf0e85441c2e335743d10829c6cad.d63adade93e44e64bedd306ec82ffd33eedabaf0ff08aabe581acaa48616a508
Model config BertConfig {
  "_name_or_path": "bert-base-multilingual-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": 

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
loading configuration file https://huggingface.co/bert-base-multilingual-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/af4e101d208f361f141144dca21e9c4148aaf0e85441c2e335743d10829c6cad.d63adade93e44e64bedd306ec82ffd33eedabaf0ff08aabe581acaa48616a508
Model config BertConfig {
  "_name_or_path": "bert-base-multilingual-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
 

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,1.107878,0.504182,0.505696,0.477432,0.592613
2,1.250800,1.03071,0.519713,0.520268,0.484102,0.623823
3,0.928800,1.095016,0.528076,0.532456,0.498472,0.615557


***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-471
Configuration saved in ./results/checkpoint-471/config.json
Model weights saved in ./results/checkpoint-471/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-471/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-471/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-942
Configuration saved in ./results/checkpoint-942/config.json
Model weights saved in ./results/checkpoint-942/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-942/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-942/special_tokens_map.json
***** Running Evaluation *****
  Num examples = 837
  Batch size = 32
Saving model checkpoint to ./results/checkpoint-1413
Configuration saved in ./results/checkpoint-1413/config.json
Model we

TrainOutput(global_step=1413, training_loss=0.99645607334793, metrics={'train_runtime': 351.5072, 'train_samples_per_second': 128.6, 'train_steps_per_second': 4.02, 'total_flos': 1881666784115928.0, 'train_loss': 0.99645607334793, 'epoch': 3.0})

***** Running Evaluation *****
  Num examples = 837
  Batch size = 32


{'epoch': 3.0,
 'eval_accuracy': 0.5280764635603346,
 'eval_f1': 0.5324557256432719,
 'eval_loss': 1.0950160026550293,
 'eval_precision': 0.4984722162652903,
 'eval_recall': 0.6155574646811761,
 'eval_runtime': 1.6271,
 'eval_samples_per_second': 514.417,
 'eval_steps_per_second': 16.594}

***** Running Prediction *****
  Num examples = 838
  Batch size = 32


PredictionOutput(predictions=array([[ 0.8643,  2.037 , -0.562 , -0.3137, -1.917 ],
       [ 1.85  , -1.251 ,  0.8174, -0.4863, -1.512 ],
       [ 0.86  , -1.485 ,  3.195 ,  0.1565, -2.578 ],
       ...,
       [ 1.356 , -2.24  ,  1.709 ,  0.4204, -1.677 ],
       [ 0.279 , -1.377 ,  3.473 ,  0.1666, -2.432 ],
       [ 1.355 , -1.665 ,  1.236 ,  1.179 , -2.604 ]], dtype=float16), label_ids=array([0, 0, 0, 0, 2, 0, 0, 0, 3, 1, 3, 2, 3, 2, 3, 1, 0, 0, 2, 0, 0, 2,
       1, 0, 2, 0, 2, 2, 0, 0, 3, 0, 3, 0, 0, 0, 4, 2, 0, 0, 0, 3, 3, 0,
       0, 2, 0, 3, 3, 2, 2, 4, 0, 0, 1, 3, 0, 3, 0, 0, 3, 0, 0, 0, 3, 0,
       0, 0, 0, 0, 1, 2, 0, 0, 2, 4, 3, 4, 0, 0, 1, 0, 0, 0, 3, 0, 2, 0,
       0, 2, 1, 2, 3, 4, 0, 4, 0, 3, 3, 0, 0, 0, 0, 2, 0, 2, 3, 0, 0, 0,
       0, 0, 1, 3, 3, 0, 2, 0, 2, 2, 0, 1, 3, 0, 0, 0, 0, 0, 4, 0, 2, 0,
       3, 3, 0, 0, 0, 2, 1, 1, 0, 0, 0, 2, 0, 3, 3, 0, 0, 1, 0, 3, 0, 0,
       0, 3, 0, 0, 0, 0, 0, 0, 2, 0, 3, 0, 0, 0, 1, 3, 0, 1, 0, 0, 0, 2,
       0, 1, 0, 2, 3, 3,

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/bert-base-multilingual-uncased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/bert-base-multilingual-uncased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/bert-base-multilingual-uncased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/bert-base-multilingual-uncased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/baseline/custom/bert-base-multilingual-uncased/special_tokens_map.json


In [None]:
# Remove the local ./results/ checkpoints written by the custom run above;
# its final model was already saved under models/baseline/custom/ on Drive.
!rm -rf ./results/