In [None]:
!pip install pandas
!pip install datasets
!pip install transformers
!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113

## Preparing our Data

### Loading dataset

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Load previously split dataset

In [3]:
import pandas as pd

def load_process_data(filename):
  dataset = pd.read_excel(filename)
  dataset = dataset.drop(columns=['article_id', 'annotator', 'node','ranges'])
  dataset.replace(['Value', 'Value(+)', 'Value(-)', 'Fact', 'Policy'],[0,1,2,3,4], inplace=True)

  return dataset

In [4]:
# 80% train
train_set = load_process_data('/content/drive/Shareddrives/PLN/Assignment 2/data/OpArticles_ADUs_train.xlsx')

# 20% test
test_set = load_process_data('/content/drive/Shareddrives/PLN/Assignment 2/data/OpArticles_ADUs_test.xlsx')

print(train_set.info())
print(test_set.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13394 entries, 0 to 13393
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   tokens  13394 non-null  object
 1   label   13394 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 209.4+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3349 entries, 0 to 3348
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   tokens  3349 non-null   object
 1   label   3349 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 52.5+ KB
None


Load translated train sets

In [5]:
train_en = load_process_data('/content/drive/Shareddrives/PLN/Assignment 2/data/multilingual/OpArticles_ADUs_train_en.xlsx')
train_es = load_process_data('/content/drive/Shareddrives/PLN/Assignment 2/data/multilingual/OpArticles_ADUs_train_es.xlsx')
train_ga = load_process_data('/content/drive/Shareddrives/PLN/Assignment 2/data/multilingual/OpArticles_ADUs_train_ga.xlsx')
train_fr = load_process_data('/content/drive/Shareddrives/PLN/Assignment 2/data/multilingual/OpArticles_ADUs_train_fr.xlsx')
train_it = load_process_data('/content/drive/Shareddrives/PLN/Assignment 2/data/multilingual/OpArticles_ADUs_train_it.xlsx')
train_ro = load_process_data('/content/drive/Shareddrives/PLN/Assignment 2/data/multilingual/OpArticles_ADUs_train_ro.xlsx')

train_en.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13394 entries, 0 to 13393
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   tokens  13394 non-null  object
 1   label   13394 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 209.4+ KB


For ease of usage with Transformer models, we convert the dataset into a Hugging Face dataset and split it into train, validation and test sets.

In [25]:
from datasets import Dataset
from datasets import DatasetDict

train_hf = Dataset.from_pandas(train_set)
test_hf = Dataset.from_pandas(test_set)

train_en_hf = Dataset.from_pandas(train_en)
train_es_hf = Dataset.from_pandas(train_es)
train_ga_hf = Dataset.from_pandas(train_ga)
train_fr_hf = Dataset.from_pandas(train_fr)
train_it_hf = Dataset.from_pandas(train_it)
train_ro_hf = Dataset.from_pandas(train_ro)

# Split 10% valid / 10% test
valid_test = test_hf.train_test_split(test_size=0.5, shuffle=True, seed=42)

train_valid_test_dataset = DatasetDict({
    'train_pt': train_hf,
    'validation': valid_test['train'],
    'test': valid_test['test'],
    'train_en': train_en_hf,
    'train_es': train_es_hf,
    'train_ga': train_ga_hf,
    'train_fr': train_fr_hf,
    'train_it': train_it_hf,
    'train_ro': train_ro_hf
})

train_valid_test_dataset

DatasetDict({
    train_pt: Dataset({
        features: ['tokens', 'label'],
        num_rows: 13394
    })
    validation: Dataset({
        features: ['tokens', 'label'],
        num_rows: 1674
    })
    test: Dataset({
        features: ['tokens', 'label'],
        num_rows: 1675
    })
    train_en: Dataset({
        features: ['tokens', 'label'],
        num_rows: 13394
    })
    train_es: Dataset({
        features: ['tokens', 'label'],
        num_rows: 13394
    })
    train_ga: Dataset({
        features: ['tokens', 'label'],
        num_rows: 13394
    })
    train_fr: Dataset({
        features: ['tokens', 'label'],
        num_rows: 13394
    })
    train_it: Dataset({
        features: ['tokens', 'label'],
        num_rows: 13394
    })
    train_ro: Dataset({
        features: ['tokens', 'label'],
        num_rows: 13394
    })
})

## Fine-tuning a pretrained model

### Tokenizer

We first load the tokenizer for our model:

In [7]:
from transformers import AutoTokenizer

def get_tokenizer(name):
    return AutoTokenizer.from_pretrained(name)

Now we need to [preprocess](https://huggingface.co/docs/transformers/preprocessing) our data.

Obtaining the length of the longest sequences in our data splits

In [8]:
def find_max_length(dataset):
    return len(max(dataset, key=lambda x: len(x.split())).split())

find_max_length(train_en_hf["tokens"])

86

Tokenize entire dataset

In [9]:
# Define tokenizer later
tokenizer = None

def tokenize_dataset(sample):
    return tokenizer(sample["tokens"], truncation=True, max_length=86, padding="max_length")

def get_tokenized_data(dataset):
    return dataset.map(tokenize_dataset, batched=True)

### Loading the model

Since we want to use the model for classification, we should load it with an appropriate classification head:

In [10]:
from transformers import AutoModelForSequenceClassification
import torch

def get_model(name):
    model = AutoModelForSequenceClassification.from_pretrained(name, num_labels=5)
    model.cuda() # Use GPU

    return model

### Fine-tuning

The next step is to [fine-tune](https://huggingface.co/docs/transformers/training) the model with our train data. To do so, we can make use of a [Trainer](https://huggingface.co/docs/transformers/main_classes/trainer).
There are several aspects of training that you can specify via [TrainingArguments](https://huggingface.co/docs/transformers/main_classes/trainer#transformers.TrainingArguments).

In [11]:
from transformers import TrainingArguments, Trainer
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
import numpy as np

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

def get_trainingArgs():
    return TrainingArguments(
        output_dir="./results",
        learning_rate=2e-5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=3  ,
        weight_decay=0.01,
        data_seed=42,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="f1"
    )

def get_trainer(model_, args_, train_dataset_, eval_dataset_, tokenizer_, data_collator_, compute_metrics_):
    return Trainer(
        model=model_,
        args=args_,
        train_dataset=train_dataset_,
        eval_dataset=eval_dataset_,
        tokenizer=tokenizer_,
        data_collator=data_collator_,
        compute_metrics=compute_metrics_
    )

### Train, evaluate, predict, save with Zero-shot

In [26]:
from transformers import DataCollatorWithPadding
from IPython.display import display

model_name = "bert-base-multilingual-cased"

tokenizer = get_tokenizer(model_name)
tokenized_dataset = get_tokenized_data(train_valid_test_dataset)
model = get_model(model_name)

def zero_shot(lang):
  trainer = get_trainer(
    model,
    get_trainingArgs(),
    tokenized_dataset['train_'+lang],
    tokenized_dataset['validation'],
    tokenizer,
    DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics
    )
  
  # Train Model
  display(trainer.train())

  # Check performance in validation set
  display(trainer.evaluate())

  # Check how the model fares in our test set.
  display(trainer.predict(test_dataset=tokenized_dataset["test"]))

  # Save model for future use
  trainer.save_model('/content/drive/Shareddrives/PLN/Assignment 2/models/' + lang + '/' + model_name)

loading configuration file https://huggingface.co/bert-base-multilingual-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/6c4a5d81a58c9791cdf76a09bce1b5abfb9cf958aebada51200f4515403e5d08.0fe59f3f4f1335dadeb4bce8b8146199d9083512b50d07323c1c319f96df450c
Model config BertConfig {
  "_name_or_path": "bert-base-multilingual-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "abs

  0%|          | 0/14 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/14 [00:00<?, ?ba/s]

  0%|          | 0/14 [00:00<?, ?ba/s]

  0%|          | 0/14 [00:00<?, ?ba/s]

  0%|          | 0/14 [00:00<?, ?ba/s]

  0%|          | 0/14 [00:00<?, ?ba/s]

  0%|          | 0/14 [00:00<?, ?ba/s]

loading configuration file https://huggingface.co/bert-base-multilingual-cased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/6c4a5d81a58c9791cdf76a09bce1b5abfb9cf958aebada51200f4515403e5d08.0fe59f3f4f1335dadeb4bce8b8146199d9083512b50d07323c1c319f96df450c
Model config BertConfig {
  "_name_or_path": "bert-base-multilingual-cased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_

### Zero-shot several languages

#### Portuguese

In [27]:
zero_shot("pt")

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 13394
  Num Epochs = 3
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 2514


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.1823,1.009982,0.580048,0.505474,0.599145,0.467789
2,0.9097,1.016058,0.586619,0.554393,0.587447,0.546993
3,0.753,1.070261,0.594385,0.563842,0.574537,0.557672


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16
Saving model checkpoint to ./results/checkpoint-838
Configuration saved in ./results/checkpoint-838/config.json
Model weights saved in ./results/checkpoint-838/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-838/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-838/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16


TrainOutput(global_step=2514, training_loss=0.9336443349493811, metrics={'train_runtime': 811.4625, 'train_samples_per_second': 49.518, 'train_steps_per_second': 3.098, 'total_flos': 1775868623519544.0, 'train_loss': 0.9336443349493811, 'epoch': 3.0})

The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16


{'epoch': 3.0,
 'eval_accuracy': 0.5943847072879331,
 'eval_f1': 0.5638418745897852,
 'eval_loss': 1.0702614784240723,
 'eval_precision': 0.5745369583803732,
 'eval_recall': 0.5576719670851249,
 'eval_runtime': 9.2474,
 'eval_samples_per_second': 181.024,
 'eval_steps_per_second': 11.355}

The following columns in the test set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 1675
  Batch size = 16


PredictionOutput(predictions=array([[ 2.3569963 , -0.5056619 , -1.5216085 ,  1.9893084 , -2.7937179 ],
       [ 2.4618914 , -1.4521484 ,  1.1548529 ,  0.32186386, -2.621744  ],
       [ 0.82349795, -2.5367563 ,  2.99239   ,  0.5507926 , -1.5780839 ],
       ...,
       [ 1.4572164 ,  1.5743827 , -2.5829337 ,  1.389753  , -2.364238  ],
       [ 1.2282764 , -1.9081117 ,  3.0067067 , -0.09163611, -2.1226697 ],
       [ 1.3913858 ,  1.5657791 , -2.886854  , -1.8013538 ,  1.7046494 ]],
      dtype=float32), label_ids=array([0, 3, 2, ..., 1, 2, 4]), metrics={'test_loss': 1.0826913118362427, 'test_accuracy': 0.5934328358208956, 'test_f1': 0.5515175106837085, 'test_precision': 0.5636671987230646, 'test_recall': 0.5443049293049294, 'test_runtime': 9.4376, 'test_samples_per_second': 177.482, 'test_steps_per_second': 11.126})

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/pt/bert-base-multilingual-cased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/pt/bert-base-multilingual-cased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/pt/bert-base-multilingual-cased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/pt/bert-base-multilingual-cased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/pt/bert-base-multilingual-cased/special_tokens_map.json


In [28]:
!rm -rf ./results/

#### English

In [13]:
zero_shot("en")

The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 13394
  Num Epochs = 3
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 2514


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.1631,1.087873,0.560335,0.394698,0.625832,0.362509
2,0.8869,1.076017,0.555556,0.462441,0.568675,0.429809
3,0.735,1.115303,0.554361,0.469818,0.538859,0.437176


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16
Saving model checkpoint to ./results/checkpoint-838
Configuration saved in ./results/checkpoint-838/config.json
Model weights saved in ./results/checkpoint-838/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-838/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-838/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16


TrainOutput(global_step=2514, training_loss=0.9153515110911125, metrics={'train_runtime': 806.8399, 'train_samples_per_second': 49.802, 'train_steps_per_second': 3.116, 'total_flos': 1775868623519544.0, 'train_loss': 0.9153515110911125, 'epoch': 3.0})

The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16


{'epoch': 3.0,
 'eval_accuracy': 0.5543608124253285,
 'eval_f1': 0.46981812596609734,
 'eval_loss': 1.1153031587600708,
 'eval_precision': 0.5388590673673541,
 'eval_recall': 0.4371758333048151,
 'eval_runtime': 9.2489,
 'eval_samples_per_second': 180.995,
 'eval_steps_per_second': 11.353}

The following columns in the test set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 1675
  Batch size = 16


PredictionOutput(predictions=array([[ 2.2730577 , -0.38209984, -1.485879  ,  2.0117543 , -2.7955055 ],
       [ 2.3029656 , -1.8601569 ,  1.2731074 ,  0.7600737 , -2.9897532 ],
       [ 0.5340132 , -2.0112958 ,  3.2515519 , -0.24893504, -1.7813315 ],
       ...,
       [ 1.515837  ,  2.273377  , -2.0447404 ,  0.9392453 , -2.4451215 ],
       [ 0.5097926 , -1.7330992 ,  1.5644592 , -0.17139944, -0.7610098 ],
       [ 1.7860159 ,  0.7420413 , -2.09492   , -1.8021395 ,  1.4617672 ]],
      dtype=float32), label_ids=array([0, 3, 2, ..., 1, 2, 4]), metrics={'test_loss': 1.0893518924713135, 'test_accuracy': 0.5749253731343283, 'test_f1': 0.47280775750415244, 'test_precision': 0.5577648102842093, 'test_recall': 0.4400215165669711, 'test_runtime': 9.4491, 'test_samples_per_second': 177.266, 'test_steps_per_second': 11.112})

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/en/bert-base-multilingual-cased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/en/bert-base-multilingual-cased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/en/bert-base-multilingual-cased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/en/bert-base-multilingual-cased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/en/bert-base-multilingual-cased/special_tokens_map.json


In [14]:
!rm -rf ./results/

#### Spanish

In [15]:
zero_shot("es")

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 13394
  Num Epochs = 3
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 2514


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.9845,1.024782,0.580645,0.482984,0.594924,0.443693
2,0.758,1.097132,0.563321,0.510496,0.576393,0.490694
3,0.6235,1.183407,0.549582,0.515523,0.535688,0.504253


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16
Saving model checkpoint to ./results/checkpoint-838
Configuration saved in ./results/checkpoint-838/config.json
Model weights saved in ./results/checkpoint-838/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-838/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-838/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16


TrainOutput(global_step=2514, training_loss=0.7853347490776506, metrics={'train_runtime': 811.3131, 'train_samples_per_second': 49.527, 'train_steps_per_second': 3.099, 'total_flos': 1775868623519544.0, 'train_loss': 0.7853347490776506, 'epoch': 3.0})

The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16


{'epoch': 3.0,
 'eval_accuracy': 0.5495818399044206,
 'eval_f1': 0.515523230907694,
 'eval_loss': 1.1834070682525635,
 'eval_precision': 0.5356880009031807,
 'eval_recall': 0.5042531374513051,
 'eval_runtime': 9.2882,
 'eval_samples_per_second': 180.23,
 'eval_steps_per_second': 11.305}

The following columns in the test set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 1675
  Batch size = 16


PredictionOutput(predictions=array([[ 3.2260716 , -0.74787545, -2.0615969 ,  1.8557225 , -2.879161  ],
       [ 2.3072426 , -1.9754125 ,  2.298366  ,  0.3516692 , -3.4420042 ],
       [ 0.93096703, -2.6186986 ,  3.1016548 ,  0.5749084 , -2.4945393 ],
       ...,
       [ 1.7521371 ,  2.3677483 , -2.6919951 ,  1.5470537 , -2.913363  ],
       [ 0.5957954 , -2.3890164 ,  4.053494  , -0.31822258, -2.3912656 ],
       [ 1.4082956 ,  0.58619773, -2.455303  , -2.524176  ,  3.1378322 ]],
      dtype=float32), label_ids=array([0, 3, 2, ..., 1, 2, 4]), metrics={'test_loss': 1.1516385078430176, 'test_accuracy': 0.5647761194029851, 'test_f1': 0.5236365310584083, 'test_precision': 0.5428681807360272, 'test_recall': 0.5099955515410062, 'test_runtime': 9.5355, 'test_samples_per_second': 175.659, 'test_steps_per_second': 11.011})

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/es/bert-base-multilingual-cased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/es/bert-base-multilingual-cased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/es/bert-base-multilingual-cased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/es/bert-base-multilingual-cased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/es/bert-base-multilingual-cased/special_tokens_map.json


In [16]:
!rm -rf ./results/

#### Galician

In [17]:
zero_shot("ga")

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 13394
  Num Epochs = 3
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 2514


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7444,1.164826,0.559737,0.483457,0.538671,0.454764
2,0.5909,1.226598,0.55675,0.528548,0.542557,0.523747
3,0.5046,1.361347,0.539427,0.513961,0.522782,0.506512


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16
Saving model checkpoint to ./results/checkpoint-838
Configuration saved in ./results/checkpoint-838/config.json
Model weights saved in ./results/checkpoint-838/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-838/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-838/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16


TrainOutput(global_step=2514, training_loss=0.6061285202144723, metrics={'train_runtime': 816.7086, 'train_samples_per_second': 49.2, 'train_steps_per_second': 3.078, 'total_flos': 1775868623519544.0, 'train_loss': 0.6061285202144723, 'epoch': 3.0})

The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16


{'epoch': 3.0,
 'eval_accuracy': 0.5567502986857825,
 'eval_f1': 0.5285476726535665,
 'eval_loss': 1.2265983819961548,
 'eval_precision': 0.5425565714851429,
 'eval_recall': 0.52374654088472,
 'eval_runtime': 9.2198,
 'eval_samples_per_second': 181.566,
 'eval_steps_per_second': 11.389}

The following columns in the test set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 1675
  Batch size = 16


PredictionOutput(predictions=array([[ 2.979091  , -1.6636992 , -1.5398701 ,  2.212641  , -2.9077034 ],
       [ 2.336499  , -2.0212781 ,  1.7375876 ,  1.0090541 , -3.6609035 ],
       [ 1.2810286 , -3.0915082 ,  2.955997  ,  0.78869104, -2.680071  ],
       ...,
       [ 1.4296207 ,  3.4225059 , -3.0995514 ,  0.9431521 , -2.6815386 ],
       [ 0.9804    , -3.0449114 ,  4.3757415 , -0.06949195, -2.7023401 ],
       [ 1.1725712 , -0.05328814, -2.0808673 , -2.8646948 ,  3.789807  ]],
      dtype=float32), label_ids=array([0, 3, 2, ..., 1, 2, 4]), metrics={'test_loss': 1.2057018280029297, 'test_accuracy': 0.5582089552238806, 'test_f1': 0.5175554917980448, 'test_precision': 0.5426340409432638, 'test_recall': 0.5034765782947601, 'test_runtime': 9.4604, 'test_samples_per_second': 177.055, 'test_steps_per_second': 11.099})

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/ga/bert-base-multilingual-cased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/ga/bert-base-multilingual-cased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/ga/bert-base-multilingual-cased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/ga/bert-base-multilingual-cased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/ga/bert-base-multilingual-cased/special_tokens_map.json


In [18]:
!rm -rf ./results/

#### French

In [19]:
zero_shot("fr")

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 13394
  Num Epochs = 3
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 2514


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.8044,1.102363,0.555556,0.525114,0.518421,0.543867
2,0.5954,1.213794,0.55675,0.526468,0.532517,0.538058
3,0.5075,1.361052,0.53405,0.507577,0.499313,0.518719


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16
Saving model checkpoint to ./results/checkpoint-838
Configuration saved in ./results/checkpoint-838/config.json
Model weights saved in ./results/checkpoint-838/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-838/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-838/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16


TrainOutput(global_step=2514, training_loss=0.6337519886954588, metrics={'train_runtime': 816.5466, 'train_samples_per_second': 49.21, 'train_steps_per_second': 3.079, 'total_flos': 1775868623519544.0, 'train_loss': 0.6337519886954588, 'epoch': 3.0})

The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16


{'epoch': 3.0,
 'eval_accuracy': 0.5567502986857825,
 'eval_f1': 0.526468372770523,
 'eval_loss': 1.2137943506240845,
 'eval_precision': 0.5325168420360612,
 'eval_recall': 0.5380581959720734,
 'eval_runtime': 9.2053,
 'eval_samples_per_second': 181.852,
 'eval_steps_per_second': 11.406}

The following columns in the test set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 1675
  Batch size = 16


PredictionOutput(predictions=array([[ 2.7789636 , -1.8320004 , -1.3612224 ,  2.4146144 , -2.9325416 ],
       [ 1.7230608 , -1.9590844 ,  2.178066  ,  1.0589224 , -3.982875  ],
       [ 1.370651  , -3.4718704 ,  3.4706452 ,  0.75558776, -2.8990152 ],
       ...,
       [ 2.3340838 ,  2.5653589 , -2.4022202 ,  0.3235347 , -3.1236992 ],
       [ 1.0710703 , -3.016546  ,  4.45786   , -0.10570648, -2.9112284 ],
       [ 1.4195867 , -1.3453461 , -1.5903379 , -2.4765651 ,  4.0151343 ]],
      dtype=float32), label_ids=array([0, 3, 2, ..., 1, 2, 4]), metrics={'test_loss': 1.2258025407791138, 'test_accuracy': 0.5408955223880597, 'test_f1': 0.5053307095536879, 'test_precision': 0.5130058664671668, 'test_recall': 0.5128002652548107, 'test_runtime': 9.4301, 'test_samples_per_second': 177.623, 'test_steps_per_second': 11.135})

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/fr/bert-base-multilingual-cased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/fr/bert-base-multilingual-cased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/fr/bert-base-multilingual-cased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/fr/bert-base-multilingual-cased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/fr/bert-base-multilingual-cased/special_tokens_map.json


In [20]:
!rm -rf ./results/

#### Italian

In [21]:
zero_shot("it")

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 13394
  Num Epochs = 3
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 2514


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6722,1.22986,0.556153,0.502326,0.539126,0.479781
2,0.5188,1.332829,0.531661,0.505466,0.506375,0.510227
3,0.4749,1.454119,0.532258,0.511354,0.507593,0.519838


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16
Saving model checkpoint to ./results/checkpoint-838
Configuration saved in ./results/checkpoint-838/config.json
Model weights saved in ./results/checkpoint-838/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-838/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-838/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16


TrainOutput(global_step=2514, training_loss=0.5504074250314571, metrics={'train_runtime': 811.2669, 'train_samples_per_second': 49.53, 'train_steps_per_second': 3.099, 'total_flos': 1775868623519544.0, 'train_loss': 0.5504074250314571, 'epoch': 3.0})

The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16


{'epoch': 3.0,
 'eval_accuracy': 0.532258064516129,
 'eval_f1': 0.5113536969913994,
 'eval_loss': 1.454119086265564,
 'eval_precision': 0.5075932687956994,
 'eval_recall': 0.5198380024085583,
 'eval_runtime': 9.3285,
 'eval_samples_per_second': 179.45,
 'eval_steps_per_second': 11.256}

The following columns in the test set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 1675
  Batch size = 16


PredictionOutput(predictions=array([[ 4.390415  , -2.5091972 , -1.5952117 ,  1.3535274 , -3.4117403 ],
       [ 2.0807445 , -2.2641907 ,  2.285258  ,  0.63960934, -3.8179755 ],
       [ 0.9006915 , -3.0546267 ,  4.2198734 ,  0.13133536, -2.7574365 ],
       ...,
       [ 2.2087047 ,  4.016245  , -3.214292  , -0.44390032, -2.763296  ],
       [ 0.47694308, -2.6573312 ,  4.9704366 ,  0.05897301, -3.2288368 ],
       [ 1.3633063 , -1.3371292 , -1.7399693 , -2.5681915 ,  4.232589  ]],
      dtype=float32), label_ids=array([0, 3, 2, ..., 1, 2, 4]), metrics={'test_loss': 1.4337358474731445, 'test_accuracy': 0.5605970149253732, 'test_f1': 0.5268435390284084, 'test_precision': 0.5303695094067737, 'test_recall': 0.5268314871042143, 'test_runtime': 9.5289, 'test_samples_per_second': 175.781, 'test_steps_per_second': 11.019})

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/it/bert-base-multilingual-cased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/it/bert-base-multilingual-cased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/it/bert-base-multilingual-cased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/it/bert-base-multilingual-cased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/it/bert-base-multilingual-cased/special_tokens_map.json


In [22]:
!rm -rf ./results/

#### Romanian

In [23]:
zero_shot("ro")

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 13394
  Num Epochs = 3
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 2514


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7734,1.236294,0.544803,0.494346,0.536383,0.470115
2,0.5789,1.358112,0.541816,0.508635,0.522934,0.499847
3,0.4991,1.50388,0.523895,0.495132,0.504056,0.487778


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16
Saving model checkpoint to ./results/checkpoint-838
Configuration saved in ./results/checkpoint-838/config.json
Model weights saved in ./results/checkpoint-838/pytorch_model.bin
tokenizer config file saved in ./results/checkpoint-838/tokenizer_config.json
Special tokens file saved in ./results/checkpoint-838/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16


TrainOutput(global_step=2514, training_loss=0.6147597896360459, metrics={'train_runtime': 815.5705, 'train_samples_per_second': 49.269, 'train_steps_per_second': 3.083, 'total_flos': 1775868623519544.0, 'train_loss': 0.6147597896360459, 'epoch': 3.0})

The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1674
  Batch size = 16


{'epoch': 3.0,
 'eval_accuracy': 0.5418160095579451,
 'eval_f1': 0.5086353968463568,
 'eval_loss': 1.358112096786499,
 'eval_precision': 0.5229340440404717,
 'eval_recall': 0.4998471452559022,
 'eval_runtime': 9.2551,
 'eval_samples_per_second': 180.874,
 'eval_steps_per_second': 11.345}

The following columns in the test set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: tokens. If tokens are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 1675
  Batch size = 16


PredictionOutput(predictions=array([[ 3.3729086 , -2.2987332 , -1.5236608 ,  2.9040341 , -3.8212361 ],
       [ 2.40844   , -1.2382742 ,  1.4599144 ,  0.2062933 , -3.8143141 ],
       [ 1.1286784 , -3.6143486 ,  3.5224068 ,  1.2309724 , -3.0709188 ],
       ...,
       [ 2.9815881 ,  1.259246  , -2.6544123 ,  1.2848495 , -3.848719  ],
       [ 0.6710588 , -3.4833012 ,  4.8904886 ,  0.13078153, -2.9407892 ],
       [ 1.1168277 , -1.2139498 , -1.9687793 , -2.0621443 ,  4.127039  ]],
      dtype=float32), label_ids=array([0, 3, 2, ..., 1, 2, 4]), metrics={'test_loss': 1.3584296703338623, 'test_accuracy': 0.5450746268656717, 'test_f1': 0.49632022587788516, 'test_precision': 0.5214839662853626, 'test_recall': 0.4797219889038071, 'test_runtime': 9.5083, 'test_samples_per_second': 176.161, 'test_steps_per_second': 11.043})

Saving model checkpoint to /content/drive/Shareddrives/PLN/Assignment 2/models/ro/bert-base-multilingual-cased
Configuration saved in /content/drive/Shareddrives/PLN/Assignment 2/models/ro/bert-base-multilingual-cased/config.json
Model weights saved in /content/drive/Shareddrives/PLN/Assignment 2/models/ro/bert-base-multilingual-cased/pytorch_model.bin
tokenizer config file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/ro/bert-base-multilingual-cased/tokenizer_config.json
Special tokens file saved in /content/drive/Shareddrives/PLN/Assignment 2/models/ro/bert-base-multilingual-cased/special_tokens_map.json


In [24]:
!rm -rf ./results/

In [29]:
tokenized_dataset

DatasetDict({
    train_pt: Dataset({
        features: ['tokens', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 13394
    })
    validation: Dataset({
        features: ['tokens', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1674
    })
    test: Dataset({
        features: ['tokens', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1675
    })
    train_en: Dataset({
        features: ['tokens', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 13394
    })
    train_es: Dataset({
        features: ['tokens', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 13394
    })
    train_ga: Dataset({
        features: ['tokens', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 13394
    })
    train_fr: Dataset({
        features: ['tokens', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 13