## fine tune bert model for custom dataset

### 1. install libraries

In [1]:
! pip install transformers

Collecting transformers
  Downloading transformers-4.17.0-py3-none-any.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 5.3 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)
[K     |████████████████████████████████| 67 kB 5.3 MB/s 
Collecting tokenizers!=0.11.3,>=0.11.1
  Downloading tokenizers-0.11.6-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.5 MB)
[K     |████████████████████████████████| 6.5 MB 23.7 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 34.1 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 31.1 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found ex

In [2]:
! pip install imbalanced-learn



In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### 2. load/define data set

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from transformers import AutoTokenizer
import numpy as np
DATA_DIR = "drive/MyDrive/data"


def applylimit(df: pd.DataFrame, n: int = 2) -> pd.DataFrame:
    """
    :param df: data frame with 'label' column
    :param n: min number of samples required
    :return: data frame with minimum number of samples per label
    """
    dfcounts = df.groupby('label').size().reset_index(name='counts')
    dfmincounts = dfcounts[dfcounts.counts > n]
    return df[df.label.isin(dfmincounts.label.tolist())]


def encodelabels(df: pd.DataFrame) -> pd.DataFrame:
    """
    :param df: data frame with 'label' column
    :return: data frame with 'intlabel' column containing encoded labels
    """
    df['intlabel'] = df['label'].rank(method='dense', ascending=False).astype(int) - 1
    return df


def loadpreprocesseddata(path: str) -> pd.DataFrame:
    """
    :param path: absolute path to a csv file with 'label' column
    :return: data frame with labels mapped to
        integer values in the intlabel column
    """
    df = pd.read_csv(path)
    df = applylimit(df=df, n=2)
    df = encodelabels(df=df)
    return df


def getmapping(data: pd.DataFrame) -> dict:
    """
    :param data: data frame with 'intlabel' columns containing int values
        and 'label' column containing str values
    :return: mapping from int values to str
    """
    labelmapping = {}
    for key in data.intlabel.unique():
        value = data.loc[data['intlabel'] == key, 'label'].unique()[0]
        labelmapping[key] = value
    return labelmapping


def splitdata(data: pd.DataFrame, ratio: float = 0.2) -> dict:
    """
    :param data: data frame with 'text' and 'intlabel' columns
    :param ratio: ratio of a test set to a data set
    :return: train and test data sets
    """
    texts = data.text.tolist()
    labels = data.intlabel.tolist()
    trntxt, tsttxt, trnlbl, tstlbl = train_test_split(texts, labels, test_size=ratio)
    return {"train": {"text": trntxt, "label": trnlbl},
            "test": {"text": tsttxt, "label": tstlbl}}


def balancedata(data: dict) -> dict:
    """
    :param data: dictionary with 'text' and 'label' keys
    :return: balanced dataset
    """
    sampler = RandomOverSampler(random_state=42)
    txt = np.asarray(data["text"])
    txt = txt[:, np.newaxis]
    txt, lbl = sampler.fit_resample(txt, data["label"])
    txt = txt.flatten().tolist()
    return {"text": txt, "label": lbl}


def encodefeatures(data: dict, tokenizer) -> list:
    """
    :param data: dictionary with 'text' and 'label' keys
    :param tokenizer: encode text into vectors with integer values
    :return: list of dicts with encoded data
    """
    encodings = tokenizer(data["text"], truncation=True, padding=True)
    zipped = zip(data["label"], encodings['input_ids'], encodings['attention_mask'])
    return [{'label': label,
             'input_ids': input_id,
             'attention_mask': attention_mask} for label, input_id, attention_mask in zipped]


def countlabels(data: dict) -> pd.DataFrame:
    """
    :param data: dictionary with a 'label' key and one feature key
    :return: data frame with 'counts' column containing
        number of samples per label
    """
    df = pd.DataFrame(data)
    return df.groupby('label').size().reset_index(name='counts')


class DataManager:
    def __init__(self, path: str, tokenizer):
        """
        :param path: relative to a data folder path to a csv file with two columns 'text', 'label'
        :param tokenizer: encode text into vectors with integer values.
            loaded with from_pretrained() function for a model that is about to be tuned
        """
        self.data = loadpreprocesseddata(path=f"{DATA_DIR}/{path}")
        self.labelmapping = getmapping(data=self.data)
        self.nlabels = len(self.labelmapping.values())
        self.tokenizer = tokenizer
        self.datasets = {}
        self.trainset = []
        self.testset = []
        self.resamplesets()

    def reloaddata(self, path: str):
        """
        Reload and preprocess again raw data

        :param path: relative to a data folder path to a csv file with two columns 'text', 'label'
        """
        self.data = loadpreprocesseddata(path=f"{DATA_DIR}/{path}")
        self.labelmapping = getmapping(data=self.data)
        self.nlabels = len(self.labelmapping.values())
        self.resamplesets()

    def resamplesets(self):
        """
        Randomly split data into train and test sets
        """
        self.datasets = splitdata(data=self.data)
        self.datasets["train"] = balancedata(data=self.datasets["train"])
        self.trainset = encodefeatures(data=self.datasets["train"], tokenizer=self.tokenizer)
        self.testset = encodefeatures(data=self.datasets["test"], tokenizer=self.tokenizer)

    def getdistribution(self, name: str):
        """
        :param name: name of a subset: train/test
        :return: data frame containing number of
            samples per label in a train dataset
        """
        return countlabels(data=self.datasets[name])


### 4. load pretrained model

In [7]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification as AMSC
from transformers import TrainingArguments, Trainer
import numpy as np
import time
import matplotlib.pyplot as plt
CONFIG_DIR = "drive/MyDrive/configs"


def showlearningcurve(loss: list, evalloss: list):
    """
    :param loss: list of train loss values
    :param evalloss: list of evaluation loss values
    """
    plt.figure()
    plt.xlabel("epoch")
    plt.ylabel("loss")
    epochs = np.arange(len(loss))
    plt.plot(epochs, loss, color='b')
    plt.plot(epochs, evalloss, color='r')
    plt.legend(['train loss', 'test loss'])
    plt.show()


class FineTuner:
    def __init__(self, config: dict):
        """
        :param config: configuration with training parameters.
            required are: 'modelname', 'datapath'
        """
        self.config = config
        self.tokenizer = AutoTokenizer.from_pretrained(self.config["modelname"])
        self.dm = DataManager(path=self.config["datapath"], tokenizer=self.tokenizer)
        self.model = AMSC.from_pretrained(self.config["modelname"], num_labels=self.dm.nlabels)
        self.args = self.getargs()
        self.trainer = self.gettrainer()

    def reloaddata(self):
        """
        Reload and preprocess raw data
        """
        self.dm.reloaddata(path=self.config["datapath"])

    def resample(self):
        """
        Randomly resample train and test sets
        """
        self.dm.resamplesets()

    def reloadmodel(self):
        """
        Reload model for fine tuning
        """
        self.model = AMSC.from_pretrained(self.config["modelname"], num_labels=self.dm.nlabels)

    def getargs(self) -> TrainingArguments:
        """
        :return: configured training arguments
        """
        return TrainingArguments(
            output_dir="./tunedbert",
            do_eval=True,
            evaluation_strategy="epoch",
            learning_rate=self.config["lr"],
            per_device_train_batch_size=16,
            per_device_eval_batch_size=16,
            logging_strategy="epoch",
            num_train_epochs=self.config["nepochs"],
            weight_decay=0.01,
        )

    def gettrainer(self) -> Trainer:
        """
        :return: configured trainer
        """
        return Trainer(
            model=self.model,
            args=self.args,
            train_dataset=self.dm.trainset,
            eval_dataset=self.dm.testset,
            tokenizer=self.tokenizer,
        )

    def train(self, learningcurve: bool = False) -> (list, list):
        """
        :param learningcurve: if true show learning curve after training
        :return: (training loss, evaluation loss)
        """
        self.trainer.train()
        history = np.asarray(self.trainer.state.log_history[:-1])
        loss = [entry['loss'] for entry in history[::2]]
        evalloss = [entry['eval_loss'] for entry in history[1::2]]
        if learningcurve:
            showlearningcurve(loss=loss, evalloss=evalloss)
        return loss, evalloss

    def predictbatch(self, batch: list) -> np.ndarray:
        """
        :param batch: list of encoded inputs
        :return: numpy array of predicted labels
        """
        predictions = self.trainer.predict(batch)
        return np.argmax(predictions.predictions, axis=1)

    def humanpredict(self, sentence: str) -> str:
        """
        :return: predicted label
        """
        pass


### 6. Test trained model

In [8]:
import numpy as np
import yaml
import sys

def loadconfig(path: str) -> dict:
    """
    :param path: path to a configuration file
    :return: configurations as a dictionary
    """
    with open(path) as f:
        try:
            return yaml.load(stream=f, Loader=yaml.FullLoader)
        except IOError as e:
            sys.exit(f"FAILED TO LOAD CONFIG {path}: {e}")



class Evaluator:
    def __init__(self):
        self.trials = []
        config = loadconfig(path=f"{CONFIG_DIR}/finetune.yaml")
        self.tuner = FineTuner(config=config)

    def evaluate(self) -> dict:
        """
        Train and evaluate fine tuned model
        :return: dictionary with results
        """
        self.tuner.train()
        batch = self.tuner.dm.testset
        predictions = self.tuner.predictbatch(batch=batch)
        groundtruth = np.array([entry['label'] for entry in batch])
        correct = np.sum(predictions == groundtruth)
        accuracy = correct / groundtruth.shape[0]
        return {"accuracy": accuracy,
                "correct": correct,
                "total": groundtruth.shape[0],
                "predicted": predictions,
                "groundtruth": groundtruth}

    def processresults(self) -> dict:
        """
        :return: mean and total results respective to the metrics for all trials combined
        """
        results = {"accuracy": 0, "correct": 0, "total": 0}
        for trial in self.trials:
            for key in results.keys():
                results[key] += trial[key]
        results["accuracy"] /= len(self.trials)
        return results

    def runevaluation(self, n: int = 5) -> dict:
        """
        Run evaluations n times and return mean score

        :param n: number of evaluation iterations
        :return: dictionary with results
        """
        for i in range(n):
            self.trials.append(self.evaluate())
            print(f"TRIAL {i}; ACCURACY: {self.trials[-1]['accuracy']}")
            self.tuner.resample()
            self.tuner.reloadmodel()
        return self.processresults()

In [10]:
evaluator = Evaluator()
results = evaluator.runevaluation(n=5)

https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/tokenizer_config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpi45p0n6s


Downloading:   0%|          | 0.00/402 [00:00<?, ?B/s]

storing https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/tokenizer_config.json in cache at /root/.cache/huggingface/transformers/ab333f0c0e8ca2b891af2b3aeeea5bb0a6817083df22b6fb248a065904a7e032.199f68f9cbacfa8c1ea1f23db0b30d113d1d1ec00c9041f48cd39ab65741338a
creating metadata file for /root/.cache/huggingface/transformers/ab333f0c0e8ca2b891af2b3aeeea5bb0a6817083df22b6fb248a065904a7e032.199f68f9cbacfa8c1ea1f23db0b30d113d1d1ec00c9041f48cd39ab65741338a
https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/config.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp86_ds5um


Downloading:   0%|          | 0.00/723 [00:00<?, ?B/s]

storing https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/config.json in cache at /root/.cache/huggingface/transformers/ded7802c9a1fa89eb9dd9f457a200398369bbf5210a16b33996f5068c73e3a15.ba455ec869fd0d70e7e3b2b1fd62bf88c960c377df590baf894f0ab00d113f2c
creating metadata file for /root/.cache/huggingface/transformers/ded7802c9a1fa89eb9dd9f457a200398369bbf5210a16b33996f5068c73e3a15.ba455ec869fd0d70e7e3b2b1fd62bf88c960c377df590baf894f0ab00d113f2c
loading configuration file https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ded7802c9a1fa89eb9dd9f457a200398369bbf5210a16b33996f5068c73e3a15.ba455ec869fd0d70e7e3b2b1fd62bf88c960c377df590baf894f0ab00d113f2c
Model config XLMRobertaConfig {
  "_name_or_path": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
  "architectures": [
    "XLMRobertaModel"
  ],
  "attention_probs_dropout_pro

Downloading:   0%|          | 0.00/4.83M [00:00<?, ?B/s]

storing https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/sentencepiece.bpe.model in cache at /root/.cache/huggingface/transformers/3477950aabc6f988b3f5c58ea0a1996dcf8f5f2c12647011f120b3be1c8ee90a.71e50b08dbe7e5375398e165096cacc3d2086119d6a449364490da6908de655e
creating metadata file for /root/.cache/huggingface/transformers/3477950aabc6f988b3f5c58ea0a1996dcf8f5f2c12647011f120b3be1c8ee90a.71e50b08dbe7e5375398e165096cacc3d2086119d6a449364490da6908de655e
https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/tokenizer.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmp2b3afpbd


Downloading:   0%|          | 0.00/8.66M [00:00<?, ?B/s]

storing https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/tokenizer.json in cache at /root/.cache/huggingface/transformers/b26c00523dd0cefb9eca356096c6771639f599068dbd4cdeb56a1b9698211208.c405a8c40385cf4ccadaf968f41d7b7a1a4e35124e51455cffa458d1f8d2c552
creating metadata file for /root/.cache/huggingface/transformers/b26c00523dd0cefb9eca356096c6771639f599068dbd4cdeb56a1b9698211208.c405a8c40385cf4ccadaf968f41d7b7a1a4e35124e51455cffa458d1f8d2c552
https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/special_tokens_map.json not found in cache or force_download set to True, downloading to /root/.cache/huggingface/transformers/tmpwsbvugmb


Downloading:   0%|          | 0.00/239 [00:00<?, ?B/s]

storing https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/special_tokens_map.json in cache at /root/.cache/huggingface/transformers/4ac3a98f3bb4ac724ac3f0ad472e1955687f94bfa55a8b907fe23549b27429b4.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342
creating metadata file for /root/.cache/huggingface/transformers/4ac3a98f3bb4ac724ac3f0ad472e1955687f94bfa55a8b907fe23549b27429b4.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342
loading file https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/sentencepiece.bpe.model from cache at /root/.cache/huggingface/transformers/3477950aabc6f988b3f5c58ea0a1996dcf8f5f2c12647011f120b3be1c8ee90a.71e50b08dbe7e5375398e165096cacc3d2086119d6a449364490da6908de655e
loading file https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/b26c00

Downloading:   0%|          | 0.00/1.04G [00:00<?, ?B/s]

storing https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/e92c166da55aafd5132b2303f81e33adf5982014b93fbbd6a83b37a9d88dde7e.0eb6384a7113d96cb7ac12ab686050a3f5d328e6cc6f016bbba66af2ced30777
creating metadata file for /root/.cache/huggingface/transformers/e92c166da55aafd5132b2303f81e33adf5982014b93fbbd6a83b37a9d88dde7e.0eb6384a7113d96cb7ac12ab686050a3f5d328e6cc6f016bbba66af2ced30777
loading weights file https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/e92c166da55aafd5132b2303f81e33adf5982014b93fbbd6a83b37a9d88dde7e.0eb6384a7113d96cb7ac12ab686050a3f5d328e6cc6f016bbba66af2ced30777
Some weights of the model checkpoint at sentence-transformers/paraphrase-multilingual-mpnet-base-v2 were not used when initializing XLMRobertaForSequenceClassification: ['pooler.dense.we

Epoch,Training Loss,Validation Loss
1,2.5091,1.979231
2,1.278,1.299065
3,0.7647,1.117013


***** Running Evaluation *****
  Num examples = 33
  Batch size = 16
***** Running Evaluation *****
  Num examples = 33
  Batch size = 16
***** Running Evaluation *****
  Num examples = 33
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 33
  Batch size = 16


loading configuration file https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ded7802c9a1fa89eb9dd9f457a200398369bbf5210a16b33996f5068c73e3a15.ba455ec869fd0d70e7e3b2b1fd62bf88c960c377df590baf894f0ab00d113f2c
Model config XLMRobertaConfig {
  "_name_or_path": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
  "architectures": [
    "XLMRobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10",
    "11": "LABEL_11",
    "12": "LABEL_12",
    "13": "LABEL_

TRIAL 0; ACCURACY: 0.7878787878787878


loading weights file https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/e92c166da55aafd5132b2303f81e33adf5982014b93fbbd6a83b37a9d88dde7e.0eb6384a7113d96cb7ac12ab686050a3f5d328e6cc6f016bbba66af2ced30777
Some weights of the model checkpoint at sentence-transformers/paraphrase-multilingual-mpnet-base-v2 were not used when initializing XLMRobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a Be

Epoch,Training Loss,Validation Loss
1,0.6758,1.117013
2,0.6728,1.117013
3,0.6738,1.117013


***** Running Evaluation *****
  Num examples = 33
  Batch size = 16
***** Running Evaluation *****
  Num examples = 33
  Batch size = 16
***** Running Evaluation *****
  Num examples = 33
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 33
  Batch size = 16


loading configuration file https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ded7802c9a1fa89eb9dd9f457a200398369bbf5210a16b33996f5068c73e3a15.ba455ec869fd0d70e7e3b2b1fd62bf88c960c377df590baf894f0ab00d113f2c
Model config XLMRobertaConfig {
  "_name_or_path": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
  "architectures": [
    "XLMRobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10",
    "11": "LABEL_11",
    "12": "LABEL_12",
    "13": "LABEL_

TRIAL 1; ACCURACY: 0.9696969696969697


loading weights file https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/e92c166da55aafd5132b2303f81e33adf5982014b93fbbd6a83b37a9d88dde7e.0eb6384a7113d96cb7ac12ab686050a3f5d328e6cc6f016bbba66af2ced30777
Some weights of the model checkpoint at sentence-transformers/paraphrase-multilingual-mpnet-base-v2 were not used when initializing XLMRobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a Be

Epoch,Training Loss,Validation Loss
1,0.6754,1.117013
2,0.6762,1.117013
3,0.6699,1.117013


***** Running Evaluation *****
  Num examples = 33
  Batch size = 16
***** Running Evaluation *****
  Num examples = 33
  Batch size = 16
***** Running Evaluation *****
  Num examples = 33
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 33
  Batch size = 16


TRIAL 2; ACCURACY: 0.9393939393939394


loading configuration file https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ded7802c9a1fa89eb9dd9f457a200398369bbf5210a16b33996f5068c73e3a15.ba455ec869fd0d70e7e3b2b1fd62bf88c960c377df590baf894f0ab00d113f2c
Model config XLMRobertaConfig {
  "_name_or_path": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
  "architectures": [
    "XLMRobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10",
    "11": "LABEL_11",
    "12": "LABEL_12",
    "13": "LABEL_

Epoch,Training Loss,Validation Loss
1,0.6711,1.117013
2,0.6711,1.117013
3,0.6759,1.117013


***** Running Evaluation *****
  Num examples = 33
  Batch size = 16
***** Running Evaluation *****
  Num examples = 33
  Batch size = 16
***** Running Evaluation *****
  Num examples = 33
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 33
  Batch size = 16


loading configuration file https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ded7802c9a1fa89eb9dd9f457a200398369bbf5210a16b33996f5068c73e3a15.ba455ec869fd0d70e7e3b2b1fd62bf88c960c377df590baf894f0ab00d113f2c
Model config XLMRobertaConfig {
  "_name_or_path": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
  "architectures": [
    "XLMRobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10",
    "11": "LABEL_11",
    "12": "LABEL_12",
    "13": "LABEL_

TRIAL 3; ACCURACY: 0.9393939393939394


loading weights file https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/e92c166da55aafd5132b2303f81e33adf5982014b93fbbd6a83b37a9d88dde7e.0eb6384a7113d96cb7ac12ab686050a3f5d328e6cc6f016bbba66af2ced30777
Some weights of the model checkpoint at sentence-transformers/paraphrase-multilingual-mpnet-base-v2 were not used when initializing XLMRobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a Be

Epoch,Training Loss,Validation Loss
1,0.6682,1.117013
2,0.6702,1.117013
3,0.671,1.117013


***** Running Evaluation *****
  Num examples = 33
  Batch size = 16
***** Running Evaluation *****
  Num examples = 33
  Batch size = 16
***** Running Evaluation *****
  Num examples = 33
  Batch size = 16


Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Prediction *****
  Num examples = 33
  Batch size = 16


loading configuration file https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/ded7802c9a1fa89eb9dd9f457a200398369bbf5210a16b33996f5068c73e3a15.ba455ec869fd0d70e7e3b2b1fd62bf88c960c377df590baf894f0ab00d113f2c
Model config XLMRobertaConfig {
  "_name_or_path": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
  "architectures": [
    "XLMRobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10",
    "11": "LABEL_11",
    "12": "LABEL_12",
    "13": "LABEL_

TRIAL 4; ACCURACY: 0.7878787878787878


loading weights file https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/e92c166da55aafd5132b2303f81e33adf5982014b93fbbd6a83b37a9d88dde7e.0eb6384a7113d96cb7ac12ab686050a3f5d328e6cc6f016bbba66af2ced30777
Some weights of the model checkpoint at sentence-transformers/paraphrase-multilingual-mpnet-base-v2 were not used when initializing XLMRobertaForSequenceClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a Be

In [11]:
results

{'accuracy': 0.884848484848485, 'correct': 146, 'total': 165}

In [12]:
evaluator.trials

[{'accuracy': 0.7878787878787878,
  'correct': 26,
  'groundtruth': array([ 7, 20, 12, 13, 10, 11,  0, 16, 17,  6, 13,  6, 17,  5,  3, 14, 22,
          7,  5, 16,  9, 12, 15, 21, 18,  0, 11, 13, 13,  8, 19, 18,  0]),
  'predicted': array([ 7, 20, 12, 13, 10, 11,  0, 16, 17,  6, 13,  6, 17,  4, 17, 14,  6,
          7,  4, 16, 11, 12, 15, 21, 18,  0, 11, 13, 13,  2, 19, 15,  0]),
  'total': 33},
 {'accuracy': 0.9696969696969697,
  'correct': 32,
  'groundtruth': array([ 9,  8,  3, 15, 11, 18,  9, 15,  9,  3, 19, 12, 20, 13, 18, 17,  6,
          2, 10, 19, 13, 19, 20, 21, 17,  1, 10, 15,  6,  9,  4, 13, 14]),
  'predicted': array([ 9,  8,  3, 15, 11, 18,  9, 15,  9, 17, 19, 12, 20, 13, 18, 17,  6,
          2, 10, 19, 13, 19, 20, 21, 17,  1, 10, 15,  6,  9,  4, 13, 14]),
  'total': 33},
 {'accuracy': 0.9393939393939394,
  'correct': 31,
  'groundtruth': array([20, 14,  6, 11, 13, 21,  2,  0, 20, 17, 21,  3, 17, 13,  8, 17, 13,
          6, 17,  9,  0, 19, 20, 18,  2, 13, 21,  9,  7,  6