In [1]:
! pip install simpletransformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting simpletransformers
  Downloading simpletransformers-0.63.9-py3-none-any.whl (250 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.5/250.5 KB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers
  Downloading tokenizers-0.13.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m65.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentencepiece
  Downloading sentencepiece-0.1.97-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m33.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting streamlit
  Downloading streamlit-1.20.0-py2.py3-none-any.whl (9.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.6/9.6 MB[0m [31m40.4 MB/s[0m eta [36

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from simpletransformers.classification import ClassificationModel
import pandas as pd
import logging
import sklearn

wassa_train_csv_path = '/content/drive/My Drive/WASSA_train_all.csv'
wassa_dev_csv_path = '/content/drive/My Drive/WASSA_dev_all.csv'
wassa_train = pd.read_csv(wassa_train_csv_path)
wassa_dev = pd.read_csv(wassa_dev_csv_path)

In [4]:
train_df = pd.DataFrame(wassa_train)
train_df.columns = ["text", "labels"]

eval_df = pd.DataFrame(wassa_dev)
eval_df.columns = ["text", "labels"]

In [5]:
import torch
from torch import cuda
import gc

def empty_cache(model, optimizer):
  device = 'cuda' if cuda.is_available() else 'cpu'
  with torch.no_grad():
      torch.cuda.empty_cache()
  gc.collect()

In [6]:
from sklearn.metrics import f1_score, accuracy_score, precision_score, precision_score, recall_score

def f1_multiclass(labels, preds):
  return f1_score(labels, preds, average='macro')

def precision_multiclass(labels,preds):
  return precision_score(labels, preds, average='macro')

def recall_multiclass(labels,preds):
  return recall_score(labels, preds, average='macro')


In [7]:
results = []

reps = 3
for i in range(reps):

  model = ClassificationModel(
    "roberta", 
    "roberta-base",
    num_labels=7, 
    args={'num_train_epochs':2,
          'max_seq_length':256,
          'learning_rate':1e-5,
          'overwrite_output_dir': True, 
          'custom_callback': empty_cache
          }
  )

  model.train_model(train_df)

  result, model_outputs, wrong_predictions = model.eval_model(eval_df, 
                                                              acc=sklearn.metrics.accuracy_score,
                                                              f1=f1_multiclass,
                                                              precision=precision_multiclass,
                                                              recall=recall_multiclass,
                                                              )

  
  results.append({'run': i + 1, 'accuracy': result['acc'], 'f1': result['f1'], 'precision': result['precision'], 'recall': result['recall']})

print('accuracy, f1, precision, recall:')
for i in results:
  print(i['accuracy'], i['f1'], i['precision'], i['recall'])

Downloading (…)lve/main/config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

  0%|          | 0/1860 [00:00<?, ?it/s]

Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Running Epoch 0 of 2:   0%|          | 0/233 [00:00<?, ?it/s]

Running Epoch 1 of 2:   0%|          | 0/233 [00:00<?, ?it/s]

  0%|          | 0/270 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/34 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are ne

  0%|          | 0/1860 [00:00<?, ?it/s]

Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Running Epoch 0 of 2:   0%|          | 0/233 [00:00<?, ?it/s]

Running Epoch 1 of 2:   0%|          | 0/233 [00:00<?, ?it/s]

  0%|          | 0/270 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/34 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are ne

  0%|          | 0/1860 [00:00<?, ?it/s]

Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Running Epoch 0 of 2:   0%|          | 0/233 [00:00<?, ?it/s]

Running Epoch 1 of 2:   0%|          | 0/233 [00:00<?, ?it/s]

  0%|          | 0/270 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/34 [00:00<?, ?it/s]

accuracy, f1, precision, recall:
0.662962962962963 0.43996420433667394 0.45216891692046973 0.4493411276710539
0.6407407407407407 0.39568707223693744 0.441306574781151 0.39718468323540906
0.6370370370370371 0.4180286463300564 0.47111361715322114 0.4180216505714583


  _warn_prf(average, modifier, msg_start, len(result))
