In [1]:
import pandas as pd
import datasets
from transformers import RobertaTokenizerFast, RobertaForSequenceClassification,Trainer, TrainingArguments, PretrainedConfig
import torch.nn as nn
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import sklearn.metrics as metric
from tqdm import tqdm
import wandb
import os


In [2]:
# Root directory that receives checkpoints and logs.
# Export STDA_OUTPUT_DIR to redirect the run without editing the notebook;
# when the variable is unset the original location is used unchanged.
path = os.environ.get('STDA_OUTPUT_DIR', '/home/csgrad/smaranas/STDA/')

In [3]:
# Tokenizer and classifier are both created from the `roberta-base` checkpoint.
# NOTE(review): `max_length = 512` is forwarded to the tokenizer constructor as-is;
# confirm it has the intended effect (the documented length cap is `model_max_length`).
tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base', max_length = 512)

# Sequence-classification model configured for 11 labels.
model = RobertaForSequenceClassification.from_pretrained(
    'roberta-base',
    num_labels=11,
)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

In [4]:
# Read the two CSV files through the generic 'csv' loader.
# Both calls request split='train' -- presumably because a bare data file is
# exposed under that split name; verify against the `datasets` documentation.
train_data = datasets.load_dataset(
    'csv',
    data_files='p3_train.csv',
    split='train',
)
test_data = datasets.load_dataset(
    'csv',
    data_files='p3_test.csv',
    split='train',
)


Using custom data configuration default-903b86b0d8c77dba
Reusing dataset csv (/home/csgrad/smaranas/.cache/huggingface/datasets/csv/default-903b86b0d8c77dba/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a)
Using custom data configuration default-7066673e9c105372
Reusing dataset csv (/home/csgrad/smaranas/.cache/huggingface/datasets/csv/default-7066673e9c105372/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a)


In [5]:
# batch-level tokenization callback used with `Dataset.map`
def tokenization(batched_text):
    """Tokenize the ``'text'`` entry of a batch with the module-level ``tokenizer``.

    Args:
        batched_text: Mapping for one batch; its ``'text'`` entry is handed to
            the tokenizer.

    Returns:
        The tokenizer's output for the batch, produced with ``padding=True``
        and ``truncation=True``.
    """
    texts = batched_text['text']
    encoded = tokenizer(texts, truncation=True, padding=True)
    return encoded


# Each split is tokenized as one single batch (batch_size equals the split's length),
# so the tokenizer's `padding = True` sees the whole split at once.
train_data = train_data.map(
    tokenization,
    batched=True,
    batch_size=len(train_data),
)
test_data = test_data.map(
    tokenization,
    batched=True,
    batch_size=len(test_data),
)


Loading cached processed dataset at /home/csgrad/smaranas/.cache/huggingface/datasets/csv/default-903b86b0d8c77dba/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-e16fc8fa15282362.arrow
Loading cached processed dataset at /home/csgrad/smaranas/.cache/huggingface/datasets/csv/default-7066673e9c105372/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-173a221bfce50882.arrow


In [6]:
# Expose only these three columns, as torch tensors, on both splits.
_model_columns = ['input_ids', 'attention_mask', 'label']
for _split in (train_data, test_data):
    _split.set_format('torch', columns=_model_columns)


In [7]:
# metric callback handed to the Trainer
def compute_metrics(pred):
    """Score one evaluation pass with accuracy and macro-averaged precision/recall/F1.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions``; the arg-max over the last axis of ``predictions``
            is taken as the predicted class.

    Returns:
        dict with the keys ``'accuracy'``, ``'f1'``, ``'precision'`` and
        ``'recall'``.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)

    # An earlier draft sketched an ROC / equal-error-rate computation here
    # (roc_curve with pos_label=1); it was disabled and contributes nothing
    # to the returned metrics.
    macro_precision, macro_recall, macro_f1, _support = precision_recall_fscore_support(
        y_true, y_pred, average='macro'
    )

    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'f1': macro_f1,
        'precision': macro_precision,
        'recall': macro_recall,
    }

In [8]:
# Fine-tuning configuration handed to the Trainer.
# NOTE(review): load_best_model_at_end=True is set without metric_for_best_model;
# per the TrainingArguments documentation the best checkpoint is then chosen by
# evaluation loss -- confirm that loss (rather than accuracy/F1) is the intended criterion.
training_args = TrainingArguments(
    output_dir=path,
    num_train_epochs=30,
    per_device_train_batch_size=3,
    # The training log reports a total train batch size of 192 with these settings.
    gradient_accumulation_steps=16,
    per_device_eval_batch_size=8,
    # Evaluate and checkpoint on the same schedule: once per epoch.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    disable_tqdm=False,
    warmup_steps=500,
    weight_decay=0.01,
    logging_steps=8,
    # fp16 = True,   (left disabled)
    # os.path.join avoids the doubled separator that `path + '/logs'` produced
    # when `path` already ends with '/'.
    logging_dir=os.path.join(path, 'logs'),
    dataloader_num_workers=0,
    run_name='roberta-classification_titan',
)

In [9]:
# Wire the model, the two data splits and the metric callback into a Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=test_data,
    compute_metrics=compute_metrics,
)

# Report whether PyTorch can see a CUDA device; the value is displayed as the
# cell output and is not referenced again in the visible cells.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [10]:
# Launch fine-tuning; as the cell's last expression, the returned TrainOutput is displayed.
trainer.train()

The following columns in the training set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Unnamed: 0, text. If Unnamed: 0, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 9486
  Num Epochs = 30
  Instantaneous batch size per device = 3
  Total train batch size (w. parallel, distributed & accumulation) = 192
  Gradient Accumulation steps = 16
  Total optimization steps = 1470
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
[34m[1mwandb[0m: Currently logged in as: [33mpsmarana[0m. Use [1m`wandb login --relogin`[0m to force relogin




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
0,2.2898,2.186842,0.287099,0.220402,0.356885,0.296571
1,1.355,1.166198,0.654722,0.627497,0.64872,0.655857
2,0.8201,0.696295,0.765599,0.764098,0.77646,0.767222
3,0.6153,0.593255,0.787099,0.786528,0.79546,0.789228
4,0.5207,0.536342,0.801855,0.801658,0.811777,0.803956
5,0.4689,0.528933,0.806071,0.799916,0.832636,0.806804
6,0.4075,0.424498,0.844013,0.84622,0.850551,0.847066
7,0.3876,0.457632,0.837268,0.839441,0.849426,0.839569
8,0.2185,0.53755,0.824621,0.822398,0.84792,0.828028
9,0.2097,0.50627,0.833474,0.832039,0.858325,0.837568


The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Unnamed: 0, text. If Unnamed: 0, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2372
  Batch size = 8
  'precision', 'predicted', average, warn_for)
Saving model checkpoint to /home/csgrad/smaranas/STDA/checkpoint-49
Configuration saved in /home/csgrad/smaranas/STDA/checkpoint-49/config.json
Model weights saved in /home/csgrad/smaranas/STDA/checkpoint-49/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Unnamed: 0, text. If Unnamed: 0, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2372
  Batch size = 8
Saving mo

***** Running Evaluation *****
  Num examples = 2372
  Batch size = 8
Saving model checkpoint to /home/csgrad/smaranas/STDA/checkpoint-490
Configuration saved in /home/csgrad/smaranas/STDA/checkpoint-490/config.json
Model weights saved in /home/csgrad/smaranas/STDA/checkpoint-490/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Unnamed: 0, text. If Unnamed: 0, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2372
  Batch size = 8
Saving model checkpoint to /home/csgrad/smaranas/STDA/checkpoint-539
Configuration saved in /home/csgrad/smaranas/STDA/checkpoint-539/config.json
Model weights saved in /home/csgrad/smaranas/STDA/checkpoint-539/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassifica

The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Unnamed: 0, text. If Unnamed: 0, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2372
  Batch size = 8
Saving model checkpoint to /home/csgrad/smaranas/STDA/checkpoint-980
Configuration saved in /home/csgrad/smaranas/STDA/checkpoint-980/config.json
Model weights saved in /home/csgrad/smaranas/STDA/checkpoint-980/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Unnamed: 0, text. If Unnamed: 0, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2372
  Batch size = 8
Saving model checkpoint to /home/csgrad/smaranas/STDA

The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Unnamed: 0, text. If Unnamed: 0, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2372
  Batch size = 8
Saving model checkpoint to /home/csgrad/smaranas/STDA/checkpoint-1421
Configuration saved in /home/csgrad/smaranas/STDA/checkpoint-1421/config.json
Model weights saved in /home/csgrad/smaranas/STDA/checkpoint-1421/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Unnamed: 0, text. If Unnamed: 0, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2372
  Batch size = 8
Saving model checkpoint to /home/csgrad/smaranas/S

TrainOutput(global_step=1470, training_loss=0.2964328759052113, metrics={'train_runtime': 8229.8012, 'train_samples_per_second': 34.579, 'train_steps_per_second': 0.179, 'total_flos': 7.486167035781734e+16, 'train_loss': 0.2964328759052113, 'epoch': 29.99})

In [11]:
# Evaluate the trainer's current model; the returned metrics dict is displayed as the cell output.
# NOTE(review): the reported eval_loss equals the validation loss of the row labelled epoch 6
# in the training table, consistent with load_best_model_at_end=True having restored that
# checkpoint rather than the last one -- confirm this is the model you intend to report.
trainer.evaluate()

The following columns in the evaluation set  don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: Unnamed: 0, text. If Unnamed: 0, text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 2372
  Batch size = 8


{'eval_loss': 0.4244978427886963,
 'eval_accuracy': 0.8440134907251264,
 'eval_f1': 0.846219595149542,
 'eval_precision': 0.8505508847831962,
 'eval_recall': 0.8470660678717997,
 'eval_runtime': 14.7225,
 'eval_samples_per_second': 161.114,
 'eval_steps_per_second': 5.094,
 'epoch': 29.99}

In [12]:
# Macro-averaged recall on the evaluation set, one value per epoch (30 epochs).
# The first ten entries match the 'Recall' column of the training log table;
# presumably the rest were transcribed from the same (truncated) table.
recall = [
    0.296571, 0.655857, 0.767222, 0.789228, 0.803956,
    0.806804, 0.847066, 0.839569, 0.828028, 0.837568,
    0.819587, 0.854334, 0.858446, 0.857630, 0.871951,
    0.851720, 0.863486, 0.865399, 0.841621, 0.874128,
    0.858510, 0.871986, 0.874497, 0.863914, 0.877331,
    0.876417, 0.873857, 0.866245, 0.880618, 0.881229,
]

# Macro-averaged precision on the evaluation set, one value per epoch (30 epochs).
# The first ten entries match the 'Precision' column of the training log table;
# presumably the rest were transcribed from the same (truncated) table.
precision = [
    0.356885, 0.648720, 0.776460, 0.795460, 0.811777,
    0.832636, 0.850551, 0.849426, 0.847920, 0.858325,
    0.842292, 0.858707, 0.870259, 0.868779, 0.874690,
    0.858540, 0.871680, 0.868850, 0.867023, 0.878355,
    0.870065, 0.881134, 0.881235, 0.871396, 0.881466,
    0.883078, 0.879808, 0.876884, 0.884091, 0.884335,
]

# Macro-averaged F1 on the evaluation set, one value per epoch (30 epochs).
# The first ten entries match the 'F1' column of the training log table;
# presumably the rest were transcribed from the same (truncated) table.
f1 = [
    0.220402, 0.627497, 0.764098, 0.786528, 0.801658,
    0.799916, 0.846220, 0.839441, 0.822398, 0.832039,
    0.815709, 0.853485, 0.858829, 0.857112, 0.871853,
    0.849879, 0.862103, 0.865082, 0.841903, 0.873340,
    0.857825, 0.872748, 0.875060, 0.861993, 0.875928,
    0.875731, 0.872247, 0.864369, 0.879786, 0.880658,
]

# Accuracy on the evaluation set, one value per epoch (30 epochs).
# The first ten entries match the 'Accuracy' column of the training log table;
# presumably the rest were transcribed from the same (truncated) table.
accuracy = [
    0.287099, 0.654722, 0.765599, 0.787099, 0.801855,
    0.806071, 0.844013, 0.837268, 0.824621, 0.833474,
    0.816189, 0.852867, 0.854975, 0.853710, 0.869309,
    0.849494, 0.859612, 0.863406, 0.837690, 0.870152,
    0.856239, 0.868465, 0.872681, 0.859612, 0.873103,
    0.872260, 0.869309, 0.861720, 0.876476, 0.877319,
]


# Validation loss, one value per epoch (30 epochs).
# The first ten entries match the 'Validation Loss' column of the training log table;
# presumably the rest were transcribed from the same (truncated) table.
validation_loss = [
    2.186842, 1.166198, 0.696295, 0.593255, 0.536342,
    0.528933, 0.424498, 0.457632, 0.537550, 0.506270,
    0.659687, 0.551709, 0.559344, 0.558760, 0.522413,
    0.664865, 0.696518, 0.643400, 0.826523, 0.672140,
    0.749824, 0.770781, 0.728389, 0.822151, 0.762331,
    0.789314, 0.804961, 0.895249, 0.787886, 0.781676,
]

# Training loss, one value per epoch (30 epochs).
# The first ten entries match the 'Training Loss' column of the training log table;
# presumably the rest were transcribed from the same (truncated) table.
training_loss = [
    2.289800, 1.355000, 0.820100, 0.615300, 0.520700,
    0.468900, 0.407500, 0.387600, 0.218500, 0.209700,
    0.227600, 0.123300, 0.090500, 0.134500, 0.078300,
    0.051700, 0.032300, 0.025500, 0.036700, 0.024200,
    0.023400, 0.018200, 0.009500, 0.006300, 0.012900,
    0.004200, 0.002100, 0.001300, 0.001200, 0.001300,
]

In [13]:
# x-axis values for the plots: 1-based epoch numbers 1..30 (one per entry of the metric lists).
epoch = range(1, 30 + 1)

In [None]:
# Fixed module name: the original `matplolib` raised ModuleNotFoundError,
# so this cell could never run.
import matplotlib.pyplot as plt

# Overlay the per-epoch loss curves and evaluation metrics on one set of axes.
plt.plot(epoch, training_loss, label='training_loss')
plt.plot(epoch, validation_loss, label='validation_loss')
plt.plot(epoch, accuracy, label='accuracy')
plt.plot(epoch, f1, label='f1')
plt.plot(epoch, precision, label='precision')
plt.plot(epoch, recall, label='recall')

plt.xlabel('epoch')
plt.ylabel('loss / metric value')
plt.legend()
plt.title('training stats')
# Written to the current working directory.
plt.savefig('training_stats.png')