# target_iSarcasm
This notebook takes models trained on the intermediate tasks and fine-tunes them further on our target task, iSarcasm.

## Imports & Settings

First, update working directory to parent so that we may use our custom functions

In [1]:
import os
 os.chdir('..')
# os.getcwd( )

In [3]:
import params
from utils import *
from trainer import *

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from torch.utils.data import TensorDataset, DataLoader, RandomSampler

# suppress model warning
from transformers import logging
logging.set_verbosity_error()

# set logging level
import logging
logging.basicConfig(format='%(message)s', level=logging.INFO)

<torch._C.Generator at 0x29319c810>

In [None]:
# set general seeds
set_seeds(1)

# set dataloader generator seed
g = torch.Generator()
g.manual_seed(1)

# Ensure we're on an ARM environment if necessary.
platform_check()

We're Armed: macOS-13.1-arm64-i386-64bit


## Load Data

### iSarcasm

In [4]:
dataset_path = 'data/target_semEval2022_en/iSarcasmEval-main/train/train.en.prepped-oversampled.csv'
df = pd.read_csv(dataset_path)
df = df.rename(columns={'tweet': 'text'})
df = df.rename(columns={'sarcastic': 'label'})

df.head()

Unnamed: 0,text,label
0,@ThePartridgePod Defrost the freezer with the ...,1
1,really honoured to have shared a coach today w...,1
2,I can't believe today is the last day we can b...,1
3,"thank you kind person that stole my ATM card, ...",1
4,I love leaving the doctors office in tears. I ...,1


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5200 entries, 0 to 5199
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    5200 non-null   object
 1   label   5200 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 81.4+ KB


In [6]:
df['label'].value_counts()

1    2600
0    2600
Name: label, dtype: int64

In [7]:
df['label'].value_counts()

1    2600
0    2600
Name: label, dtype: int64

### Target Text & Labels

In [8]:
text = df.text.values
labels = df.label.values

### Preprocess

In [9]:
# load best intermediate/control model
# path format example:  "model_saves/intermediate_XED_binary_01/E03_A0.83_F0.82"
intermediate_model_path: str = str(input("What is the path to the best model?"))

# update tokenizer to use trained model's tokenizer
params.tokenizer = RobertaTokenizer.from_pretrained(intermediate_model_path, local_files_only=True)

print(params.tokenizer)

PreTrainedTokenizer(name_or_path='model_saves/intermediate_hellaswag_01/E07_A0.61_F0.61', vocab_size=50265, model_max_len=512, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'eos_token': AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'unk_token': AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'sep_token': AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'pad_token': AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'cls_token': AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True), 'mask_token': AddedToken("<mask>", rstrip=False, lstrip=True, single_word=False, normalized=True)})


In [10]:
token_id = []
attention_masks = []

for sample in text:
  encoding_dict = preprocessing(sample, params.tokenizer)
  token_id.append(encoding_dict['input_ids']) 
  attention_masks.append(encoding_dict['attention_mask'])


token_id = torch.cat(token_id, dim = 0)
attention_masks = torch.cat(attention_masks, dim = 0)
labels = torch.tensor(labels)

### Data Split
We split the dataset into train (80%) and validation (20%) sets, and wrap them around a torch.utils.data.DataLoader object.

In [11]:
val_ratio = 0.2

# Indices of the train and validation splits stratified by labels
train_idx, val_idx = train_test_split(
    np.arange(len(labels)),
    test_size = val_ratio,
    shuffle = True,
    stratify = labels,
    random_state=1)

# Train and validation sets
train_set = TensorDataset(token_id[train_idx], 
                          attention_masks[train_idx], 
                          labels[train_idx])

val_set = TensorDataset(token_id[val_idx], 
                        attention_masks[val_idx], 
                        labels[val_idx])

# Prepare DataLoader
train_dataloader = DataLoader(
            train_set,
            sampler = RandomSampler(train_set),
            batch_size = params.batch_size,
            worker_init_fn=seed_worker,
            generator=g,
        )

validation_dataloader = DataLoader(
            val_set,
            sampler = RandomSampler(val_set),
            batch_size = params.batch_size,
            worker_init_fn=seed_worker,
            generator=g,
        )

## intermediate_XED_binary

In [None]:
# set params for this model
params.num_labels = 2
params.output_dir = "model_saves/target-iSarcasm_inter-XED-binary_03"

### Train

Download transformers.RobertaForSequenceClassificatio, which is a RoBERTa model with a linear layer for sentence classification (or regression) on top of the pooled output:

In [12]:
# Load the RobertaForSequenceClassification model
model = RobertaForSequenceClassification.from_pretrained(intermediate_model_path,
                                                         num_labels = params.num_labels,
                                                         local_files_only=True, 
                                                         ignore_mismatched_sizes=True,
                                                         output_attentions = False,
                                                         output_hidden_states = False,
                                                         )

from torchinfo import summary
summary(model, input_size=(1, 512), dtypes=['torch.IntTensor'])

Layer (type:depth-idx)                                       Output Shape              Param #
RobertaForSequenceClassification                             [1, 2]                    --
├─RobertaModel: 1-1                                          [1, 512, 768]             --
│    └─RobertaEmbeddings: 2-1                                [1, 512, 768]             --
│    │    └─Embedding: 3-1                                   [1, 512, 768]             38,603,520
│    │    └─Embedding: 3-2                                   [1, 512, 768]             768
│    │    └─Embedding: 3-3                                   [1, 512, 768]             394,752
│    │    └─LayerNorm: 3-4                                   [1, 512, 768]             1,536
│    │    └─Dropout: 3-5                                     [1, 512, 768]             --
│    └─RobertaEncoder: 2-2                                   [1, 512, 768]             --
│    │    └─ModuleList: 3-6                                  --               

Set model to device, initialize trainer

In [13]:
model.to(params.device)
print(f"Trained Dataset: {dataset_path}")
print(f"Device: {params.device}")

optimizer = torch.optim.Adam(params=model.parameters(), lr=params.learning_rate) #roberta

trainer = Trainer(model=model,
                  device=params.device,
                  tokenizer=params.tokenizer,
                  train_dataloader=train_dataloader,
                  validation_dataloader=validation_dataloader,
                  epochs=params.epochs,
                  optimizer=optimizer,
                  val_loss_fn=params.val_loss_fn,
                  notify=params.notify,
                  phone_number=params.phone_number,
                  save_dir=params.save_dir,
                  model_name=params.model_name, 
                  save_freq=params.save_freq)

Trained Dataset: data/target_semEval2022_en/iSarcasmEval-main/train/train.en.prepped-oversampled.csv
Device: mps


Fit the model to our training data.

In [14]:
trainer.fit()

  incremental_indices = (torch.cumsum(mask, dim=1).type_as(mask) + past_key_values_length) * mask
Epoch 1: 100%|██████████| 260/260 [05:46<00:00,  1.33s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.47batch/s]


	 - Train loss: 0.667241
	 - Validation Loss: 0.524531
	 - Validation Accuracy: 0.735577
	 - Validation F1: 0.738615
	 - Validation Recall: 0.792228
	 - Validation Precision: 0.715383
	 * Model @ epoch 1 saved to model_saves/target-iSarcasm_inter-XED-binary_03/E01_A0.74_F0.74


Epoch 2: 100%|██████████| 260/260 [05:46<00:00,  1.33s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.45batch/s]


	 - Train loss: 0.447875
	 - Validation Loss: 0.374703
	 - Validation Accuracy: 0.830769
	 - Validation F1: 0.825035
	 - Validation Recall: 0.858770
	 - Validation Precision: 0.814744
	 * Model @ epoch 2 saved to model_saves/target-iSarcasm_inter-XED-binary_03/E02_A0.83_F0.83


Epoch 3: 100%|██████████| 260/260 [05:47<00:00,  1.34s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.43batch/s]


	 - Train loss: 0.226148
	 - Validation Loss: 0.370340
	 - Validation Accuracy: 0.854808
	 - Validation F1: 0.860883
	 - Validation Recall: 0.946372
	 - Validation Precision: 0.802112
	 * Model @ epoch 3 saved to model_saves/target-iSarcasm_inter-XED-binary_03/E03_A0.85_F0.86


Epoch 4: 100%|██████████| 260/260 [05:48<00:00,  1.34s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.45batch/s]


	 - Train loss: 0.118373
	 - Validation Loss: 0.333577
	 - Validation Accuracy: 0.891346
	 - Validation F1: 0.887355
	 - Validation Recall: 0.929589
	 - Validation Precision: 0.860374
	 * Model @ epoch 4 saved to model_saves/target-iSarcasm_inter-XED-binary_03/E04_A0.89_F0.89


Epoch 5: 100%|██████████| 260/260 [05:44<00:00,  1.32s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.44batch/s]


	 - Train loss: 0.065722
	 - Validation Loss: 0.337964
	 - Validation Accuracy: 0.914423
	 - Validation F1: 0.910753
	 - Validation Recall: 0.933489
	 - Validation Precision: 0.898655
	 * Model @ epoch 5 saved to model_saves/target-iSarcasm_inter-XED-binary_03/E05_A0.91_F0.91


Epoch 6: 100%|██████████| 260/260 [05:50<00:00,  1.35s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:28<00:00,  2.24batch/s]


	 - Train loss: 0.041056
	 - Validation Loss: 0.383040
	 - Validation Accuracy: 0.906731
	 - Validation F1: 0.903193
	 - Validation Recall: 0.938093
	 - Validation Precision: 0.878795
	 * Model @ epoch 6 saved to model_saves/target-iSarcasm_inter-XED-binary_03/E06_A0.91_F0.9


Epoch 7: 100%|██████████| 260/260 [06:04<00:00,  1.40s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.48batch/s]


	 - Train loss: 0.048199
	 - Validation Loss: 0.369328
	 - Validation Accuracy: 0.910577
	 - Validation F1: 0.907482
	 - Validation Recall: 0.948942
	 - Validation Precision: 0.877798
	 * Model @ epoch 7 saved to model_saves/target-iSarcasm_inter-XED-binary_03/E07_A0.91_F0.91


Epoch 8: 100%|██████████| 260/260 [15:41<00:00,  3.62s/batch]  
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.45batch/s]


	 - Train loss: 0.030835
	 - Validation Loss: 0.786566
	 - Validation Accuracy: 0.826923
	 - Validation F1: 0.839350
	 - Validation Recall: 0.968051
	 - Validation Precision: 0.752337
	 * Model @ epoch 8 saved to model_saves/target-iSarcasm_inter-XED-binary_03/E08_A0.83_F0.84


Epoch 9: 100%|██████████| 260/260 [05:45<00:00,  1.33s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.45batch/s]


	 - Train loss: 0.032423
	 - Validation Loss: 0.377537
	 - Validation Accuracy: 0.919231
	 - Validation F1: 0.912910
	 - Validation Recall: 0.933373
	 - Validation Precision: 0.901565
	 * Model @ epoch 9 saved to model_saves/target-iSarcasm_inter-XED-binary_03/E09_A0.92_F0.91


Epoch 10: 100%|██████████| 260/260 [05:45<00:00,  1.33s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.44batch/s]


	 - Train loss: 0.024998
	 - Validation Loss: 0.476860
	 - Validation Accuracy: 0.892308
	 - Validation F1: 0.892355
	 - Validation Recall: 0.944895
	 - Validation Precision: 0.857109
	 * Model @ epoch 10 saved to model_saves/target-iSarcasm_inter-XED-binary_03/E10_A0.89_F0.89


## intermediate_XED_fine

In [None]:
# set params for this model
params.num_labels = 2
params.output_dir = "model_saves/target-iSarcasm_inter-XED-fine_03"

### Train

Download transformers.RobertaForSequenceClassificatio, which is a RoBERTa model with a linear layer for sentence classification (or regression) on top of the pooled output:

In [12]:
# Load the RobertaForSequenceClassification model
model = RobertaForSequenceClassification.from_pretrained(intermediate_model_path,
                                                         num_labels = params.num_labels,
                                                         local_files_only=True, 
                                                         ignore_mismatched_sizes=True,
                                                         output_attentions = False,
                                                         output_hidden_states = False,
                                                         )

from torchinfo import summary
summary(model, input_size=(1, 512), dtypes=['torch.IntTensor'])

Layer (type:depth-idx)                                       Output Shape              Param #
RobertaForSequenceClassification                             [1, 2]                    --
├─RobertaModel: 1-1                                          [1, 512, 768]             --
│    └─RobertaEmbeddings: 2-1                                [1, 512, 768]             --
│    │    └─Embedding: 3-1                                   [1, 512, 768]             38,603,520
│    │    └─Embedding: 3-2                                   [1, 512, 768]             768
│    │    └─Embedding: 3-3                                   [1, 512, 768]             394,752
│    │    └─LayerNorm: 3-4                                   [1, 512, 768]             1,536
│    │    └─Dropout: 3-5                                     [1, 512, 768]             --
│    └─RobertaEncoder: 2-2                                   [1, 512, 768]             --
│    │    └─ModuleList: 3-6                                  --               

Set model to device, initialize trainer

In [13]:
model.to(params.device)
print(f"Trained Dataset: {dataset_path}")
print(f"Device: {params.device}")

optimizer = torch.optim.Adam(params=model.parameters(), lr=params.learning_rate) #roberta

trainer = Trainer(model=model,
                  device=params.device,
                  tokenizer=params.tokenizer,
                  train_dataloader=train_dataloader,
                  validation_dataloader=validation_dataloader,
                  epochs=params.epochs,
                  optimizer=optimizer,
                  val_loss_fn=params.val_loss_fn,
                  num_labels=params.num_labels,
                  output_dir=params.output_dir,
                  save_freq=params.save_freq,
                  checkpoint_freq=params.checkpoint_freq)

Trained Dataset: data/target_semEval2022_en/iSarcasmEval-main/train/train.en.prepped-oversampled.csv
Device: mps


Fit the model to our training data.

In [14]:
trainer.fit()

  incremental_indices = (torch.cumsum(mask, dim=1).type_as(mask) + past_key_values_length) * mask
Epoch 1: 100%|██████████| 260/260 [05:42<00:00,  1.32s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:25<00:00,  2.51batch/s]


	 - Train loss: 0.623434
	 - Validation Loss: 0.508588
	 - Validation Accuracy: 0.757692
	 - Validation F1: 0.772261
	 - Validation Recall: 0.870527
	 - Validation Precision: 0.714164
	 * Model @ epoch 1 saved to model_saves/target-iSarcasm_inter-XED-fine_03/E01_A0.76_F0.77


Epoch 2: 100%|██████████| 260/260 [05:41<00:00,  1.31s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:25<00:00,  2.51batch/s]


	 - Train loss: 0.390146
	 - Validation Loss: 0.356848
	 - Validation Accuracy: 0.847115
	 - Validation F1: 0.846331
	 - Validation Recall: 0.927307
	 - Validation Precision: 0.797252
	 * Model @ epoch 2 saved to model_saves/target-iSarcasm_inter-XED-fine_03/E02_A0.85_F0.85


Epoch 3: 100%|██████████| 260/260 [05:43<00:00,  1.32s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.48batch/s]


	 - Train loss: 0.200555
	 - Validation Loss: 0.277212
	 - Validation Accuracy: 0.898077
	 - Validation F1: 0.894615
	 - Validation Recall: 0.925760
	 - Validation Precision: 0.874832
	 * Model @ epoch 3 saved to model_saves/target-iSarcasm_inter-XED-fine_03/E03_A0.9_F0.89


Epoch 4: 100%|██████████| 260/260 [05:44<00:00,  1.33s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.49batch/s]


	 - Train loss: 0.089316
	 - Validation Loss: 0.399759
	 - Validation Accuracy: 0.882692
	 - Validation F1: 0.879519
	 - Validation Recall: 0.955822
	 - Validation Precision: 0.828952
	 * Model @ epoch 4 saved to model_saves/target-iSarcasm_inter-XED-fine_03/E04_A0.88_F0.88


Epoch 5: 100%|██████████| 260/260 [05:44<00:00,  1.33s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.49batch/s]


	 - Train loss: 0.064681
	 - Validation Loss: 0.337704
	 - Validation Accuracy: 0.901923
	 - Validation F1: 0.900604
	 - Validation Recall: 0.938709
	 - Validation Precision: 0.879833
	 * Model @ epoch 5 saved to model_saves/target-iSarcasm_inter-XED-fine_03/E05_A0.9_F0.9


Epoch 6: 100%|██████████| 260/260 [05:45<00:00,  1.33s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.45batch/s]


	 - Train loss: 0.057342
	 - Validation Loss: 0.399784
	 - Validation Accuracy: 0.889423
	 - Validation F1: 0.887724
	 - Validation Recall: 0.955199
	 - Validation Precision: 0.843308
	 * Model @ epoch 6 saved to model_saves/target-iSarcasm_inter-XED-fine_03/E06_A0.89_F0.89


Epoch 7: 100%|██████████| 260/260 [05:46<00:00,  1.33s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.49batch/s]


	 - Train loss: 0.036000
	 - Validation Loss: 0.353382
	 - Validation Accuracy: 0.912500
	 - Validation F1: 0.909128
	 - Validation Recall: 0.950297
	 - Validation Precision: 0.882565
	 * Model @ epoch 7 saved to model_saves/target-iSarcasm_inter-XED-fine_03/E07_A0.91_F0.91


Epoch 8: 100%|██████████| 260/260 [05:45<00:00,  1.33s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.49batch/s]


	 - Train loss: 0.026952
	 - Validation Loss: 0.430084
	 - Validation Accuracy: 0.895192
	 - Validation F1: 0.893925
	 - Validation Recall: 0.945302
	 - Validation Precision: 0.862286
	 * Model @ epoch 8 saved to model_saves/target-iSarcasm_inter-XED-fine_03/E08_A0.9_F0.89


Epoch 9: 100%|██████████| 260/260 [05:45<00:00,  1.33s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.49batch/s]


	 - Train loss: 0.021738
	 - Validation Loss: 0.482866
	 - Validation Accuracy: 0.889423
	 - Validation F1: 0.888994
	 - Validation Recall: 0.949039
	 - Validation Precision: 0.848620
	 * Model @ epoch 9 saved to model_saves/target-iSarcasm_inter-XED-fine_03/E09_A0.89_F0.89


Epoch 10: 100%|██████████| 260/260 [05:46<00:00,  1.33s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.50batch/s]


	 - Train loss: 0.026681
	 - Validation Loss: 0.363273
	 - Validation Accuracy: 0.913462
	 - Validation F1: 0.908598
	 - Validation Recall: 0.925907
	 - Validation Precision: 0.904121
	 * Model @ epoch 10 saved to model_saves/target-iSarcasm_inter-XED-fine_03/E10_A0.91_F0.91


## intermediate_SARC

In [None]:
# set params for this model
params.num_labels = 2
params.output_dir = "model_saves/target-iSarcasm_inter-SARC_03"

### Train

Download transformers.RobertaForSequenceClassificatio, which is a RoBERTa model with a linear layer for sentence classification (or regression) on top of the pooled output:

In [12]:
# Load the RobertaForSequenceClassification model
model = RobertaForSequenceClassification.from_pretrained(intermediate_model_path,
                                                         num_labels = params.num_labels,
                                                         local_files_only=True, 
                                                         ignore_mismatched_sizes=True,
                                                         output_attentions = False,
                                                         output_hidden_states = False,
                                                         )

from torchinfo import summary
summary(model, input_size=(1, 512), dtypes=['torch.IntTensor'])

Layer (type:depth-idx)                                       Output Shape              Param #
RobertaForSequenceClassification                             [1, 2]                    --
├─RobertaModel: 1-1                                          [1, 512, 768]             --
│    └─RobertaEmbeddings: 2-1                                [1, 512, 768]             --
│    │    └─Embedding: 3-1                                   [1, 512, 768]             38,603,520
│    │    └─Embedding: 3-2                                   [1, 512, 768]             768
│    │    └─Embedding: 3-3                                   [1, 512, 768]             394,752
│    │    └─LayerNorm: 3-4                                   [1, 512, 768]             1,536
│    │    └─Dropout: 3-5                                     [1, 512, 768]             --
│    └─RobertaEncoder: 2-2                                   [1, 512, 768]             --
│    │    └─ModuleList: 3-6                                  --               

Set model to device, initialize trainer

In [13]:
model.to(params.device)
print(f"Trained Dataset: {dataset_path}")
print(f"Device: {params.device}")

optimizer = torch.optim.Adam(params=model.parameters(), lr=params.learning_rate) #roberta

trainer = Trainer(model=model,
                  device=params.device,
                  tokenizer=params.tokenizer,
                  train_dataloader=train_dataloader,
                  validation_dataloader=validation_dataloader,
                  epochs=params.epochs,
                  optimizer=optimizer,
                  val_loss_fn=params.val_loss_fn,
                  notify=params.notify,
                  phone_number=params.phone_number,
                  save_dir=params.save_dir,
                  model_name=params.model_name, 
                  save_freq=params.save_freq)

Trained Dataset: data/target_semEval2022_en/iSarcasmEval-main/train/train.en.prepped-oversampled.csv
Device: mps


Fit the model to our training data.

In [14]:
trainer.fit()

  incremental_indices = (torch.cumsum(mask, dim=1).type_as(mask) + past_key_values_length) * mask
Epoch 1: 100%|██████████| 260/260 [05:50<00:00,  1.35s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.46batch/s]


	 - Train loss: 0.589423
	 - Validation Loss: 0.464594
	 - Validation Accuracy: 0.766346
	 - Validation F1: 0.737375
	 - Validation Recall: 0.710190
	 - Validation Precision: 0.795353
	 * Model @ epoch 1 saved to model_saves/target-iSarcasm_inter-SARC_03/E01_A0.77_F0.74


Epoch 2: 100%|██████████| 260/260 [05:45<00:00,  1.33s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:25<00:00,  2.50batch/s]


	 - Train loss: 0.331471
	 - Validation Loss: 0.341481
	 - Validation Accuracy: 0.858654
	 - Validation F1: 0.860319
	 - Validation Recall: 0.921218
	 - Validation Precision: 0.822528
	 * Model @ epoch 2 saved to model_saves/target-iSarcasm_inter-SARC_03/E02_A0.86_F0.86


Epoch 3: 100%|██████████| 260/260 [05:44<00:00,  1.33s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.42batch/s]


	 - Train loss: 0.151912
	 - Validation Loss: 0.352892
	 - Validation Accuracy: 0.897115
	 - Validation F1: 0.895943
	 - Validation Recall: 0.948753
	 - Validation Precision: 0.857389
	 * Model @ epoch 3 saved to model_saves/target-iSarcasm_inter-SARC_03/E03_A0.9_F0.9


Epoch 4: 100%|██████████| 260/260 [05:47<00:00,  1.34s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:27<00:00,  2.40batch/s]


	 - Train loss: 0.089651
	 - Validation Loss: 0.331183
	 - Validation Accuracy: 0.904808
	 - Validation F1: 0.897131
	 - Validation Recall: 0.935004
	 - Validation Precision: 0.874010
	 * Model @ epoch 4 saved to model_saves/target-iSarcasm_inter-SARC_03/E04_A0.9_F0.9


Epoch 5: 100%|██████████| 260/260 [05:44<00:00,  1.33s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.41batch/s]


	 - Train loss: 0.061303
	 - Validation Loss: 0.421003
	 - Validation Accuracy: 0.886538
	 - Validation F1: 0.881715
	 - Validation Recall: 0.938753
	 - Validation Precision: 0.841736
	 * Model @ epoch 5 saved to model_saves/target-iSarcasm_inter-SARC_03/E05_A0.89_F0.88


Epoch 6: 100%|██████████| 260/260 [05:46<00:00,  1.33s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.46batch/s]


	 - Train loss: 0.041041
	 - Validation Loss: 0.466762
	 - Validation Accuracy: 0.890385
	 - Validation F1: 0.885185
	 - Validation Recall: 0.942386
	 - Validation Precision: 0.845128
	 * Model @ epoch 6 saved to model_saves/target-iSarcasm_inter-SARC_03/E06_A0.89_F0.89


Epoch 7: 100%|██████████| 260/260 [05:45<00:00,  1.33s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.42batch/s]


	 - Train loss: 0.029830
	 - Validation Loss: 0.498110
	 - Validation Accuracy: 0.889423
	 - Validation F1: 0.886588
	 - Validation Recall: 0.939278
	 - Validation Precision: 0.849738
	 * Model @ epoch 7 saved to model_saves/target-iSarcasm_inter-SARC_03/E07_A0.89_F0.89


Epoch 8: 100%|██████████| 260/260 [05:44<00:00,  1.32s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.46batch/s]


	 - Train loss: 0.035371
	 - Validation Loss: 0.483717
	 - Validation Accuracy: 0.881731
	 - Validation F1: 0.883549
	 - Validation Recall: 0.953772
	 - Validation Precision: 0.833782
	 * Model @ epoch 8 saved to model_saves/target-iSarcasm_inter-SARC_03/E08_A0.88_F0.88


Epoch 9: 100%|██████████| 260/260 [05:43<00:00,  1.32s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.45batch/s]


	 - Train loss: 0.022856
	 - Validation Loss: 0.514065
	 - Validation Accuracy: 0.891346
	 - Validation F1: 0.889624
	 - Validation Recall: 0.945688
	 - Validation Precision: 0.850813
	 * Model @ epoch 9 saved to model_saves/target-iSarcasm_inter-SARC_03/E09_A0.89_F0.89


Epoch 10: 100%|██████████| 260/260 [05:43<00:00,  1.32s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.46batch/s]


	 - Train loss: 0.018625
	 - Validation Loss: 0.440122
	 - Validation Accuracy: 0.908654
	 - Validation F1: 0.903081
	 - Validation Recall: 0.945047
	 - Validation Precision: 0.877072
	 * Model @ epoch 10 saved to model_saves/target-iSarcasm_inter-SARC_03/E10_A0.91_F0.9


## intermediate_IMDB

In [None]:
# set params for this model
params.num_labels = 2
params.output_dir = "model_saves/target-iSarcasm_inter-IMDB_03"

### Train

Download transformers.RobertaForSequenceClassificatio, which is a RoBERTa model with a linear layer for sentence classification (or regression) on top of the pooled output:

In [12]:
# Load the RobertaForSequenceClassification model
model = RobertaForSequenceClassification.from_pretrained(intermediate_model_path,
                                                         num_labels = params.num_labels,
                                                         local_files_only=True, 
                                                         ignore_mismatched_sizes=True,
                                                         output_attentions = False,
                                                         output_hidden_states = False,
                                                         )

from torchinfo import summary
summary(model, input_size=(1, 512), dtypes=['torch.IntTensor'])

Layer (type:depth-idx)                                       Output Shape              Param #
RobertaForSequenceClassification                             [1, 2]                    --
├─RobertaModel: 1-1                                          [1, 512, 768]             --
│    └─RobertaEmbeddings: 2-1                                [1, 512, 768]             --
│    │    └─Embedding: 3-1                                   [1, 512, 768]             38,603,520
│    │    └─Embedding: 3-2                                   [1, 512, 768]             768
│    │    └─Embedding: 3-3                                   [1, 512, 768]             394,752
│    │    └─LayerNorm: 3-4                                   [1, 512, 768]             1,536
│    │    └─Dropout: 3-5                                     [1, 512, 768]             --
│    └─RobertaEncoder: 2-2                                   [1, 512, 768]             --
│    │    └─ModuleList: 3-6                                  --               

Set model to device, initialize trainer

In [13]:
model.to(params.device)
print(f"Trained Dataset: {dataset_path}")
print(f"Device: {params.device}")

optimizer = torch.optim.Adam(params=model.parameters(), lr=params.learning_rate) #roberta

trainer = Trainer(model=model,
                  device=params.device,
                  tokenizer=params.tokenizer,
                  train_dataloader=train_dataloader,
                  validation_dataloader=validation_dataloader,
                  epochs=params.epochs,
                  optimizer=optimizer,
                  val_loss_fn=params.val_loss_fn,
                  notify=params.notify,
                  phone_number=params.phone_number,
                  save_dir=params.save_dir,
                  model_name=params.model_name, 
                  save_freq=params.save_freq)

Trained Dataset: data/target_semEval2022_en/iSarcasmEval-main/train/train.en.prepped-oversampled.csv
Device: mps


Fit the model to our training data.

In [14]:
trainer.fit()

  incremental_indices = (torch.cumsum(mask, dim=1).type_as(mask) + past_key_values_length) * mask
Epoch 1: 100%|██████████| 260/260 [05:49<00:00,  1.34s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.43batch/s]


	 - Train loss: 0.720906
	 - Validation Loss: 0.670519
	 - Validation Accuracy: 0.614423
	 - Validation F1: 0.459765
	 - Validation Recall: 0.355855
	 - Validation Precision: 0.724194
	 * Model @ epoch 1 saved to model_saves/target-iSarcasm_inter_IMDB_03/E01_A0.61_F0.46


Epoch 2: 100%|██████████| 260/260 [05:51<00:00,  1.35s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.43batch/s]


	 - Train loss: 0.618313
	 - Validation Loss: 0.482698
	 - Validation Accuracy: 0.760577
	 - Validation F1: 0.735566
	 - Validation Recall: 0.721010
	 - Validation Precision: 0.776746
	 * Model @ epoch 2 saved to model_saves/target-iSarcasm_inter_IMDB_03/E02_A0.76_F0.74


Epoch 3: 100%|██████████| 260/260 [05:53<00:00,  1.36s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:27<00:00,  2.37batch/s]


	 - Train loss: 0.419698
	 - Validation Loss: 0.357019
	 - Validation Accuracy: 0.853846
	 - Validation F1: 0.830517
	 - Validation Recall: 0.775184
	 - Validation Precision: 0.918424
	 * Model @ epoch 3 saved to model_saves/target-iSarcasm_inter_IMDB_03/E03_A0.85_F0.83


Epoch 4: 100%|██████████| 260/260 [05:48<00:00,  1.34s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:27<00:00,  2.40batch/s]


	 - Train loss: 0.221930
	 - Validation Loss: 0.305668
	 - Validation Accuracy: 0.895192
	 - Validation F1: 0.890882
	 - Validation Recall: 0.910014
	 - Validation Precision: 0.884669
	 * Model @ epoch 4 saved to model_saves/target-iSarcasm_inter_IMDB_03/E04_A0.9_F0.89


Epoch 5: 100%|██████████| 260/260 [05:48<00:00,  1.34s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.43batch/s]


	 - Train loss: 0.113571
	 - Validation Loss: 0.383156
	 - Validation Accuracy: 0.887500
	 - Validation F1: 0.884279
	 - Validation Recall: 0.940664
	 - Validation Precision: 0.844452
	 * Model @ epoch 5 saved to model_saves/target-iSarcasm_inter_IMDB_03/E05_A0.89_F0.88


Epoch 6: 100%|██████████| 260/260 [05:52<00:00,  1.36s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.43batch/s]


	 - Train loss: 0.075682
	 - Validation Loss: 0.335626
	 - Validation Accuracy: 0.907692
	 - Validation F1: 0.901919
	 - Validation Recall: 0.930004
	 - Validation Precision: 0.887407
	 * Model @ epoch 6 saved to model_saves/target-iSarcasm_inter_IMDB_03/E06_A0.91_F0.9


Epoch 7: 100%|██████████| 260/260 [05:53<00:00,  1.36s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.46batch/s]


	 - Train loss: 0.060725
	 - Validation Loss: 0.358516
	 - Validation Accuracy: 0.899038
	 - Validation F1: 0.897002
	 - Validation Recall: 0.935261
	 - Validation Precision: 0.871919
	 * Model @ epoch 7 saved to model_saves/target-iSarcasm_inter_IMDB_03/E07_A0.9_F0.9


Epoch 8: 100%|██████████| 260/260 [05:47<00:00,  1.34s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.46batch/s]


	 - Train loss: 0.033853
	 - Validation Loss: 0.416645
	 - Validation Accuracy: 0.898077
	 - Validation F1: 0.895188
	 - Validation Recall: 0.933862
	 - Validation Precision: 0.871095
	 * Model @ epoch 8 saved to model_saves/target-iSarcasm_inter_IMDB_03/E08_A0.9_F0.9


Epoch 9: 100%|██████████| 260/260 [05:47<00:00,  1.34s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.47batch/s]


	 - Train loss: 0.028918
	 - Validation Loss: 0.513128
	 - Validation Accuracy: 0.886538
	 - Validation F1: 0.886966
	 - Validation Recall: 0.937342
	 - Validation Precision: 0.852207
	 * Model @ epoch 9 saved to model_saves/target-iSarcasm_inter_IMDB_03/E09_A0.89_F0.89


Epoch 10: 100%|██████████| 260/260 [05:47<00:00,  1.34s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.47batch/s]


	 - Train loss: 0.044890
	 - Validation Loss: 0.458067
	 - Validation Accuracy: 0.881731
	 - Validation F1: 0.879546
	 - Validation Recall: 0.939796
	 - Validation Precision: 0.839375
	 * Model @ epoch 10 saved to model_saves/target-iSarcasm_inter_IMDB_03/E10_A0.88_F0.88


## intermediate_hellaswag

In [None]:
# set params for this model
params.num_labels = 2
params.output_dir = "model_saves/target-iSarcasm_inter-hellaswag_03"

### Train

Download transformers.RobertaForSequenceClassificatio, which is a RoBERTa model with a linear layer for sentence classification (or regression) on top of the pooled output:

In [12]:
# Load the RobertaForSequenceClassification model
model = RobertaForSequenceClassification.from_pretrained(intermediate_model_path,
                                                         num_labels = params.num_labels,
                                                         local_files_only=True, 
                                                         ignore_mismatched_sizes=True,
                                                         output_attentions = False,
                                                         output_hidden_states = False,
                                                         )

from torchinfo import summary
summary(model, input_size=(1, 512), dtypes=['torch.IntTensor'])

Layer (type:depth-idx)                                       Output Shape              Param #
RobertaForSequenceClassification                             [1, 2]                    --
├─RobertaModel: 1-1                                          [1, 512, 768]             --
│    └─RobertaEmbeddings: 2-1                                [1, 512, 768]             --
│    │    └─Embedding: 3-1                                   [1, 512, 768]             38,603,520
│    │    └─Embedding: 3-2                                   [1, 512, 768]             768
│    │    └─Embedding: 3-3                                   [1, 512, 768]             394,752
│    │    └─LayerNorm: 3-4                                   [1, 512, 768]             1,536
│    │    └─Dropout: 3-5                                     [1, 512, 768]             --
│    └─RobertaEncoder: 2-2                                   [1, 512, 768]             --
│    │    └─ModuleList: 3-6                                  --               

Set model to device, initialize trainer

In [13]:
model.to(params.device)
print(f"Trained Dataset: {dataset_path}")
print(f"Device: {params.device}")

optimizer = torch.optim.Adam(params=model.parameters(), lr=params.learning_rate) #roberta

trainer = Trainer(model=model,
                  device=params.device,
                  tokenizer=params.tokenizer,
                  train_dataloader=train_dataloader,
                  validation_dataloader=validation_dataloader,
                  epochs=params.epochs,
                  optimizer=optimizer,
                  val_loss_fn=params.val_loss_fn,
                  num_labels=params.num_labels,
                  output_dir=params.output_dir,
                  save_freq=params.save_freq,
                  checkpoint_freq=params.checkpoint_freq,)

Trained Dataset: data/target_semEval2022_en/iSarcasmEval-main/train/train.en.prepped-oversampled.csv
Device: mps


Fit the model to our training data.

In [14]:
trainer.fit()

Epoch 1: 100%|██████████| 260/260 [05:50<00:00,  1.35s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.49batch/s]


	 - Train loss: 0.590560
	 - Validation Loss: 0.424827
	 - Validation Accuracy: 0.805769
	 - Validation F1: 0.767911
	 - Validation Recall: 0.696248
	 - Validation Precision: 0.895647
	 * Model @ epoch 1 saved to model_saves/target-iSarcasm_inter-hellaswag_03/E01_A0.81_F0.77
	 * Model checkpoint saved to model_saves/target-iSarcasm_inter-hellaswag_03/E01_A0.81_F0.77/checkpoint.pt


Epoch 2: 100%|██████████| 260/260 [05:46<00:00,  1.33s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.45batch/s]


	 - Train loss: 0.284567
	 - Validation Loss: 0.312427
	 - Validation Accuracy: 0.882692
	 - Validation F1: 0.877528
	 - Validation Recall: 0.940053
	 - Validation Precision: 0.833562
	 * Model @ epoch 2 saved to model_saves/target-iSarcasm_inter-hellaswag_03/E02_A0.88_F0.88
	 * Model checkpoint saved to model_saves/target-iSarcasm_inter-hellaswag_03/E02_A0.88_F0.88/checkpoint.pt


Epoch 3: 100%|██████████| 260/260 [05:48<00:00,  1.34s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.47batch/s]


	 - Train loss: 0.115719
	 - Validation Loss: 0.256675
	 - Validation Accuracy: 0.925000
	 - Validation F1: 0.917065
	 - Validation Recall: 0.917652
	 - Validation Precision: 0.925378
	 * Model @ epoch 3 saved to model_saves/target-iSarcasm_inter-hellaswag_03/E03_A0.92_F0.92
	 * Model checkpoint saved to model_saves/target-iSarcasm_inter-hellaswag_03/E03_A0.92_F0.92/checkpoint.pt


Epoch 4: 100%|██████████| 260/260 [05:49<00:00,  1.34s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:25<00:00,  2.52batch/s]


	 - Train loss: 0.067689
	 - Validation Loss: 0.299522
	 - Validation Accuracy: 0.909615
	 - Validation F1: 0.904544
	 - Validation Recall: 0.933489
	 - Validation Precision: 0.887402
	 * Model @ epoch 4 saved to model_saves/target-iSarcasm_inter-hellaswag_03/E04_A0.91_F0.9
	 * Model checkpoint saved to model_saves/target-iSarcasm_inter-hellaswag_03/E04_A0.91_F0.9/checkpoint.pt


Epoch 5: 100%|██████████| 260/260 [05:48<00:00,  1.34s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.42batch/s]


	 - Train loss: 0.056081
	 - Validation Loss: 0.270399
	 - Validation Accuracy: 0.942308
	 - Validation F1: 0.938804
	 - Validation Recall: 0.914581
	 - Validation Precision: 0.971190
	 * Model @ epoch 5 saved to model_saves/target-iSarcasm_inter-hellaswag_03/E05_A0.94_F0.94
	 * Model checkpoint saved to model_saves/target-iSarcasm_inter-hellaswag_03/E05_A0.94_F0.94/checkpoint.pt


Epoch 6: 100%|██████████| 260/260 [05:48<00:00,  1.34s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:27<00:00,  2.38batch/s]


	 - Train loss: 0.039217
	 - Validation Loss: 0.337257
	 - Validation Accuracy: 0.911538
	 - Validation F1: 0.907363
	 - Validation Recall: 0.941628
	 - Validation Precision: 0.884996
	 * Model @ epoch 6 saved to model_saves/target-iSarcasm_inter-hellaswag_03/E06_A0.91_F0.91
	 * Model checkpoint saved to model_saves/target-iSarcasm_inter-hellaswag_03/E06_A0.91_F0.91/checkpoint.pt


Epoch 7: 100%|██████████| 260/260 [05:47<00:00,  1.34s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:25<00:00,  2.51batch/s]


	 - Train loss: 0.035717
	 - Validation Loss: 0.287735
	 - Validation Accuracy: 0.933654
	 - Validation F1: 0.926270
	 - Validation Recall: 0.939705
	 - Validation Precision: 0.922125
	 * Model @ epoch 7 saved to model_saves/target-iSarcasm_inter-hellaswag_03/E07_A0.93_F0.93
	 * Model checkpoint saved to model_saves/target-iSarcasm_inter-hellaswag_03/E07_A0.93_F0.93/checkpoint.pt


Epoch 8: 100%|██████████| 260/260 [05:47<00:00,  1.34s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.50batch/s]


	 - Train loss: 0.028963
	 - Validation Loss: 0.286411
	 - Validation Accuracy: 0.936538
	 - Validation F1: 0.931752
	 - Validation Recall: 0.941104
	 - Validation Precision: 0.930437
	 * Model @ epoch 8 saved to model_saves/target-iSarcasm_inter-hellaswag_03/E08_A0.94_F0.93
	 * Model checkpoint saved to model_saves/target-iSarcasm_inter-hellaswag_03/E08_A0.94_F0.93/checkpoint.pt


Epoch 9: 100%|██████████| 260/260 [05:47<00:00,  1.34s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.50batch/s]


	 - Train loss: 0.025448
	 - Validation Loss: 0.396453
	 - Validation Accuracy: 0.901923
	 - Validation F1: 0.899960
	 - Validation Recall: 0.940090
	 - Validation Precision: 0.873059
	 * Model @ epoch 9 saved to model_saves/target-iSarcasm_inter-hellaswag_03/E09_A0.9_F0.9
	 * Model checkpoint saved to model_saves/target-iSarcasm_inter-hellaswag_03/E09_A0.9_F0.9/checkpoint.pt


Epoch 10: 100%|██████████| 260/260 [05:46<00:00,  1.33s/batch]
	 Validation 64: 100%|██████████| 65/65 [00:26<00:00,  2.49batch/s]


	 - Train loss: 0.021718
	 - Validation Loss: 0.360332
	 - Validation Accuracy: 0.915385
	 - Validation F1: 0.912023
	 - Validation Recall: 0.937782
	 - Validation Precision: 0.895779
	 * Model @ epoch 10 saved to model_saves/target-iSarcasm_inter-hellaswag_03/E10_A0.92_F0.91
	 * Model checkpoint saved to model_saves/target-iSarcasm_inter-hellaswag_03/E10_A0.92_F0.91/checkpoint.pt
