In [21]:
import pandas as pd
import numpy as np
import datetime
import re
import string
import contractions
import nltk
from nltk.corpus import stopwords
import torch
import transformers
from transformers import AdamW, ElectraConfig, ElectraTokenizer, ElectraForSequenceClassification, ElectraModel, AutoTokenizer, TrainingArguments, DataCollatorWithPadding
import pytorch_lightning as pl
from torch.utils.data import TensorDataset, DataLoader, Dataset
import evaluate
from torch.optim import AdamW
from finetuning_scheduler import FinetuningScheduler 
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import LearningRateMonitor
from torchmetrics import Accuracy, BinaryF1Score, Precision, Recall


ImportError: cannot import name 'BinaryF1Score' from 'torchmetrics' (/home/vscode/.local/lib/python3.9/site-packages/torchmetrics/__init__.py)

In [2]:
# model = ElectraForSequenceClassification.from_pretrained("google/electra-small-discriminator", num_labels = 2)
tokenizer = ElectraTokenizer.from_pretrained("google/electra-small-discriminator")
#configuration = ElectraConfig()
#model = ElectraForSequenceClassification(configuration)

Downloading (…)solve/main/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 3.04MB/s]
Downloading (…)okenizer_config.json: 100%|██████████| 29.0/29.0 [00:00<00:00, 8.13kB/s]
Downloading (…)lve/main/config.json: 100%|██████████| 665/665 [00:00<00:00, 439kB/s]


In [3]:
df = pd.read_json("Sarcasm_Headlines_Dataset_v2.json", lines = True)
df_train = df.drop(columns = ['article_link'])
col_types = {'headline':'str', 'is_sarcastic':'int32'}
df_train = df_train.astype(col_types)

In [4]:
train_size = int(0.8 * len(df_train))
val_size = int(0.1 * len(df_train))
test_size = len(df_train) - train_size - val_size

# Split the DataFrame into training and validation sets
train_df = df_train[:train_size]
val_df = df_train[train_size:train_size + val_size]
test_df = df_train[train_size + val_size:]

In [5]:
class SarcasmDataset(Dataset):
    def __init__(self, data, tokenizer, max_length=512):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        text = self.data[idx]['headline']
        labels = self.data[idx]['is_sarcastic']
        encodings = self.tokenizer(text, max_length=self.max_length, padding='max_length', truncation=True, return_attention_mask=True, return_token_type_ids=False, return_tensors='pt')
        return encodings['input_ids'][0], encodings['attention_mask'][0], torch.tensor(labels)


In [6]:
train_dataset = SarcasmDataset(train_df.to_dict('records'), tokenizer)
val_dataset = SarcasmDataset(val_df.to_dict('records'), tokenizer)
test_dataset = SarcasmDataset(test_df.to_dict('records'), tokenizer)

In [13]:
train_dataset.data

[{'is_sarcastic': 1,
  'headline': 'thirtysomething scientists unveil doomsday clock of hair loss'},
 {'is_sarcastic': 0,
  'headline': 'dem rep. totally nails why congress is falling short on gender, racial equality'},
 {'is_sarcastic': 0,
  'headline': 'eat your veggies: 9 deliciously different recipes'},
 {'is_sarcastic': 1,
  'headline': 'inclement weather prevents liar from getting to work'},
 {'is_sarcastic': 1,
  'headline': "mother comes pretty close to using word 'streaming' correctly"},
 {'is_sarcastic': 0, 'headline': 'my white inheritance'},
 {'is_sarcastic': 0, 'headline': '5 ways to file your taxes with less stress'},
 {'is_sarcastic': 1,
  'headline': "richard branson's global-warming donation nearly as much as cost of failed balloon trips"},
 {'is_sarcastic': 1,
  'headline': 'shadow government getting too large to meet in marriott conference room b'},
 {'is_sarcastic': 0, 'headline': 'lots of parents know this scenario'},
 {'is_sarcastic': 0,
  'headline': 'this lesbia

In [56]:

# train_encodings = tokenizer.batch_encode_plus(list(train_df['headline']), max_length=512, padding='max_length', truncation=True, return_attention_mask=True, return_token_type_ids=False, return_tensors='pt')
# val_encodings = tokenizer.batch_encode_plus(list(val_df['headline']), max_length=512, padding='max_length', truncation=True, return_attention_mask=True, return_token_type_ids=False, return_tensors='pt')
# test_encodings = tokenizer.batch_encode_plus(list(test_df['headline']), max_length=512, padding=True, truncation=True, return_attention_mask=True, return_token_type_ids=False, return_tensors='pt')

# train_input_ids = torch.tensor(train_encodings['input_ids'])
# train_attention_masks = torch.tensor(train_encodings['attention_mask'])
# val_input_ids = torch.tensor(val_encodings['input_ids'])
# val_attention_masks = torch.tensor(val_encodings['attention_mask'])
# test_input_ids = torch.tensor(test_encodings['input_ids'])
# test_attention_masks = torch.tensor(test_encodings['attention_mask'])

# train_labels = torch.tensor(train_df['is_sarcastic'].values)
# val_labels = torch.tensor(val_df['is_sarcastic'].values)
# test_labels = torch.tensor(test_df['is_sarcastic'].values)
     


  train_input_ids = torch.tensor(train_encodings['input_ids'])
  train_attention_masks = torch.tensor(train_encodings['attention_mask'])
  val_input_ids = torch.tensor(val_encodings['input_ids'])
  val_attention_masks = torch.tensor(val_encodings['attention_mask'])
  test_input_ids = torch.tensor(test_encodings['input_ids'])
  test_attention_masks = torch.tensor(test_encodings['attention_mask'])


In [17]:


class ElectraClassifier(pl.LightningModule):
    def __init__(self, model_name="google/electra-small-discriminator", num_labels=2, learning_rate=2e-5):
        super().__init__()
        self.save_hyperparameters()
        self.model = ElectraForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
        self.warmup_steps = 10000
        self.finetuning_scheduler = FinetuningScheduler
        self.train_accuracy = Accuracy(task='binary', num_classes=num_labels)
        self.val_accuracy = Accuracy(task='binary', num_classes=num_labels)
        self.train_precision = Precision(task='binary', num_classes=num_labels, average = 'weighted')
        self.val_precision = Precision(task='binary', num_classes=num_labels, average = 'weighted')
        self.train_recall = Recall(task='binary', num_classes=num_labels, average = 'weighted')
        self.val_recall = Recall(task='binary', num_classes=num_labels, average = 'weighted')
        self.f1_score = BinaryF1Score(task='binary', num_classes=num_labels)

        self.finetuning_scheduler.freeze(self.model.electra)

    def forward(self, input_ids, attention_mask, labels=None):
        return self.model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
    
    def on_train_batch_start(self, batch, batch_idx):
        if self.global_step == self.warmup_steps:
            #unfreeze base layers
            for param in self.model.electra.parameters():
                param.requires_grad = True

    def training_step(self, batch, batch_idx):
        input_ids, attention_mask, labels = batch
        outputs = self(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        preds = outputs.logits.argmax(dim=-1)

        #logging
        acc = self.train_accuracy(preds, labels)
        prec = self.train_precision(preds, labels)
        rec = self.train_recall(preds, labels)
        self.log("train_loss", loss)
        self.log("train_accuracy", acc, on_step = True, on_epoch = True, prog_bar=True)
        self.log("train_precision", prec, on_step = True, on_epoch = True, prog_bar=True)
        self.log("train_recall", rec, on_step = True, on_epoch = True, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        input_ids, attention_mask, labels = batch
        outputs = self(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        preds = outputs.logits.argmax(dim=-1)

        #logging
        acc = self.val_accuracy(preds, labels)
        prec = self.val_precision(preds, labels)
        rec = self.val_recall(preds, labels)
        self.log("val_loss", loss)
        self.log("val_accuracy", acc)
        self.log("val_precision", prec)
        self.log("val_recall", rec)

    def test_step(self, batch, batch_idx):
        input_ids, attention_mask, labels = batch
        outputs = self(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        preds = outputs.logits.argmax(dim=-1)

        #logging
        acc = self.val_accuracy(preds, labels)
        prec = self.val_precision(preds, labels)
        rec = self.val_recall(preds, labels)
        self.log("test_loss", loss)
        self.log("test_accuracy", acc)
        self.log("test_precision", prec)
        self.log("test_recall", rec)

    def configure_optimizers(self):
        optimizer = AdamW(self.parameters(), lr=0.0005)
        return optimizer

In [18]:
lr_monitor = LearningRateMonitor(logging_interval = 'step')

model = ElectraClassifier()
trainer = Trainer(
    max_epochs=10,
    callbacks=[lr_monitor]
)

Some weights of the model checkpoint at google/electra-small-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier

In [19]:

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=6)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=6)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4)
     


In [20]:
trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type                             | Params
---------------------------------------------------------------------
0 | model           | ElectraForSequenceClassification | 13.5 M
1 | train_accuracy  | BinaryAccuracy                   | 0     
2 | val_accuracy    | BinaryAccuracy                   | 0     
3 | train_precision | BinaryPrecision                  | 0     
4 | val_precision   | BinaryPrecision                  | 0     
5 | train_recall    | BinaryRecall                     | 0     
6 | val_recall      | BinaryRecall                     | 0     
---------------------------------------------------------------------
66.3 K    Trainable params
13.5 M    Non-trainable params
13.5 M    Total params
54.197    Total estimated model params size (MB)


Epoch 9: 100%|██████████| 1431/1431 [03:28<00:00,  6.86it/s, v_num=9, train_accuracy_step=0.467, train_precision_step=0.000, train_recall_step=0.000, train_accuracy_epoch=0.519, train_precision_epoch=0.0323, train_recall_epoch=0.0324] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1431/1431 [03:28<00:00,  6.85it/s, v_num=9, train_accuracy_step=0.467, train_precision_step=0.000, train_recall_step=0.000, train_accuracy_epoch=0.519, train_precision_epoch=0.0323, train_recall_epoch=0.0324]


In [22]:
checkpoint_path = "fine_tuned_model_ckpt"
trainer.save_checkpoint(checkpoint_path)

In [23]:
loaded_model = ElectraClassifier.load_from_checkpoint(checkpoint_path)

Some weights of the model checkpoint at google/electra-small-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier

In [24]:
trainer.test(loaded_model, test_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 179/179 [00:09<00:00, 19.33it/s]


[{'test_loss': 0.6917961835861206,
  'test_accuracy': 0.5260216593742371,
  'test_precision': 0.0,
  'test_recall': 0.0}]

In [None]:
# Create a new model with a different number of classes
transfer_model = ElectraClassifier(num_labels=3)

# Load the fine-tuned model weights, but keep the new classification head
transfer_model.model.electra.load_state_dict(loaded_model.model.electra.state_dict())

In [82]:
#training arguments

training_args = TrainingArguments(
    output_dir='.',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    evaluation_strategy='steps',
    eval_steps=500,
)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [None]:
#evaluation

predictions = trainer.predict(val_dataset)
preds = np.argmax(predictions.predictions, axis=1)

In [None]:
metric = evaluate.load("f1", "accuracy", "precision")
results = metric.compute(predictions=preds, references=predictions.label_ids)

In [83]:
def compute_metrics(eval_preds):
    metric = evaluate.load("accuracy", "precision", "f1")
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=1)
    return metric.compute(predictions = predictions, references = labels)

In [71]:


trainer = Trainer(
    model, 
    training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [None]:
#evaluation

predictions = trainer.predict(val_dataset)
preds = np.argmax(predictions.predictions, axis=1)

In [None]:
metric = evaluate.load("f1", "accuracy", "precision")
results = metric.compute(predictions=preds, references=predictions.label_ids)
