https://medium.com/@lidores98/finetuning-huggingface-facebook-bart-model-2c758472e340

In [38]:
import pandas as pd
import torch
from datasets import Dataset, load_metric
import random
from transformers import BartTokenizerFast
from transformers import BartForSequenceClassification, Trainer, TrainingArguments, EvalPrediction
import numpy as np
from transformers import pipeline

# Device Settings

In [39]:
# Ask PyTorch whether a CUDA device is usable; the boolean is shown as the cell output.
torch.cuda.is_available()

True

In [40]:
# Use the GPU when PyTorch reports one, otherwise fall back to the CPU so this
# cell does not hand out a CUDA device on a machine that has none.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Loading Splits

In [41]:
# CSV holding the annotated training sample for the relevance classifier.
train_path = (
    '/home/pranavgoel/trans-fer-entropy/internal_relevance_annotation/'
    'train_sample_for_relevance_classifier.csv'
)

# Load the training sample into a DataFrame.
train_df = pd.read_csv(filepath_or_buffer=train_path)

In [42]:
# Wrap the training DataFrame as a `datasets.Dataset`.
train_ds = Dataset.from_pandas(df=train_df)

In [43]:
# CSV holding the annotated test sample for the relevance classifier.
test_path = (
    '/home/pranavgoel/trans-fer-entropy/internal_relevance_annotation/'
    'test_sample_for_relevance_classifier.csv'
)

# Load the test sample into a DataFrame.
test_df = pd.read_csv(filepath_or_buffer=test_path)

In [44]:
# Wrap the test DataFrame as a `datasets.Dataset`.
test_ds = Dataset.from_pandas(df=test_df)

# Model Output Path

In [26]:
# Directory passed to TrainingArguments as `output_dir` further down.
model_path = '/data_users1/sagar/trans-fer-entropy/models'

In [46]:
# Base name of the saved weights file (`<model_name>.pth`) written in the Saving section.
model_name = 'few_shot_v3'

# Tokenizer

In [47]:
# Fast BART tokenizer taken from the same checkpoint the classifier is loaded from.
tokenizer = BartTokenizerFast.from_pretrained(
    pretrained_model_name_or_path='facebook/bart-large-mnli'
)

# Preparing Datasets

In [48]:

def create_input_sequence(sample):
    """Tokenize one example and attach its integer class id.

    The previous version returned ``{"input": <encoding>, "label": <encoding>}``:
    the text encoding was nested one level down (so the model never received
    top-level ``input_ids`` / ``attention_mask``) and the label was a tokenized
    string rather than a class index. A 2-class sequence-classification head
    needs an integer class id per example, so this version returns the flat
    tokenizer output with an integer ``"label"`` entry instead.

    Args:
        sample: One dataset row. ``sample["text"]`` is the text to encode and
            ``sample["label"]`` is interpreted by truthiness.

    Returns:
        The tokenizer's encoding of ``sample["text"]`` (truncated and padded to
        the tokenizer's maximum length) with an extra ``"label"`` entry:
        ``1`` for a truthy label ("transgender"), ``0`` otherwise
        ("not transgender").
    """
    # Same truthiness rule the original used to pick between the two label strings.
    class_id = 1 if sample["label"] else 0

    encoded_sequence = tokenizer(text=sample["text"], truncation=True, padding='max_length')

    # Keep the column name "label" so the mapped dataset has a single label
    # column; the padding collator used by Trainer is expected to rename it to
    # "labels" for the model -- NOTE(review): confirm for the installed version.
    encoded_sequence["label"] = class_id

    return encoded_sequence

In [49]:
# Apply create_input_sequence to the train and test splits.
train_dataset = train_ds.map(function=create_input_sequence)
test_dataset = test_ds.map(function=create_input_sequence)

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

In [50]:
# Hyperparameters for the Trainer run.
#
# Warmup is given as a fraction of the run instead of a fixed 500 steps: the
# mapped training split has 50 examples, so with batches of 8 and 30 epochs a
# single device performs roughly 7 * 30 = 210 optimizer steps. A 500-step
# warmup would therefore never finish and the learning rate would never reach
# its configured value.
training_args = TrainingArguments(
  output_dir = model_path,          # Where Trainer writes its outputs
  num_train_epochs = 30,            # Passes over the training split
  per_device_train_batch_size = 8,  # Training batch size on each device
  per_device_eval_batch_size = 8,   # Evaluation batch size on each device
  warmup_ratio = 0.1,               # Warm up over the first 10% of training steps
  weight_decay = 0.01,              # Weight-decay strength
)

# Start from the MNLI checkpoint but with a 2-way classification head.
# `ignore_mismatched_sizes` lets the checkpoint's 3-way output projection be
# dropped and a freshly initialised 2-way one be used instead.
model = BartForSequenceClassification.from_pretrained(
    "facebook/bart-large-mnli",
    num_labels=2,
    ignore_mismatched_sizes=True,
)

# Bundle the model, hyperparameters, both splits and the tokenizer into a Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
)

Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-large-mnli and are newly initialized because the shapes did not match:
- classification_head.out_proj.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([2]) in the model instantiated
- classification_head.out_proj.weight: found shape torch.Size([3, 1024]) in the checkpoint and torch.Size([2, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Saving

In [51]:
# Write the model weights to `<model_path>/<model_name>.pth`. The directory is
# taken from `model_path` rather than repeated as a literal, so the save
# location cannot drift from the configured output directory.
# NOTE(review): no `trainer.train()` call is visible before this cell, so the
# weights saved here are whatever `model` currently holds -- confirm training
# has actually been run before relying on this file.
torch.save(model.state_dict(), f"{model_path}/{model_name}.pth")