# Using SupCL-Seq for Training on GLUE Tasks

## Dependencies

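Install the `torch`, `tensorflow` and `SupCL-Seq` packages using pip. This notebook also imports `transformers`, `datasets`, `numpy` and `scikit-learn`; uncomment the commands in the next cell to install `datasets`, `numpy` and `scikit-learn` if they are missing.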

In [None]:
#!pip install datasets numpy 
#!pip install -U scikit-learn

In [None]:
from datasets import load_dataset, load_metric
from transformers import TrainingArguments, Trainer, AutoTokenizer, AutoModel,AutoModelForSequenceClassification
#----for roberta-----#
from transformers import RobertaTokenizer, RobertaForSequenceClassification, RobertaModel
from sklearn.metrics import classification_report
import warnings
import numpy as np

from SupCL_Seq import SupCsTrainer

# Silence every Python warning for the remainder of the session. This is a
# blanket, process-wide filter: deprecation warnings emitted by any library
# used below will be hidden as well.
warnings.filterwarnings('ignore')

## GLUE Tasks

In [None]:
# GLUE task identifiers recognised by this notebook.
GLUE_TASKS = [
    "cola", "mnli", "mnli-mm", "mrpc", "qnli",
    "qqp", "rte", "sst2", "stsb", "wnli",
]

# --- Configuration: choose the task and the pretrained checkpoint ---------
task = "cola"
# Other checkpoints noted by the author: "roberta-base", "bert-base-uncased".
model_name = "nghuyong/ernie-2.0-en"

# "mnli-mm" is loaded through the "mnli" configuration; the two only differ
# in which validation split is selected later in the notebook.
if task == "mnli-mm":
    actual_task = "mnli"
else:
    actual_task = task

# Data and metric for the selected GLUE configuration.
dataset = load_dataset("glue", actual_task)
metric = load_metric("glue", actual_task)

# Tokenizer and model for the first training stage. `AutoModel` is used here;
# `AutoModelForSequenceClassification` is only loaded later, when the
# classification layer is added.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModel.from_pretrained(model_name)

# Column-name pairs shared by several tasks. The second entry is None when a
# task has a single text column.
_SINGLE_SENTENCE = ("sentence", None)
_SENTENCE_PAIR = ("sentence1", "sentence2")
_PREMISE_HYPOTHESIS = ("premise", "hypothesis")

# Maps each task identifier to the dataset column(s) holding its text input.
task_to_keys = {
    "cola": _SINGLE_SENTENCE,
    "mnli": _PREMISE_HYPOTHESIS,
    "mnli-mm": _PREMISE_HYPOTHESIS,
    "mrpc": _SENTENCE_PAIR,
    "qnli": ("question", "sentence"),
    "qqp": ("question1", "question2"),
    "rte": _SENTENCE_PAIR,
    "sst2": _SINGLE_SENTENCE,
    "stsb": _SENTENCE_PAIR,
    "wnli": _SENTENCE_PAIR,
}

# Text column names for the task selected above (raises KeyError for an
# identifier that is not listed in the mapping).
sentence1_key, sentence2_key = task_to_keys[task]

def preprocess_function(examples):
    """Tokenize the text column(s) of ``examples`` for the selected task.

    Reads the notebook-level ``sentence1_key`` / ``sentence2_key`` to decide
    which columns to pass to ``tokenizer``. When ``sentence2_key`` is None
    only the first column is tokenized; otherwise both columns are passed as
    a pair. Truncation is always enabled.

    Args:
        examples: Mapping from column name to values, as handed over by
            ``dataset.map`` (called with ``batched=True`` below).

    Returns:
        Whatever ``tokenizer`` returns for the given inputs.
    """
    text_columns = [examples[sentence1_key]]
    if sentence2_key is not None:
        text_columns.append(examples[sentence2_key])
    return tokenizer(*text_columns, truncation=True)


# Apply the tokenization to every split of the loaded dataset.
encoded_dataset = dataset.map(preprocess_function, batched=True)

## Custom Metric

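We employ a task-dependent metric: the GLUE metric loaded for the selected task. For every task except STS-B the predicted label is the argmax over the model outputs; for STS-B the first output column is used directly as the prediction.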

In [None]:
def compute_metrics(eval_pred):
    """Score model outputs with the GLUE metric of the selected task.

    Args:
        eval_pred: A pair ``(predictions, labels)``. ``predictions`` is
            indexed as a 2-D array (``[:, 0]`` / ``argmax(axis=1)``).

    Returns:
        The result of ``metric.compute`` for the derived predictions and the
        given labels.
    """
    model_outputs, references = eval_pred
    is_stsb = task == "stsb"
    # STS-B: take the first output column as the prediction.
    # Every other task: take the index of the largest output per row.
    predictions = model_outputs[:, 0] if is_stsb else np.argmax(model_outputs, axis=1)
    return metric.compute(predictions=predictions, references=references)

In [None]:
# MNLI ships two validation splits (matched / mismatched); every other task
# uses the plain "validation" split.
validation_key = {
    "mnli-mm": "validation_mismatched",
    "mnli": "validation_matched",
}.get(task, "validation")

train_dataset = encoded_dataset["train"]
# NOTE: despite its name, `test_dataset` holds the validation split chosen above.
test_dataset = encoded_dataset[validation_key]


## Training Arguments from Hugging Face

In [None]:
# Training arguments for the first (SupCL) training stage.
CL_args = TrainingArguments(
    # Output locations.
    output_dir="./results",
    logging_dir="./logs",
    save_total_limit=1,
    # Optimisation schedule.
    num_train_epochs=5,
    per_device_train_batch_size=12,
    learning_rate=5e-5,
    warmup_steps=50,
    weight_decay=0.01,
    # Reporting: log every 200 steps; evaluation is switched off for this stage.
    logging_steps=200,
    evaluation_strategy="no",
)

## SupCL-Trainer

`SupCsTrainer` is used in the same way as the Hugging Face `Trainer`. We first run CS training with it and then save the resulting model to `./cs_baseline`.


In [None]:
# Trainer for the first stage. The last six arguments are the same ones the
# Hugging Face `Trainer` is given further down in this notebook.
SupCL_trainer = SupCsTrainer.SupCsTrainer(
    # SupCL-specific options.
    # NOTE(review): presumably dropout rates, a contrastive-loss temperature
    # and a sentence-pooling mode; confirm against the SupCL_Seq documentation.
    w_drop_out=[0.1, 0.2],
    def_drop_out=0.1,
    temperature=0.05,
    pooling_strategy="mean",
    # Model, arguments and data.
    model=model,
    args=CL_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [None]:
# Run the first training stage, then write the resulting model to
# ./cs_baseline, the path the fine-tuning stage below loads from.
SupCL_trainer.train()
SupCL_trainer.save_model('./cs_baseline')

## Only FineTune a Linear Layer

After CS training, we add a linear classification layer on top of the saved model and freeze the parameters of the pretrained base model, so that only the new layer is trained. This layer is then fine-tuned on the data using cross-entropy.

In [None]:
# Load the model saved by the CS-training stage. A Trainer checkpoint
# directory such as "./results/checkpoint-500/" can be used instead.
model_name = './cs_baseline'

# Size of the classification output: 1 for STS-B, 3 for MNLI, 2 otherwise.
if actual_task == 'stsb':
    num_labels = 1
elif actual_task == 'mnli':
    num_labels = 3
else:
    num_labels = 2

# ------ Add classification layer ---------#
# RoBERTa-specific alternative:
# model = RobertaForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

# ---- Freeze the base model -------#
# Turn off gradients for every parameter under `model.base_model`; parameters
# outside it are left untouched.
for base_param in model.base_model.parameters():
    base_param.requires_grad = False

In [None]:
# Training arguments for the fine-tuning stage.
# NOTE(review): `output_dir` and `logging_dir` are the same paths used by
# `CL_args`, so both stages write checkpoints and logs to the same folders.
args = TrainingArguments(
    # Output locations.
    output_dir="./results",
    logging_dir="./logs",
    save_total_limit=1,
    report_to="tensorboard",
    # Optimisation schedule.
    num_train_epochs=5,
    per_device_train_batch_size=28,
    learning_rate=1e-04,
    warmup_steps=50,
    weight_decay=0.01,
    # Evaluation and logging cadence.
    per_device_eval_batch_size=64,
    evaluation_strategy="epoch",
    # NOTE(review): `eval_steps` is documented as the interval for the
    # step-based evaluation strategy; with "epoch" it presumably has no
    # effect. Confirm and drop it, or switch the strategy to "steps".
    eval_steps=200,
    logging_steps=200,
)

In [None]:
# Trainer for the fine-tuning stage: same data, tokenizer and metric function
# as the first stage, now with the classification model and `args`.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [None]:
# Run the fine-tuning stage; `args` requests an evaluation on `test_dataset`
# (the validation split) using `compute_metrics` at the end of each epoch.
trainer.train()