In [None]:
import datasets
from transformers import AutoModelForSequenceClassification, ElectraTokenizerFast
from transformers import Trainer, TrainingArguments
from transformers import set_seed
from pathlib import Path
import wandb

In [None]:
# Load the Spanish ("es") configuration of XNLI; the result is indexed by split
# name in the cells below (e.g. "train", "validation", "test").
xnli_es = datasets.load_dataset("xnli", "es")

In [None]:
# >joeddav
# >Aug '20
# >
# >@rsk97 In addition, just make sure the model used is trained on an NLI task and that the **last output label corresponds to entailment** while the **first output label corresponds to contradiction**.
#
# => We swap ids 0 and 2 of the original `label` and store the result in a new `labels` column, which is required by an `AutoModelForSequenceClassification`
def switch_label_id(row):
    """Return the row's class id with 0 and 2 swapped, under the key ``labels``.

    Args:
        row: Mapping with an integer ``label`` entry.

    Returns:
        A one-entry dict ``{"labels": new_id}``: 0 becomes 2, 2 becomes 0 and
        every other value becomes 1.
    """
    # Only the two outer ids trade places; anything else falls back to 1.
    swapped_ids = {0: 2, 2: 0}
    return {"labels": swapped_ids.get(row["label"], 1)}

# Add the swapped `labels` column to every split. The split names are
# snapshotted first because the entries are reassigned inside the loop.
for split in list(xnli_es):
    xnli_es[split] = xnli_es[split].map(switch_label_id)

In [None]:
# Keep in mind that the accented tokens were not optimized in our pretrained language model
# -> strip_accents=False means that we optimize them from scratch during the fine-tuning
# NOTE(review): this tokenizer is loaded from the *small* checkpoint but is also passed to the
# medium model's Trainer further down; presumably both models share a vocabulary -- confirm.
tokenizer = ElectraTokenizerFast.from_pretrained("Recognai/selectra_small", strip_accents=False)

In [None]:
def tokenize(row):
    """Encode premise/hypothesis pairs with the notebook-level ``tokenizer``.

    Args:
        row: Mapping with ``premise`` and ``hypothesis`` entries. It is applied
            through ``Dataset.map(..., batched=True)`` below, so each entry is
            presumably a list of strings rather than a single string.

    Returns:
        Whatever ``tokenizer`` returns for the pair, truncated to at most
        512 tokens.
    """
    premises = row["premise"]
    hypotheses = row["hypothesis"]
    return tokenizer(premises, hypotheses, truncation=True, max_length=512)

In [None]:
# Tokenize every split and drop the raw text and the original `label` column,
# so only the tokenizer outputs and the swapped `labels` column remain.
data = {
    split: xnli_es[split].map(
        tokenize,
        batched=True,
        batch_size=32,
        remove_columns=["hypothesis", "premise", "label"],
    )
    for split in xnli_es
}

In [None]:
# XNLI metric object used by `compute_metrics` below.
# NOTE(review): `datasets.load_metric` is deprecated in recent `datasets` releases in
# favour of the separate `evaluate` package -- confirm against the pinned version.
metric = datasets.load_metric("xnli", "es")

def compute_metrics(eval_pred):
    """Score model outputs with the notebook-level XNLI ``metric``.

    Args:
        eval_pred: Pair ``(predictions, labels)``. ``predictions`` must support
            ``argmax(axis=-1)``; its last axis is reduced to one class id per
            example.

    Returns:
        The result of ``metric.compute`` for the predicted class ids against
        the reference labels.
    """
    scores, references = eval_pred
    predicted_ids = scores.argmax(axis=-1)
    return metric.compute(predictions=predicted_ids, references=references)

# Small

In [None]:
# we performed a random seed sweep and settled on 2
# The seed is set before the model is instantiated, presumably so that any newly
# initialised weights are reproducible as well.
set_seed(2)
# num_labels=3 matches the three class ids (0, 1, 2) produced by `switch_label_id`.
model = AutoModelForSequenceClassification.from_pretrained("Recognai/selectra_small", num_labels=3)

In [None]:
# Model size in millions of parameters.
model.num_parameters() / 1e6

In [None]:
# Fine-tuning configuration for the small model (seed 2).
training_args = TrainingArguments(
    # --- outputs and logging ---
    output_dir='./results_small_seed2CasedUnstripped',
    logging_dir='./logs',
    logging_steps=250,
    # --- run mode and reproducibility ---
    do_train=True,
    do_eval=True,
    seed=2,
    # --- schedule and optimizer ---
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    learning_rate=1e-4,
    warmup_ratio=0.1,
    weight_decay=0.01,
    adam_epsilon=1e-6,
    # --- evaluation, checkpointing and model selection ---
    evaluation_strategy="steps",
    eval_steps=500,
    save_steps=500,  # ignored when using load_best_model_at_end
    save_total_limit=10,
    load_best_model_at_end=True,
    metric_for_best_model="eval_accuracy",
)

In [None]:
# Trainer for the small model: trains on the "train" split and evaluates on
# the "validation" split using `compute_metrics`.
trainer = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    train_dataset=data["train"],
    eval_dataset=data["validation"],
)

In [None]:
# Run the fine-tuning; checkpoints are written under the `output_dir` configured above.
trainer.train()

In [None]:
# List the checkpoints saved by the training run.
!ls results_small_seed2CasedUnstripped/

In [None]:
# Reload one specific saved checkpoint for the test-set evaluation.
# NOTE(review): the step number is hard-coded -- presumably the best checkpoint of the
# run above; confirm it still exists after a re-run.
model = AutoModelForSequenceClassification.from_pretrained("results_small_seed2CasedUnstripped/checkpoint-45000", num_labels=3)

In [None]:
# Configuration for the evaluation-only Trainer of the small model. It differs
# from the training configuration in `output_dir`, in `report_to="none"` and in
# not setting `seed`.
training_args = TrainingArguments(
    # --- outputs and logging ---
    output_dir='./results_small',
    logging_dir='./logs',
    logging_steps=250,
    report_to="none",
    # --- run mode ---
    do_train=True,
    do_eval=True,
    # --- schedule and optimizer ---
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    learning_rate=1e-4,
    warmup_ratio=0.1,
    weight_decay=0.01,
    adam_epsilon=1e-6,
    # --- evaluation, checkpointing and model selection ---
    evaluation_strategy="steps",
    eval_steps=500,
    save_steps=500,  # ignored when using load_best_model_at_end
    save_total_limit=10,
    load_best_model_at_end=True,
    metric_for_best_model="eval_accuracy",
)

In [None]:
# Trainer wrapping the reloaded small checkpoint; only used for evaluation below.
trainer = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    train_dataset=data["train"],
    eval_dataset=data["validation"],
)

In [None]:
# Evaluate the reloaded small checkpoint on the held-out "test" split
# (overrides the Trainer's default "validation" eval dataset).
trainer.evaluate(data["test"])

In [None]:
# Export the selected small checkpoint directory as `small_final`.
# NOTE(review): if `small_final` already exists, `cp -r` copies the checkpoint *into* it.
!cp -r results_small_seed2CasedUnstripped/checkpoint-45000 small_final

# Medium

In [None]:
# we performed a random seed sweep and settled on 2
# The seed is set before the model is instantiated, presumably so that any newly
# initialised weights are reproducible as well.
set_seed(2)
# num_labels=3 matches the three class ids (0, 1, 2) produced by `switch_label_id`.
model = AutoModelForSequenceClassification.from_pretrained("Recognai/selectra_medium", num_labels=3)

In [None]:
# Model size in millions of parameters.
model.num_parameters() / 1e6

In [None]:
# Fine-tuning configuration for the medium model (seed 2). Note that the
# per-device training batch size is 32 here, versus 16 for the small model.
training_args = TrainingArguments(
    # --- outputs and logging ---
    output_dir='./results_medium_seed2CasedUnstripped',
    logging_dir='./logs',
    logging_steps=250,
    # --- run mode and reproducibility ---
    do_train=True,
    do_eval=True,
    seed=2,
    # --- schedule and optimizer ---
    num_train_epochs=5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    learning_rate=1e-4,
    warmup_ratio=0.1,
    weight_decay=0.01,
    adam_epsilon=1e-6,
    # --- evaluation, checkpointing and model selection ---
    evaluation_strategy="steps",
    eval_steps=500,
    save_steps=500,  # ignored when using load_best_model_at_end
    save_total_limit=10,
    load_best_model_at_end=True,
    metric_for_best_model="eval_accuracy",
)

In [None]:
# Trainer for the medium model: trains on the "train" split and evaluates on
# the "validation" split using `compute_metrics`.
trainer = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    train_dataset=data["train"],
    eval_dataset=data["validation"],
)

In [None]:
# Run the fine-tuning; checkpoints are written under the `output_dir` configured above.
trainer.train()

In [None]:
# List the checkpoints saved by the training run.
!ls results_medium_seed2CasedUnstripped/

In [None]:
# Reload one specific saved checkpoint for the test-set evaluation.
# NOTE(review): the step number is hard-coded -- presumably the best checkpoint of the
# run above; confirm it still exists after a re-run.
model = AutoModelForSequenceClassification.from_pretrained("results_medium_seed2CasedUnstripped/checkpoint-23500", num_labels=3)

In [None]:
# Configuration for the evaluation-only Trainer of the medium model, with
# `report_to="none"`.
# NOTE(review): per_device_train_batch_size is 16 here while the medium training
# run used 32; this should not matter for evaluation-only use -- confirm.
training_args = TrainingArguments(
    # --- outputs and logging ---
    output_dir='./results_medium',
    logging_dir='./logs',
    logging_steps=250,
    report_to="none",
    # --- run mode ---
    do_train=True,
    do_eval=True,
    # --- schedule and optimizer ---
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    learning_rate=1e-4,
    warmup_ratio=0.1,
    weight_decay=0.01,
    adam_epsilon=1e-6,
    # --- evaluation, checkpointing and model selection ---
    evaluation_strategy="steps",
    eval_steps=500,
    save_steps=500,  # ignored when using load_best_model_at_end
    save_total_limit=10,
    load_best_model_at_end=True,
    metric_for_best_model="eval_accuracy",
)

In [None]:
# Trainer wrapping the reloaded medium checkpoint; only used for evaluation below.
trainer = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    train_dataset=data["train"],
    eval_dataset=data["validation"],
)

In [None]:
# Evaluate the reloaded medium checkpoint on the held-out "test" split
# (overrides the Trainer's default "validation" eval dataset).
trainer.evaluate(data["test"])

In [None]:
!cp results_medium_seed2CasedUnstripped/checkpoint-23500