In [1]:
!pip install transformers datasets evaluate
!pip install accelerate -U
!pip install transformers[torch]
!pip install torch -U

Collecting datasets
  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 521.2/521.2 kB 6.1 MB/s eta 0:00:00
Collecting evaluate
  Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow-hotfix (from datasets)
  Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
Collecting responses<0.19 (from evaluate)
  Downloading

In [263]:
from datasets import load_dataset
# Evaluation data pulled from the Hugging Face Hub.
# An alternative set, "davidgaofc/RM_inout", was used previously and can be
# swapped in here.
testing_set = load_dataset(path="davidgaofc/PRIMA_inout")

In [264]:
from transformers import AutoTokenizer

In [265]:
# Load the tokenizer published with the "davidgaofc/ShadowAttackF" checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path="davidgaofc/ShadowAttackF"
)

In [266]:
def preprocess_function(examples):
  """Tokenize the "Text" field of `examples` with truncation enabled.

  Relies on the module-level `tokenizer`; returns whatever the tokenizer
  call produces for the given text(s).
  """
  texts = examples["Text"]
  return tokenizer(texts, truncation=True)

def clean_function(examples):
  """Copy the capitalised "Label" field into a lower-case "label" field.

  The input mapping is modified in place and the same object is returned;
  the original "Label" entry is left untouched.
  """
  label_values = examples['Label']
  examples['label'] = label_values
  return examples

In [267]:
# Stage 1: tokenize the "Text" column in batches.
temp_tokenized_dataset = testing_set.map(
    function=preprocess_function,
    batched=True,
)
# Stage 2: add the lower-case "label" column alongside "Label".
tokenized_dataset = temp_tokenized_dataset.map(
    function=clean_function,
    batched=True,
)

In [268]:
from transformers import DataCollatorWithPadding

In [269]:
# Collator handed to the Trainer below; it uses `tokenizer` to pad the examples
# of each batch (the tokenization step above applies truncation but no padding).
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [270]:
import numpy as np
import evaluate

# Metrics are loaded through the `evaluate` library (installed in the first cell).
# `datasets.load_metric` is deprecated and no longer exists in datasets 3.x, so the
# previous `from datasets import load_metric` fails on a fresh, unpinned install.
# The returned objects expose the same `.compute(predictions=..., references=...)`
# call that `compute_metrics` relies on.
metric = evaluate.load("accuracy")
f1_score = evaluate.load("f1")
precision_met = evaluate.load("precision")
recall_met = evaluate.load("recall")

def compute_metrics(eval_pred):
    """Turn a (logits, labels) pair into accuracy / F1 / precision / recall.

    The predicted class is the arg-max over the last axis of the logits.
    F1, precision and recall are computed with ``average="weighted"``.

    Returns:
        dict with the keys "accuracy", "f1", "precision" and "recall".
    """
    logits, labels = eval_pred
    predicted_classes = np.argmax(logits, axis=-1)
    shared = dict(predictions=predicted_classes, references=labels)

    scores = {"accuracy": metric.compute(**shared)["accuracy"]}
    weighted_scorers = (
        ("f1", f1_score),
        ("precision", precision_met),
        ("recall", recall_met),
    )
    for key, scorer in weighted_scorers:
        scores[key] = scorer.compute(**shared, average="weighted")[key]
    return scores

In [271]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
# Sequence-classification model with a two-class head, loaded from the same
# checkpoint name as the tokenizer.
model = AutoModelForSequenceClassification.from_pretrained(
    pretrained_model_name_or_path="davidgaofc/ShadowAttackF",
    num_labels=2,
)


In [272]:
# Trainer configuration. NOTE(review): the visible cells only call
# `trainer.evaluate()`, so the optimisation settings (learning rate, weight decay,
# epochs, train batch size) appear to be unused here — confirm before relying on them.
training_args = TrainingArguments(
    output_dir="training",
    # optimisation settings
    learning_rate=2e-5,
    weight_decay=0.01,
    num_train_epochs=20,
    # batch sizes
    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,
    # evaluate and checkpoint once per epoch
    evaluation_strategy="epoch",
    save_strategy="epoch",
    # load_best_model_at_end=True,
)

# Evaluation-only Trainer: no train_dataset is passed
# (previously: train_dataset=tokenized_dataset["train"]).
# The "train" split of the tokenized dataset is used as the evaluation set.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    data_collator=data_collator,
    eval_dataset=tokenized_dataset["train"],
    compute_metrics=compute_metrics,
)

In [273]:
import torch

# Ask PyTorch's CUDA caching allocator to release cached memory it is not
# currently using, before running evaluation.
torch.cuda.empty_cache()


In [274]:
# Run evaluation on the Trainer's eval_dataset; as the last expression of the
# cell, the returned metrics dict (eval_loss, eval_accuracy, ...) is displayed.
trainer.evaluate()

You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'eval_loss': 3.6540050506591797,
 'eval_accuracy': 0.4945121951219512,
 'eval_f1': 0.4792232859464675,
 'eval_precision': 0.4937820062382149,
 'eval_recall': 0.4945121951219512,
 'eval_runtime': 15.1702,
 'eval_samples_per_second': 108.107,
 'eval_steps_per_second': 6.79}