In [21]:
import evaluate
import numpy as np
import ray

from datasets import load_dataset
from ray import tune
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer,
)

In [22]:
def preprocess_function(data, tokenizer):
    """Tokenize the ``"text"`` entry of ``data`` with truncation enabled.

    Args:
        data: Mapping that provides a ``"text"`` entry (a batch of rows when
            used through ``Dataset.map(..., batched=True)``).
        tokenizer: Callable applied to the text; it is invoked with
            ``truncation=True``.

    Returns:
        Whatever ``tokenizer`` returns for the given text.
    """
    texts = data["text"]
    encoded = tokenizer(texts, truncation=True)
    return encoded


def compute_metrics(eval_pred):
    """Compute classification accuracy for one evaluation pass.

    Accuracy is computed directly with NumPy rather than by calling
    ``evaluate.load("accuracy")`` on every evaluation, which repeated the
    metric-loading work each time and made every Ray trial depend on the
    metric being resolvable from the Hub or local cache.

    Args:
        eval_pred: Pair ``(predictions, labels)``. ``predictions`` holds
            per-class scores with the class dimension on axis 1 (or a tuple
            whose first element does); ``labels`` holds the integer class ids.

    Returns:
        dict: ``{"accuracy": <float>}``, the fraction of rows whose arg-max
        prediction equals the label.
    """
    predictions, labels = eval_pred
    # If the model emits a tuple of outputs, assume the scores come first.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predicted_ids = np.argmax(predictions, axis=1)
    matches = predicted_ids == np.asarray(labels)
    return {"accuracy": float(np.mean(matches))}


def model_init(trial):
    """Build a fresh sequence-classification model for one training run.

    Passed to ``Trainer(model_init=...)`` so that each run starts from the
    pretrained checkpoint instead of reusing previously trained weights.

    Args:
        trial: Trial object supplied by the caller; not used here.

    Returns:
        The model loaded from ``model_id`` with a classification head sized
        to the label mapping.

    Note:
        ``model_id``, ``id2label`` and ``label2id`` are module-level names
        looked up when the function is called, so they must be defined
        before the ``Trainer`` is constructed.
    """
    return AutoModelForSequenceClassification.from_pretrained(
        model_id,
        # Derived from the mapping so the head size cannot drift from the labels.
        num_labels=len(id2label),
        id2label=id2label,
        label2id=label2id,
    )


In [23]:
# Checkpoint name used for the tokenizer here and read by model_init().
model_id = "microsoft/deberta-v3-large"

# Tokenizer for that checkpoint, plus a collator that pads batches with it.
tokenizer = AutoTokenizer.from_pretrained(model_id)
data_collator = DataCollatorWithPadding(tokenizer)



In [24]:
# Read train.csv, take its "train" split, and hold out 10% of the rows as the
# "test" split; the fixed seed keeps the split reproducible.
training_dataset = load_dataset("csv", data_files="train.csv")["train"].train_test_split(
    test_size=0.1,
    seed=42,
)

# Tokenize both splits in batches, handing the tokenizer to the mapper.
tokenized_dataset = training_dataset.map(
    preprocess_function,
    batched=True,
    fn_kwargs={"tokenizer": tokenizer},
)
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 8588
    })
    test: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 955
    })
})

In [25]:
# Class id -> label name handed to the model config.
# NOTE(review): assumes these ids match the integer `label` column of
# train.csv -- confirm against the data.
id2label = {
    0: "NEGATIVE",
    1: "POSITIVE",
    2: "NEUTRAL",
}
# Inverse mapping (label name -> class id), built from id2label.
label2id = {name: class_id for class_id, name in id2label.items()}

In [26]:
# Baseline training configuration. learning_rate, per_device_train_batch_size
# and num_train_epochs are also keys of tune_config, the search space handed
# to the hyperparameter search.
training_args = TrainingArguments(
    # Named after the checkpoint in use ("deberta_v3_large" for
    # microsoft/deberta-v3-large); the previous literal said "base" while the
    # large model was being trained.
    output_dir=model_id.rsplit("/", 1)[-1].replace("-", "_"),
    learning_rate=2e-5,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    # weight_decay=0.01,
    evaluation_strategy="steps",
    save_strategy="steps",
    logging_steps=100,
    # Left off on purpose: the original note says enabling it checkpoints
    # and uses up the available disk space.
    load_best_model_at_end=False,
    push_to_hub=False,
    report_to="none",
)

# No model instance is passed: the Trainer builds one through model_init,
# which is what allows a fresh model per hyperparameter-search trial.
trainer = Trainer(
    model_init=model_init,
    args=training_args,
    # Data: 90/10 split produced above, padded per batch by the collator.
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    # Reports accuracy for each evaluation pass.
    compute_metrics=compute_metrics,
)

# Ray Tune search space; each key names the TrainingArguments field it varies.
# per_device_eval_batch_size is deliberately not searched.
tune_config = dict(
    learning_rate=tune.loguniform(1e-6, 1e-4),
    per_device_train_batch_size=tune.choice([2, 4, 8, 16, 32, 64]),
    num_train_epochs=tune.choice([2, 3]),
)

# Run the Ray Tune search and keep the winning trial in `best_run` so later
# cells can reuse its hyperparameters; previously the result was only echoed
# as cell output and then lost.
best_run = trainer.hyperparameter_search(
    # In the recorded run the reported objective equals eval_accuracy, so
    # higher is better.
    direction="maximize",
    backend="ray",
    hp_space=lambda _: tune_config,
    n_trials=1,  # Set this to however many iterations you can afford
    # checkpoint_score_attr="training_iteration",
    # resources_per_trial={"cpu": 20},
    resources_per_trial={"gpu": 1},
    local_dir="~/ray_results/",
)
best_run

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
2024-05-02 19:24:30,736	INFO tune.py:614 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


== Status ==
Current time: 2024-05-02 19:24:30 (running for 00:00:00.11)
Using FIFO scheduling algorithm.
Logical resource usage: 0/24 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2024-05-02_19-11-24_126744_107862/artifacts/2024-05-02_19-24-30/_objective_2024-05-02_19-24-30/driver_artifacts
Number of trials: 1/1 (1 PENDING)


== Status ==
Current time: 2024-05-02 19:24:35 (running for 00:00:05.15)
Using FIFO scheduling algorithm.
Logical resource usage: 0/24 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2024-05-02_19-11-24_126744_107862/artifacts/2024-05-02_19-24-30/_objective_2024-05-02_19-24-30/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-05-02 19:24:40 (running for 00:00:10.17)
Using FIFO scheduling algorithm.
Logical resource usage: 0/24 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2024-05-02_19-11-24_126744_107862/artifacts/2024-05-02_1

Trial name,epoch,eval_accuracy,eval_loss,eval_runtime,eval_samples_per_second,eval_steps_per_second,objective,should_checkpoint
_objective_99672_00000,1.85874,0.902618,0.298648,3.6548,261.304,16.417,0.902618,True


== Status ==
Current time: 2024-05-02 19:25:16 (running for 00:00:45.30)
Using FIFO scheduling algorithm.
Logical resource usage: 0/24 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2024-05-02_19-11-24_126744_107862/artifacts/2024-05-02_19-24-30/_objective_2024-05-02_19-24-30/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-05-02 19:25:21 (running for 00:00:50.32)
Using FIFO scheduling algorithm.
Logical resource usage: 0/24 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2024-05-02_19-11-24_126744_107862/artifacts/2024-05-02_19-24-30/_objective_2024-05-02_19-24-30/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2024-05-02 19:25:26 (running for 00:00:55.34)
Using FIFO scheduling algorithm.
Logical resource usage: 0/24 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2024-05-02_19-11-24_126744_107862/artifacts/2024-05-02_1

2024-05-02 19:28:16,859	INFO tune.py:1007 -- Wrote the latest version of all result files and experiment state to '/home/ubuntu/ray_results/_objective_2024-05-02_19-24-30' in 0.0023s.
2024-05-02 19:28:16,861	INFO tune.py:1039 -- Total run time: 226.13 seconds (226.11 seconds for the tuning loop).


== Status ==
Current time: 2024-05-02 19:28:16 (running for 00:03:46.11)
Using FIFO scheduling algorithm.
Logical resource usage: 0/24 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:A100)
Result logdir: /tmp/ray/session_2024-05-02_19-11-24_126744_107862/artifacts/2024-05-02_19-24-30/_objective_2024-05-02_19-24-30/driver_artifacts
Number of trials: 1/1 (1 TERMINATED)
+------------------------+------------+-----------------+-----------------+--------------------+------------------------+-------------+
| Trial name             | status     | loc             |   learning_rate |   num_train_epochs |   per_device_train_bat |   objective |
|                        |            |                 |                 |                    |                ch_size |             |
|------------------------+------------+-----------------+-----------------+--------------------+------------------------+-------------|
| _objective_99672_00000 | TERMINATED | 10.1.0.4:109943 |     5.61152e-06 |                

BestRun(run_id='99672_00000', objective=0.9026178010471204, hyperparameters={'learning_rate': 5.61151641533451e-06, 'per_device_train_batch_size': 32, 'num_train_epochs': 2}, run_summary=<ray.tune.analysis.experiment_analysis.ExperimentAnalysis object at 0x76b52e790d00>)