In [1]:
from optuna import Trial
from typing import Dict, Union, Any
from setfit import TrainingArguments
from setfit import SetFitModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def model_init(params: Union[Dict[str, Any], None]) -> SetFitModel:
    """Create a fresh SetFit model whose classification head uses the given settings.

    Args:
        params: Hyperparameters for the current run. Only ``"max_iter"`` and
            ``"solver"`` are read; any other keys are ignored. ``None`` or an
            empty mapping selects the defaults (``max_iter=100``,
            ``solver="liblinear"``).

    Returns:
        The model returned by ``SetFitModel.from_pretrained`` for
        ``sentence-transformers/sentence-t5-xl``, with ``head_params`` set to
        the resolved ``max_iter`` / ``solver`` values.
    """
    # NOTE(review): the trainer presumably calls this with None outside of a
    # hyperparameter search -- hence the fallback to an empty mapping.
    hyperparams = params or {}
    head_params = {
        "max_iter": hyperparams.get("max_iter", 100),
        "solver": hyperparams.get("solver", "liblinear"),
    }
    return SetFitModel.from_pretrained(
        "sentence-transformers/sentence-t5-xl",
        head_params=head_params,
    )


In [3]:
def hp_space(trial: Trial) -> Dict[str, Union[float, int, str]]:
    """Draw one hyperparameter configuration from ``trial``.

    Two dimensions are searched: ``num_epochs`` (integer, 1 to 10) and
    ``batch_size`` (one of 16, 32 or 64).

    Returns:
        A dict with the sampled ``"num_epochs"`` and ``"batch_size"`` values.
    """
    epochs = trial.suggest_int("num_epochs", 1, 10)
    batch = trial.suggest_categorical("batch_size", [16, 32, 64])

    # Dimensions currently excluded from the search, kept for reference:
    #   "body_learning_rate": trial.suggest_float("body_learning_rate", 1e-6, 1e-3, log=True)
    #   "seed": trial.suggest_int("seed", 1, 40)
    #   "max_iter": trial.suggest_int("max_iter", 50, 300)
    #   "solver": trial.suggest_categorical("solver", ["newton-cg", "lbfgs", "liblinear"])
    return {"num_epochs": epochs, "batch_size": batch}

In [4]:
import os

# Hide every CUDA device from this process so the rest of the notebook runs on CPU.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import torch

# Displayed as the cell result; False means no GPU is visible to torch.
torch.cuda.is_available()

False

In [5]:
from datasets import load_dataset

# Load the pilot CSV and class-encode its "label" column in one step.
# The stratified split further down stratifies on this column.
dataset = load_dataset(
    "csv",
    data_files="../data/target_pilot_1_2_en.csv",
).class_encode_column("label")

  obj.co_lnotab,  # for < python 3.10 [not counted in args]


In [6]:
# Display the loaded DatasetDict to check its splits, columns and row count.
dataset

DatasetDict({
    train: Dataset({
        features: ['ID', 'text', 'label'],
        num_rows: 72
    })
})

In [7]:
# Stratified hold-out split on "label": 58 rows for training, the rest for evaluation.
# The fixed seed keeps the split -- and every metric computed from it -- reproducible
# across kernel restarts; without it each run shuffles the rows differently.
dataset_tt = dataset["train"].train_test_split(
    train_size=58,
    stratify_by_column="label",
    seed=42,
)

In [8]:
# Display the split to confirm the train/test row counts.
dataset_tt

DatasetDict({
    train: Dataset({
        features: ['ID', 'text', 'label'],
        num_rows: 58
    })
    test: Dataset({
        features: ['ID', 'text', 'label'],
        num_rows: 14
    })
})

In [9]:
from setfit import Trainer

In [10]:
# Baseline training configuration for the first run: batch size 16, 10 epochs.
# No other TrainingArguments option is set here.
args = TrainingArguments(
    batch_size=16,
    num_epochs=10,
)

In [11]:
# Build the trainer from the `model_init` factory (not a ready-made model) together
# with the baseline arguments and the train/test splits of `dataset_tt`.
# The recorded output shows the model being loaded at construction time.
trainer = Trainer(
    model_init=model_init,
    args=args,
    train_dataset=dataset_tt["train"],
    eval_dataset=dataset_tt["test"]
)

model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
Map: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 58/58 [00:00<00:00, 4387.19 examples/s]


In [12]:
# Baseline run: train with the arguments above, then evaluate.
# The evaluate() result is the cell's displayed value.
trainer.train()
trainer.evaluate()

***** Running training *****
  Num unique pairs = 1758
  Batch size = 16
  Num epochs = 10


Step,Training Loss
1,0.192
50,0.2595
100,0.2449
150,0.211
200,0.1074
250,0.0077
300,0.0025
350,0.0018
400,0.0012
450,0.001


***** Running evaluation *****                                                                                                                                                                 


{'accuracy': 0.6428571428571429}

In [None]:
# Run 10 trials over the space defined by `hp_space`, maximising the search objective,
# and print the best run.
# NOTE(review): the objective is presumably the evaluation metric -- confirm.
best_run = trainer.hyperparameter_search(direction="maximize", hp_space=hp_space, n_trials=10)
print(best_run)

In [None]:
# Load the best run's hyperparameters into the trainer and train again with them.
# NOTE(review): final_model=True presumably rebuilds the model via `model_init` -- confirm.
trainer.apply_hyperparameters(best_run.hyperparameters, final_model=True)
trainer.train()

In [None]:
# Evaluate the trainer after retraining with the best hyperparameters.
trainer.evaluate()