In [2]:
from optuna import Trial
from typing import Dict, Union, Any
from setfit import TrainingArguments
from setfit import SetFitModel

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def model_init(params: Dict[str, Any]) -> SetFitModel:
    params = params or {}
    max_iter = params.get("max_iter", 100)
    solver = params.get("solver", "liblinear")
    params = {
        "head_params": {
            "max_iter": max_iter,
            "solver": solver,
        }
    }
    return SetFitModel.from_pretrained("KBLab/sentence-bert-swedish-cased", **params)


In [4]:
def hp_space(trial: Trial) -> Dict[str, Union[float, int, str]]:
    return {
        "body_learning_rate": trial.suggest_float("body_learning_rate", 1e-6, 1e-3, log=True),
        "num_epochs": trial.suggest_int("num_epochs", 1, 5),
        "batch_size": trial.suggest_categorical("batch_size", [8, 16]),
        "seed": trial.suggest_int("seed", 1, 40),
        "max_iter": trial.suggest_int("max_iter", 50, 300),
        "solver": trial.suggest_categorical("solver", ["newton-cg", "lbfgs", "liblinear"]),
    }

The input is a csv file with three columns:
1) Requirement ID
2) Requirement text
3) Boolean label, which indicates whether all judges agreed in the classification

In [21]:
from datasets import load_dataset, DatasetDict
dataset = load_dataset("csv", data_files="../data/target_pilot_1_2_sv.csv")
dataset = dataset.class_encode_column("label")
dataset['train'].features

{'ID': Value(dtype='string', id=None),
 'text': Value(dtype='string', id=None),
 'label': ClassLabel(names=['False', 'True'], id=None)}

In [6]:
train_testvalid = dataset['train'].train_test_split(test_size=0.2, stratify_by_column='label')
test_valid = train_testvalid['test'].train_test_split(test_size=0.5, stratify_by_column='label')
ttv_dataset = DatasetDict({
    'train': train_testvalid['train'],
    'test': test_valid['test'],
    'valid': test_valid['train']})

In [7]:
ttv_dataset

DatasetDict({
    train: Dataset({
        features: ['ID', 'text', 'label'],
        num_rows: 57
    })
    test: Dataset({
        features: ['ID', 'text', 'label'],
        num_rows: 8
    })
    valid: Dataset({
        features: ['ID', 'text', 'label'],
        num_rows: 7
    })
})

In [16]:
ttv_dataset["train"][0]

{'ID': 'K2770',
 'text': 'Överkant på bomarm i avstängningsanordningar vid öppningsbar bro, vägoperativ miljö, ska vara 0,9 – 1,1 m över körbanan i stängt läge.',
 'label': 1}

In [8]:
from setfit import Trainer

In [9]:
trainer = Trainer(
    model_init=model_init,
    train_dataset=ttv_dataset["train"],
    eval_dataset=ttv_dataset["valid"]
)

model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
Currently using DataParallel (DP) for multi-gpu training, while DistributedDataParallel (DDP) is recommended for faster training. See https://sbert.net/docs/sentence_transformer/training/distributed.html for more information.
  obj.co_lnotab,  # for < python 3.10 [not counted in args]
Map: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 57/57 [00:00<00:00, 4466.36 examples/s]


In [10]:
best_run = trainer.hyperparameter_search(direction="maximize", hp_space=hp_space, n_trials=100)
print(best_run)

[I 2024-11-27 17:14:46,258] A new study created in memory with name: no-name-068a0a5a-0c3f-42e9-be25-fa3b0565ae22
Trial: {'body_learning_rate': 2.1490552520467495e-05, 'num_epochs': 4, 'batch_size': 16, 'seed': 4, 'max_iter': 74, 'solver': 'lbfgs'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 16
  Num epochs = 4


Step,Training Loss
1,0.2693
50,0.2785
100,0.282
150,0.2775
200,0.2739


***** Running evaluation *****
[I 2024-11-27 17:16:10,069] Trial 0 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 2.1490552520467495e-05, 'num_epochs': 4, 'batch_size': 16, 'seed': 4, 'max_iter': 74, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 6.207321699607185e-06, 'num_epochs': 2, 'batch_size': 8, 'seed': 4, 'max_iter': 130, 'solver': 'liblinear'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 8
  Num epochs = 2


Step,Training Loss
1,0.3501
50,0.2698
100,0.2864
150,0.2899
200,0.2713


***** Running evaluation *****
[I 2024-11-27 17:17:14,967] Trial 1 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 6.207321699607185e-06, 'num_epochs': 2, 'batch_size': 8, 'seed': 4, 'max_iter': 130, 'solver': 'liblinear'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 0.0002172939771001778, 'num_epochs': 4, 'batch_size': 8, 'seed': 29, 'max_iter': 148, 'solver': 'newton-cg'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 8
  Num epochs = 4


Step,Training Loss
1,0.2733
50,0.2745
100,0.2798
150,0.2738
200,0.2822
250,0.2826
300,0.2817
350,0.2783
400,0.2792


***** Running evaluation *****
[I 2024-11-27 17:19:22,052] Trial 2 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 0.0002172939771001778, 'num_epochs': 4, 'batch_size': 8, 'seed': 29, 'max_iter': 148, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 5.2657996451193935e-06, 'num_epochs': 4, 'batch_size': 8, 'seed': 10, 'max_iter': 184, 'solver': 'liblinear'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 8
  Num epochs = 4


Step,Training Loss
1,0.3297
50,0.2753
100,0.2813
150,0.2804
200,0.286
250,0.264
300,0.2911
350,0.2749
400,0.2728


***** Running evaluation *****
[I 2024-11-27 17:21:29,352] Trial 3 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 5.2657996451193935e-06, 'num_epochs': 4, 'batch_size': 8, 'seed': 10, 'max_iter': 184, 'solver': 'liblinear'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 3.1108236626808115e-05, 'num_epochs': 4, 'batch_size': 8, 'seed': 5, 'max_iter': 184, 'solver': 'newton-cg'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 8
  Num epochs = 4


Step,Training Loss
1,0.3357
50,0.2802
100,0.2758
150,0.2921
200,0.2668
250,0.2761
300,0.2777
350,0.278
400,0.283


***** Running evaluation *****
[I 2024-11-27 17:23:36,686] Trial 4 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 3.1108236626808115e-05, 'num_epochs': 4, 'batch_size': 8, 'seed': 5, 'max_iter': 184, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 0.0005160980044482664, 'num_epochs': 2, 'batch_size': 16, 'seed': 19, 'max_iter': 107, 'solver': 'lbfgs'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 16
  Num epochs = 2


Step,Training Loss
1,0.2837
50,0.2793
100,0.2783


***** Running evaluation *****
[I 2024-11-27 17:24:22,580] Trial 5 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 0.0005160980044482664, 'num_epochs': 2, 'batch_size': 16, 'seed': 19, 'max_iter': 107, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 0.0001753395242888614, 'num_epochs': 3, 'batch_size': 8, 'seed': 20, 'max_iter': 273, 'solver': 'newton-cg'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 8
  Num epochs = 3


Step,Training Loss
1,0.4031
50,0.2777
100,0.28
150,0.2729
200,0.2775
250,0.3011
300,0.2751


***** Running evaluation *****
[I 2024-11-27 17:25:59,422] Trial 6 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 0.0001753395242888614, 'num_epochs': 3, 'batch_size': 8, 'seed': 20, 'max_iter': 273, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 0.00016562039774582234, 'num_epochs': 1, 'batch_size': 16, 'seed': 25, 'max_iter': 187, 'solver': 'lbfgs'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 16
  Num epochs = 1


Step,Training Loss
1,0.2942
50,0.2783


***** Running evaluation *****
[I 2024-11-27 17:26:23,748] Trial 7 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 0.00016562039774582234, 'num_epochs': 1, 'batch_size': 16, 'seed': 25, 'max_iter': 187, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 0.00021048394947748554, 'num_epochs': 1, 'batch_size': 16, 'seed': 1, 'max_iter': 162, 'solver': 'newton-cg'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 16
  Num epochs = 1


Step,Training Loss
1,0.2716
50,0.2799


***** Running evaluation *****
[I 2024-11-27 17:26:48,656] Trial 8 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 0.00021048394947748554, 'num_epochs': 1, 'batch_size': 16, 'seed': 1, 'max_iter': 162, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 2.8961946511177042e-05, 'num_epochs': 3, 'batch_size': 16, 'seed': 13, 'max_iter': 190, 'solver': 'newton-cg'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 16
  Num epochs = 3


Step,Training Loss
1,0.3109
50,0.2788
100,0.2749
150,0.2796


***** Running evaluation *****
[I 2024-11-27 17:27:55,596] Trial 9 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 2.8961946511177042e-05, 'num_epochs': 3, 'batch_size': 16, 'seed': 13, 'max_iter': 190, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 1.4401860593496145e-06, 'num_epochs': 5, 'batch_size': 16, 'seed': 37, 'max_iter': 63, 'solver': 'lbfgs'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 16
  Num epochs = 5


Step,Training Loss
1,0.3522
50,0.275
100,0.2888
150,0.2754
200,0.281
250,0.2798


***** Running evaluation *****
[I 2024-11-27 17:29:45,939] Trial 10 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 1.4401860593496145e-06, 'num_epochs': 5, 'batch_size': 16, 'seed': 37, 'max_iter': 63, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 1.0206818860703942e-05, 'num_epochs': 2, 'batch_size': 8, 'seed': 10, 'max_iter': 53, 'solver': 'liblinear'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 8
  Num epochs = 2


Step,Training Loss
1,0.3297
50,0.2753
100,0.2813
150,0.2804
200,0.286


***** Running evaluation *****
[I 2024-11-27 17:30:51,846] Trial 11 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 1.0206818860703942e-05, 'num_epochs': 2, 'batch_size': 8, 'seed': 10, 'max_iter': 53, 'solver': 'liblinear'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 6.933621682465579e-06, 'num_epochs': 2, 'batch_size': 8, 'seed': 1, 'max_iter': 112, 'solver': 'liblinear'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 8
  Num epochs = 2


Step,Training Loss
1,0.2142
50,0.2727
100,0.2852
150,0.278
200,0.278


***** Running evaluation *****
[I 2024-11-27 17:31:57,342] Trial 12 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 6.933621682465579e-06, 'num_epochs': 2, 'batch_size': 8, 'seed': 1, 'max_iter': 112, 'solver': 'liblinear'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 1.6422293498412811e-06, 'num_epochs': 5, 'batch_size': 16, 'seed': 6, 'max_iter': 101, 'solver': 'liblinear'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 16
  Num epochs = 5


Step,Training Loss
1,0.2803
50,0.278
100,0.2755
150,0.2859
200,0.2779
250,0.2749


***** Running evaluation *****
[I 2024-11-27 17:33:46,912] Trial 13 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 1.6422293498412811e-06, 'num_epochs': 5, 'batch_size': 16, 'seed': 6, 'max_iter': 101, 'solver': 'liblinear'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 1.417044763860623e-05, 'num_epochs': 3, 'batch_size': 8, 'seed': 15, 'max_iter': 234, 'solver': 'lbfgs'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 8
  Num epochs = 3


Step,Training Loss
1,0.241
50,0.2796
100,0.2836
150,0.2769
200,0.2795
250,0.2791
300,0.2841


***** Running evaluation *****
[I 2024-11-27 17:35:24,140] Trial 14 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 1.417044763860623e-05, 'num_epochs': 3, 'batch_size': 8, 'seed': 15, 'max_iter': 234, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 3.433162756431486e-06, 'num_epochs': 2, 'batch_size': 16, 'seed': 8, 'max_iter': 79, 'solver': 'liblinear'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 16
  Num epochs = 2


Step,Training Loss
1,0.2715
50,0.2824
100,0.2765


***** Running evaluation *****
[I 2024-11-27 17:36:09,217] Trial 15 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 3.433162756431486e-06, 'num_epochs': 2, 'batch_size': 16, 'seed': 8, 'max_iter': 79, 'solver': 'liblinear'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 6.899771712328136e-05, 'num_epochs': 3, 'batch_size': 8, 'seed': 40, 'max_iter': 135, 'solver': 'lbfgs'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 8
  Num epochs = 3


Step,Training Loss
1,0.2421
50,0.2795
100,0.2805
150,0.2797
200,0.2792
250,0.2801
300,0.2765


***** Running evaluation *****
[I 2024-11-27 17:37:45,959] Trial 16 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 6.899771712328136e-05, 'num_epochs': 3, 'batch_size': 8, 'seed': 40, 'max_iter': 135, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 1.6430177795136168e-05, 'num_epochs': 4, 'batch_size': 16, 'seed': 16, 'max_iter': 126, 'solver': 'lbfgs'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 16
  Num epochs = 4


Step,Training Loss
1,0.2865
50,0.2802
100,0.2753
150,0.2865
200,0.2704


***** Running evaluation *****
[I 2024-11-27 17:39:14,574] Trial 17 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 1.6430177795136168e-05, 'num_epochs': 4, 'batch_size': 16, 'seed': 16, 'max_iter': 126, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 7.19407559735032e-05, 'num_epochs': 5, 'batch_size': 16, 'seed': 25, 'max_iter': 88, 'solver': 'liblinear'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 16
  Num epochs = 5


Step,Training Loss
1,0.2942
50,0.2783
100,0.2818
150,0.2772
200,0.2802
250,0.2848


***** Running evaluation *****
[I 2024-11-27 17:41:03,590] Trial 18 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 7.19407559735032e-05, 'num_epochs': 5, 'batch_size': 16, 'seed': 25, 'max_iter': 88, 'solver': 'liblinear'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 2.8863386826631727e-06, 'num_epochs': 1, 'batch_size': 8, 'seed': 4, 'max_iter': 218, 'solver': 'lbfgs'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 8
  Num epochs = 1


Step,Training Loss
1,0.3501
50,0.2698
100,0.2864


***** Running evaluation *****
[I 2024-11-27 17:41:38,437] Trial 19 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 2.8863386826631727e-06, 'num_epochs': 1, 'batch_size': 8, 'seed': 4, 'max_iter': 218, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 6.05547499639898e-05, 'num_epochs': 3, 'batch_size': 16, 'seed': 12, 'max_iter': 71, 'solver': 'liblinear'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 16
  Num epochs = 3


Step,Training Loss
1,0.3148
50,0.2766
100,0.2802
150,0.2772


***** Running evaluation *****
[I 2024-11-27 17:42:45,773] Trial 20 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 6.05547499639898e-05, 'num_epochs': 3, 'batch_size': 16, 'seed': 12, 'max_iter': 71, 'solver': 'liblinear'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 0.000993742028997601, 'num_epochs': 4, 'batch_size': 8, 'seed': 31, 'max_iter': 145, 'solver': 'newton-cg'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 8
  Num epochs = 4


Step,Training Loss
1,0.4068
50,0.2829
100,0.2727
150,0.2819
200,0.2744
250,0.2799
300,0.2797
350,0.27
400,0.2816


***** Running evaluation *****
[I 2024-11-27 17:44:53,571] Trial 21 finished with value: 0.5714285714285714 and parameters: {'body_learning_rate': 0.000993742028997601, 'num_epochs': 4, 'batch_size': 8, 'seed': 31, 'max_iter': 145, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.5714285714285714.
Trial: {'body_learning_rate': 2.1693099973854055e-05, 'num_epochs': 4, 'batch_size': 8, 'seed': 29, 'max_iter': 155, 'solver': 'newton-cg'}
model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 8
  Num epochs = 4


Step,Training Loss
1,0.2733
50,0.2745


IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [11]:
trainer.apply_hyperparameters(best_run.hyperparameters, final_model=True)
trainer.train()

model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
***** Running training *****
  Num unique pairs = 1706
  Batch size = 16
  Num epochs = 4


Step,Training Loss
1,0.2693
50,0.2785
100,0.282
150,0.2775
200,0.2739


In [12]:
trainer.evaluate()

***** Running evaluation *****


{'accuracy': 0.5714285714285714}

In [13]:
trainer.evaluate(ttv_dataset['test'])

***** Running evaluation *****


{'accuracy': 0.75}

In [14]:
trainer.model.save_pretrained("../models/agreement/target_sv")

In [23]:
trainer.model.predict(["Konfigurerbara system ska ha sina konfigurationer lagrade i servicedatorn."])

tensor([0])