In [1]:
!pip install setfit
!pip install datasets

Collecting setfit
  Downloading setfit-1.0.3-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.9/75.9 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets>=2.3.0 (from setfit)
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m31.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentence-transformers>=2.2.1 (from setfit)
  Downloading sentence_transformers-2.6.1-py3-none-any.whl (163 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.3/163.3 kB[0m [31m23.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting evaluate>=0.3.0 (from setfit)
  Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets>=2.3.0->setfit)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)

# Imports

In [2]:
import pandas as pd
from datasets import load_dataset, Dataset, DatasetDict, concatenate_datasets
import evaluate
import numpy as np
import random

from google.colab import userdata

from sklearn.metrics import accuracy_score, f1_score, classification_report

from huggingface_hub import login
from sentence_transformers.losses import CosineSimilarityLoss
from setfit import SetFitModel, SetFitTrainer

In [3]:
# Authenticate with the Hugging Face Hub using the 'HF_TOKEN' secret read
# from Colab's userdata store (no token is hard-coded in the notebook).
login(token=userdata.get('HF_TOKEN'))

  and should_run_async(code)


Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


# Functions

In [4]:
def encode_labels(record, num_classes=6):
    """Add a one-hot ``'label'`` vector derived from the integer ``'intent'``.

    Args:
        record: Mapping with an integer ``'intent'`` entry. It is updated in
            place with a new ``'label'`` entry.
        num_classes: Length of the one-hot vector. Defaults to 6, the value
            previously hard-coded here.

    Returns:
        The same ``record`` object, now containing ``'label'``.

    Raises:
        IndexError: If ``record['intent']`` is outside ``[0, num_classes)``.
    """
    intent = record['intent']
    # Reject negative ids explicitly: plain list indexing would wrap around
    # and silently mark the wrong class.
    if not 0 <= intent < num_classes:
        raise IndexError(
            f"intent {intent!r} is outside the valid range [0, {num_classes})"
        )
    record['label'] = [1 if idx == intent else 0 for idx in range(num_classes)]
    return record

In [5]:
def get_subset(dataset, instances_per_class = 8, seed = None):
    """Draw a class-balanced subset of ``dataset`` for few-shot experiments.

    Args:
        dataset: Iterable of examples, each a mapping with ``"text"`` and
            ``"intent"`` entries.
        instances_per_class: Number of examples to sample for every distinct
            ``"intent"`` value. Classes with fewer examples contribute all of
            them instead of being skipped.
        seed: Optional seed for the sampling. When ``None`` (the default) the
            module-level ``random`` state is used, as before; when given, a
            dedicated generator makes the draw reproducible and leaves the
            global state untouched.

    Returns:
        A ``Dataset`` built from the sampled examples, holding only the
        ``"text"`` and ``"intent"`` columns, grouped by class in the order
        the classes are first seen.
    """
    rng = random if seed is None else random.Random(seed)

    # Group examples by class, preserving first-seen class order.
    examples_by_class = {}
    for example in dataset:
        examples_by_class.setdefault(example["intent"], []).append(example)

    subset = []
    for examples in examples_by_class.values():
        if len(examples) >= instances_per_class:
            subset.extend(rng.sample(examples, instances_per_class))
        else:
            # Under-represented class: keep everything that is available.
            subset.extend(examples)

    return Dataset.from_dict({
        "text": [example["text"] for example in subset],
        "intent": [example["intent"] for example in subset],
    })

In [6]:
def evaluate_model(ds, model):
    """Print the accuracy and a classification report of ``model`` on ``ds``.

    Args:
        ds: Object whose ``'text'`` entry is fed to the model and whose
            ``'label'`` entry holds the reference labels.
        model: Callable that maps the texts to predictions.

    Returns:
        None. The accuracy (rounded to 2 decimals) and the report (4 digits)
        are written to stdout.
    """
    predictions = model(ds['text'])
    references = ds['label']

    accuracy = accuracy_score(references, predictions)
    print(f'Accuracy: {round(accuracy, 2)}')

    report = classification_report(references, predictions, digits=4)
    print(report)


In [7]:
def hyperparameter_search_function(trial):
    """Sample one hyperparameter configuration from ``trial``.

    Args:
        trial: Object exposing ``suggest_float`` and ``suggest_categorical``.

    Returns:
        Dict with ``"learning_rate"`` (log-scaled float in [1e-5, 1e-3]),
        ``"batch_size"`` (one of 4, 8, 16, 32) and ``"multi_target_strategy"``.
    """
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-3, log=True)
    batch_size = trial.suggest_categorical("batch_size", [4, 8, 16, 32])
    strategy = trial.suggest_categorical(
        "multi_target_strategy",
        ["one-vs-rest", "multi-output", "classifier-chain"],
    )

    search_space = {}
    search_space["learning_rate"] = learning_rate
    search_space["batch_size"] = batch_size
    search_space["multi_target_strategy"] = strategy
    return search_space

In [8]:
def make_model(params=None):
    """Load a fresh SetFit model from the global ``model_id``.

    Args:
        params: Optional dict of hyperparameters. Only the
            ``"multi_target_strategy"`` entry is read; if ``params`` is
            ``None``, empty, or lacks that key, ``"one-vs-rest"`` is used.

    Returns:
        The model returned by ``SetFitModel.from_pretrained``.
    """
    # A params dict that omits the strategy must not raise KeyError.
    params = params or {}
    multi_target_strategy = params.get("multi_target_strategy", "one-vs-rest")
    return SetFitModel.from_pretrained(
        model_id, multi_target_strategy=multi_target_strategy
    )

# Global Variables

In [9]:
# --- Reproducibility ---
SEED = 42  # passed as `seed` to every SetFitTrainer below

# --- Model / Hub ---
model_id = "sentence-transformers/all-MiniLM-L12-v2"  # checkpoint loaded by SetFitModel.from_pretrained
update_hf_model = False  # when True, the push_to_hub cells upload the trainer's model

# --- Training hyperparameters (shared by all experiments) ---
BATCH_SIZE = 16
NUM_EPOCHS = 5
LEARNING_RATE = 2.14e-5
NUM_INTERACTIONS = 20  # forwarded as `num_iterations` to SetFitTrainer

# --- Evaluation ---
METRIC_ARGS = 'macro'  # used as the 'average' entry of the trainer's metric_kwargs

# Read Data

In [10]:
# Fetch the "ialvarenga/acl-arc-revised" dataset; the following cells read
# its "train", "test" and "eval" splits.
dataset = load_dataset("ialvarenga/acl-arc-revised")

Downloading readme:   0%|          | 0.00/845 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/174k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/28.9k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/28.6k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1532 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/192 [00:00<?, ? examples/s]

Generating eval split:   0%|          | 0/192 [00:00<?, ? examples/s]

In [11]:
# Select the train, test and eval splits
train_ds = dataset["train"]
test_ds = dataset["test"]
eval_ds = dataset["eval"]

In [12]:
# Map the target column from a single int (the class id in 'intent') to a
# one-hot vector stored in 'label', so that it is in the right format for the
# neural network.
train_ds = train_ds.map(encode_labels)
test_ds = test_ds.map(encode_labels)
eval_ds = eval_ds.map(encode_labels)

Map:   0%|          | 0/1532 [00:00<?, ? examples/s]

Map:   0%|          | 0/192 [00:00<?, ? examples/s]

Map:   0%|          | 0/192 [00:00<?, ? examples/s]

In [13]:
#TODO make a function to print each dataset distribution
# Class distribution of 'intent' over all three splits combined.
concatenate_datasets([train_ds, test_ds, eval_ds]).to_pandas().intent.value_counts()

intent
0    985
1    359
2    347
4     86
3     71
5     68
Name: count, dtype: int64

# Experiment with all the data

In [14]:
# Hub repository name used when pushing the model trained on the full train split.
all_data_model_name = "ialvarenga/setfit-experiment-all-data"

  and should_run_async(code)


In [15]:
model_fullset = SetFitModel.from_pretrained(model_id,  multi_target_strategy="one-vs-rest") # TODO: also test classifier-chain and multi-output

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/352 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.


In [16]:
# Build the trainer for the full-data experiment.
trainer = SetFitTrainer(
    # model and data
    model=model_fullset,
    train_dataset=train_ds,
    eval_dataset=eval_ds,
    # optimisation settings (shared constants from the config cell)
    loss_class=CosineSimilarityLoss,
    learning_rate=LEARNING_RATE,
    batch_size=BATCH_SIZE,
    num_epochs=NUM_EPOCHS,
    num_iterations=NUM_INTERACTIONS,
    seed=SEED,
    # metric reported by trainer.evaluate()
    metric='f1',
    metric_kwargs={'average': METRIC_ARGS},
)

  trainer = SetFitTrainer(


Map:   0%|          | 0/1532 [00:00<?, ? examples/s]

In [17]:
# Train the model (evaluation is run separately in the next cell).
trainer.train()

***** Running training *****
  Num unique pairs = 61280
  Batch size = 16
  Num epochs = 5
  Total optimization steps = 19150


Step,Training Loss


Step,Training Loss


In [18]:
 # Score the trained model on eval_ds with the trainer's configured metric (macro F1).
 trainer.evaluate()

  and should_run_async(code)
***** Running evaluation *****


Downloading builder script:   0%|          | 0.00/6.77k [00:00<?, ?B/s]

{'f1': 0.6251808803622275}

In [19]:
# Report accuracy and the per-class breakdown on the held-out test split.
evaluate_model(test_ds, trainer.model)

Accuracy: 0.76
              precision    recall  f1-score   support

           0     0.8229    0.7980    0.8103        99
           1     0.8286    0.8056    0.8169        36
           2     0.6053    0.6571    0.6301        35
           3     1.0000    0.5714    0.7273         7
           4     1.0000    0.6250    0.7692         8
           5     0.7500    0.8571    0.8000         7

   micro avg     0.7849    0.7604    0.7725       192
   macro avg     0.8345    0.7190    0.7590       192
weighted avg     0.7955    0.7604    0.7736       192
 samples avg     0.7604    0.7604    0.7604       192



  and should_run_async(code)
  _warn_prf(average, modifier, msg_start, len(result))


In [20]:
# Upload to the Hub only when explicitly enabled in the config cell.
if update_hf_model:
    trainer.push_to_hub(all_data_model_name)

  and should_run_async(code)


# Experiment with 8 instances per class

In [21]:
# Hub repository name used when pushing the 8-examples-per-class model.
examples_8_model_name = "ialvarenga/setfit-experiment-8-examples"

* Como podemos ver, temos mais de 8 instâncias por classe. Então teremos que selecionar um subconjunto dos dados de treino.

In [22]:
# get_subset samples with the global `random` module; seed it so the same
# 8 examples per class are drawn on every Restart & Run All.
random.seed(SEED)
train_ds_8_instances = get_subset(train_ds, 8)
train_ds_8_instances

Dataset({
    features: ['text', 'intent'],
    num_rows: 48
})

In [23]:
# get_subset keeps only 'text' and 'intent', so the one-hot 'label' column
# has to be rebuilt for the subset.
train_ds_8_instances = train_ds_8_instances.map(encode_labels)

Map:   0%|          | 0/48 [00:00<?, ? examples/s]

In [24]:
# Start from a fresh pretrained checkpoint so nothing carries over from the previous experiment.
model_8_per_class = SetFitModel.from_pretrained(model_id,  multi_target_strategy="one-vs-rest")

model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.


In [25]:
# Build the trainer for the 8-examples-per-class experiment.
trainer = SetFitTrainer(
    # model and data
    model=model_8_per_class,
    train_dataset=train_ds_8_instances,
    eval_dataset=eval_ds,
    # optimisation settings (shared constants from the config cell)
    loss_class=CosineSimilarityLoss,
    learning_rate=LEARNING_RATE,
    batch_size=BATCH_SIZE,
    num_epochs=NUM_EPOCHS,
    num_iterations=NUM_INTERACTIONS,
    seed=SEED,
    # metric reported by trainer.evaluate()
    metric='f1',
    metric_kwargs={'average': METRIC_ARGS},
)

  trainer = SetFitTrainer(


Map:   0%|          | 0/48 [00:00<?, ? examples/s]

In [26]:
# Train and evaluate!
#trainer.train()

In [27]:
#metrics = trainer.evaluate()
#metrics

In [28]:
# Upload to the Hub only when explicitly enabled in the config cell.
# NOTE(review): the trainer.train() call for this experiment is commented out
# above, so enabling update_hf_model as-is would presumably upload a model
# that was never fitted in this run -- confirm before enabling.
if update_hf_model:
    trainer.push_to_hub(examples_8_model_name)

In [29]:
#evaluate_model(test_ds, trainer.model)

# Experiment with 32 instances per class

In [30]:
# Hub repository name used when pushing the 32-examples-per-class model.
examples_32_model_name = "ialvarenga/setfit-experiment-32-examples"

In [31]:
# get_subset samples with the global `random` module; seed it so the same
# 32 examples per class are drawn on every Restart & Run All.
random.seed(SEED)
train_32_instances = get_subset(train_ds, 32)
train_32_instances

Dataset({
    features: ['text', 'intent'],
    num_rows: 192
})

In [32]:
# get_subset keeps only 'text' and 'intent', so the one-hot 'label' column
# has to be rebuilt for the subset.
train_32_instances = train_32_instances.map(encode_labels)

Map:   0%|          | 0/192 [00:00<?, ? examples/s]

In [33]:
# Start from a fresh pretrained checkpoint so nothing carries over from the previous experiment.
model_32_per_class = SetFitModel.from_pretrained(model_id,  multi_target_strategy="one-vs-rest")

model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.


In [34]:
# Build the trainer for the 32-examples-per-class experiment.
trainer = SetFitTrainer(
    # model and data
    model=model_32_per_class,
    train_dataset=train_32_instances,
    eval_dataset=eval_ds,
    # optimisation settings (shared constants from the config cell)
    loss_class=CosineSimilarityLoss,
    learning_rate=LEARNING_RATE,
    batch_size=BATCH_SIZE,
    num_epochs=NUM_EPOCHS,
    num_iterations=NUM_INTERACTIONS,
    seed=SEED,
    # metric reported by trainer.evaluate()
    metric='f1',
    metric_kwargs={'average': METRIC_ARGS},
)

  trainer = SetFitTrainer(


Map:   0%|          | 0/192 [00:00<?, ? examples/s]

In [35]:
# Train and evaluate!
#trainer.train()

In [36]:
#metrics = trainer.evaluate()
#metrics

In [37]:
# Upload to the Hub only when explicitly enabled in the config cell.
# NOTE(review): the trainer.train() call for this experiment is commented out
# above, so enabling update_hf_model as-is would presumably upload a model
# that was never fitted in this run -- confirm before enabling.
if update_hf_model:
    trainer.push_to_hub(examples_32_model_name)

In [38]:
#evaluate_model(test_ds, trainer.model)