In [1]:
%pip install "sentence-transformers[train]" wandb codecarbon

Collecting sentence-transformers[train]
  Downloading sentence_transformers-3.0.0-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.7/224.7 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting wandb
  Downloading wandb-0.17.0-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting codecarbon
  Downloading codecarbon-2.4.2-py3-none-any.whl (494 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m494.9/494.9 kB[0m [31m29.3 MB/s[0m eta [36m0:00:00[0m
Collecting datasets (from sentence-transformers[train])
  Downloading datasets-2.19.1-py3-none-any.whl (542 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m32.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate>=0.20.3 (from sentence-tra

In [2]:
from datasets import load_dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
    SentenceTransformerModelCardData,
)
from sentence_transformers.losses import MultipleNegativesRankingLoss
from sentence_transformers.training_args import BatchSamplers
from sentence_transformers.evaluation import TripletEvaluator



This is the ID of the synthetic dataset we generated previously.

In [3]:
dataset_id = "davanstrien/similarity-dataset-sc2-8b"

In [34]:
# 1. Load a model to finetune with 2. (Optional) model card data

dataset_name = dataset_id.split('/')[-1]

model = SentenceTransformer(
    "microsoft/mpnet-base",
    model_card_data=SentenceTransformerModelCardData(
        language="en",
        license="apache-2.0",
        train_datasets=[{"name": dataset_name, "id": dataset_id}],
        model_name=f"MPNet base trained on {dataset_name}",
    )
)


Some weights of MPNetModel were not initialized from the model checkpoint at microsoft/mpnet-base and are newly initialized: ['mpnet.pooler.dense.bias', 'mpnet.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [35]:
model

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

In [43]:
# 3. Load a dataset to finetune on
dataset = load_dataset(dataset_id,split='train')
dataset

Dataset({
    features: ['anchor', 'positive', 'negative', 'generation'],
    num_rows: 2324
})

In [44]:
dataset = dataset.train_test_split(test_size=0.2)

In [45]:
dataset

DatasetDict({
    train: Dataset({
        features: ['anchor', 'positive', 'negative', 'generation'],
        num_rows: 1859
    })
    test: Dataset({
        features: ['anchor', 'positive', 'negative', 'generation'],
        num_rows: 465
    })
})

In [46]:
# 4. Define a loss function
loss = MultipleNegativesRankingLoss(model)

In [48]:
# 5. (Optional) Specify training arguments
args = SentenceTransformerTrainingArguments(
    # Required parameter:
    output_dir=f"models/{dataset_name}",
    # Optional training parameters:
    num_train_epochs=10,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_ratio=0.1,
    fp16=False,  # Set to False if you get an error that your GPU can't run on FP16
    bf16=True,  # Set to True if you have a GPU that supports BF16
    batch_sampler=BatchSamplers.NO_DUPLICATES,  # MultipleNegativesRankingLoss benefits from no duplicate samples in a batch
    # Optional tracking/debugging parameters:
    eval_strategy="steps",
    eval_steps=100,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=2,
    logging_steps=100,
    run_name=dataset_id,  # Will be used in W&B if `wandb` is installed
)


In [49]:
# 6. (Optional) Create an evaluator & evaluate the base model
dev_evaluator = TripletEvaluator(
    anchors=dataset['test']["anchor"],
    positives=dataset['test']["positive"],
    negatives=dataset['test']["negative"],
    name="code-prompt-similarity-dev",
)
dev_evaluator(model)

{'code-prompt-similarity-dev_cosine_accuracy': 0.8279569892473119,
 'code-prompt-similarity-dev_dot_accuracy': 0.4258064516129032,
 'code-prompt-similarity-dev_manhattan_accuracy': 0.864516129032258,
 'code-prompt-similarity-dev_euclidean_accuracy': 0.832258064516129,
 'code-prompt-similarity-dev_max_accuracy': 0.864516129032258}

In [50]:
# 7. Create a trainer & train
trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
    loss=loss,
    evaluator=dev_evaluator,
)

In [51]:
trainer.train()

Step,Training Loss,Validation Loss,Code-prompt-similarity-dev Cosine Accuracy,Code-prompt-similarity-dev Dot Accuracy,Code-prompt-similarity-dev Manhattan Accuracy,Code-prompt-similarity-dev Euclidean Accuracy,Code-prompt-similarity-dev Max Accuracy
100,0.8599,0.159901,0.950538,0.043011,0.941935,0.946237,0.950538
200,0.1914,0.123644,0.96129,0.032258,0.963441,0.963441,0.963441
300,0.0806,0.131011,0.95914,0.043011,0.95914,0.95914,0.95914
400,0.038,0.111195,0.965591,0.032258,0.96129,0.963441,0.965591
500,0.0183,0.126285,0.963441,0.034409,0.96129,0.963441,0.963441
600,0.0073,0.134239,0.956989,0.03871,0.954839,0.952688,0.956989
700,0.0036,0.133716,0.956989,0.03871,0.96129,0.956989,0.96129
800,0.0021,0.123739,0.95914,0.03871,0.954839,0.954839,0.95914
900,0.0013,0.129441,0.956989,0.04086,0.956989,0.956989,0.956989
1000,0.0011,0.132313,0.96129,0.03871,0.956989,0.95914,0.96129


Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

TrainOutput(global_step=1170, training_loss=0.10300025749219278, metrics={'train_runtime': 409.1158, 'train_samples_per_second': 45.439, 'train_steps_per_second': 2.86, 'total_flos': 0.0, 'train_loss': 0.10300025749219278, 'epoch': 10.0})

In [52]:
# (Optional) Evaluate the trained model on the test set
test_evaluator = TripletEvaluator(
    anchors=dataset['test']["anchor"],
    positives=dataset['test']["positive"],
    negatives=dataset['test']["negative"],
)
test_evaluator(model)

{'cosine_accuracy': 0.9612903225806452,
 'dot_accuracy': 0.03870967741935484,
 'manhattan_accuracy': 0.9612903225806452,
 'euclidean_accuracy': 0.9612903225806452,
 'max_accuracy': 0.9612903225806452}

In [None]:
# (Optional) Push it to the Hugging Face Hub
model.push_to_hub("code-prompt-similarity-model")