In [2]:
# MINE
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


- [sbert](https://www.sbert.net/)
- [training_overview](https://www.sbert.net/docs/sentence_transformer/training_overview.html)
- [loss_overview](https://www.sbert.net/docs/sentence_transformer/loss_overview.html)
- [training_examples](https://www.sbert.net/docs/sentence_transformer/training/examples.html)
- [matryoshka](https://www.sbert.net/examples/sentence_transformer/training/matryoshka/README.html)
- [adaptive_layer](https://www.sbert.net/examples/sentence_transformer/training/adaptive_layer/README.html)
- [training_with_prompts](https://www.sbert.net/examples/sentence_transformer/training/prompts/README.html)
- [training_with_peft](https://www.sbert.net/examples/sentence_transformer/training/peft/README.html)
- [link text](https://)

In [3]:
# @title **All needed imports**
from datasets import load_dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
    SentenceTransformerModelCardData,
)
from sentence_transformers.losses import MultipleNegativesRankingLoss, \
AdaptiveLayerLoss, MatryoshkaLoss, CachedMultipleNegativesRankingLoss
from sentence_transformers.training_args import BatchSamplers
from sentence_transformers.evaluation import TripletEvaluator,\
 NanoBEIREvaluator

## **Embedding-Fine-Tuning-With-Prompts**

### **Load the Model**

In [4]:
# This is optional
model_card=SentenceTransformerModelCardData(
    language='en', license="mit",
    model_name="bge-large-en-v1.5"
)

# A must
model = SentenceTransformer(
    "BAAI/bge-large-en-v1.5",
    model_card_data=model_card
)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/779 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

In [5]:
model

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': True, 'architecture': 'BertModel'})
  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

In [6]:
print(model._first_module().auto_model.config.output_hidden_states)
# Should print: True
# This needs to be adjusted when loading the model

False


In [7]:
model._first_module().auto_model.config

BertConfig {
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "dtype": "float32",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "LABEL_0": 0
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.56.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

In [8]:
# Adjust as needed
use_prompts = True
include_prompts_in_pooling = True

In [9]:

# This is optional
model_card=SentenceTransformerModelCardData(
    language='en', license="mit",
    model_name="bge-large-en-v1.5"
)

# A must
model = SentenceTransformer(
    "BAAI/bge-large-en-v1.5",
    model_card_data=model_card,
    # This fails
    # output_hidden_states=True
)

# This works
# # 2️⃣ Enable hidden states on the first module’s auto_model
# model._first_module().auto_model.config.output_hidden_states = True
model.set_pooling_include_prompt(include_prompts_in_pooling)

In [10]:
print(model._first_module().auto_model.config.output_hidden_states)
print(model._first_module().auto_model.config)
# Should print: True

False
BertConfig {
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "dtype": "float32",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "LABEL_0"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "LABEL_0": 0
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.56.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}



In [11]:
model

SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': True, 'architecture': 'BertModel'})
  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

In [12]:
# @title **Load your dataset**
dataset = load_dataset("sentence-transformers/natural-questions", split="train")
dataset_dict = dataset.train_test_split(test_size=0.2, seed=30)

README.md: 0.00B [00:00, ?B/s]

pair/train-00000-of-00001.parquet:   0%|          | 0.00/44.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/100231 [00:00<?, ? examples/s]

In [13]:
dataset_dict

DatasetDict({
    train: Dataset({
        features: ['query', 'answer'],
        num_rows: 80184
    })
    test: Dataset({
        features: ['query', 'answer'],
        num_rows: 20047
    })
})

In [14]:
train_dataset = dataset_dict['train']
train_dataset

DatasetDict({
    train: Dataset({
        features: ['query', 'answer'],
        num_rows: 64147
    })
    test: Dataset({
        features: ['query', 'answer'],
        num_rows: 16037
    })
})

In [15]:
validation_dataset = dataset_dict['test']
validation_dataset

Dataset({
    features: ['query', 'answer'],
    num_rows: 64147
})

In [16]:
validation_dataset = train_eval_dict['test']
validation_dataset

Dataset({
    features: ['query', 'answer'],
    num_rows: 16037
})

### **Data Inspection**

In [18]:
# train
dataset_dict.column_names

{'train': ['query', 'answer'], 'test': ['query', 'answer']}

In [19]:
def random_examples_selector(given_dataset):
   from random import randint
   query = given_dataset['query'][randint(0, len(given_dataset))]
   answer = given_dataset['answer'][randint(0, len(given_dataset))]

   print(f"query: {query}")
   print(f"answer: {answer}")

In [20]:
random_examples_selector(train_dataset)

query: when was the first freeway built in los angeles
answer: Bob Johnston Johnston was born into a professional musical family. His grandmother Mamie Jo Adams was a songwriter, as was his mother Diane Johnston.[1] Diane had written songs for Gene Autry in the '50s and scored a hit in 1976 when Asleep at the Wheel covered her 1950 demo "Miles and Miles of Texas". After a stint in the Navy, Bob returned to Fort Worth, then he and Diane Johnston collaborated on songwriting for rockabilly artist Mac Curtis, and others. From 1956 to 1961 Bob recorded a few rockabilly singles under the name Don Johnston.[3] By 1964 he had moved into production work at Kapp Records in New York, freelance arranging for Dot Records and signed as a songwriter to music publisher Hill and Range. He also married songwriter Joy Byers with whom he began to collaborate.[4]


In [21]:
random_examples_selector(validation_dataset)

query: do jackie and hyde get back together season 7
answer: Battle of Leyte Gulf It was fought in waters near the Philippine islands of Leyte, Samar and Luzon, from 23â€“26 October 1944, between combined American and Australian forces and the Imperial Japanese Navy. On 20 October, United States troops invaded the island of Leyte as part of a strategy aimed at isolating Japan from the countries it had occupied in Southeast Asia, and in particular depriving Japanese forces and industry of vital oil supplies. The Imperial Japanese Navy (IJN) mobilized nearly all of its remaining major naval vessels in an attempt to defeat the Allied invasion but was repulsed by the U.S. Navy's Third and Seventh fleets. The IJN failed to achieve its objective, suffered heavy losses, and never sailed to battle in comparable force thereafter. The majority of its surviving heavy ships, deprived of fuel, remained in their bases for the rest of the Pacific War and suffered under heavy sustained aerial attack.[

In [22]:
random_examples_selector(test_dataset)

query: where does the story of cinderella come from
answer: Barium nitrate Barium nitrate with chemical formula Ba(NO3)2 is a salt composed of barium and the nitrate ion.


In [23]:
def examples_selector(given_dataset,index:int):
   query = given_dataset['query'][index]
   answer = given_dataset['answer'][index]

   print(f"query: {query}")
   print(f"answer: {answer}")

In [24]:
examples_selector(train_dataset,1)

query: where is the pyramid temple at borobudur located
answer: Borobudur Approximately 40 kilometres (25 mi) northwest of Yogyakarta and 86 kilometres (53 mi) west of Surakarta, Borobudur is located in an elevated area between two twin volcanoes, Sundoro-Sumbing and Merbabu-Merapi, and two rivers, the Progo and the Elo. According to local myth, the area known as Kedu Plain is a Javanese "sacred" place and has been dubbed "the garden of Java" due to its high agricultural fertility.[19] During the restoration in the early 20th century, it was discovered that three Buddhist temples in the region, Borobudur, Pawon and Mendut, are positioned along a straight line.[20] A ritual relationship between the three temples must have existed, although the exact ritual process is unknown.[14]


In [25]:
examples_selector(validation_dataset,1)

query: where did the saying debbie downer come from
answer: Debbie Downer The character's name, Debbie Downer, is a slang phrase which refers to someone who frequently adds bad news and negative feelings to a gathering, thus bringing down the mood of everyone around them. Dratch's character would usually appear at social gatherings and interrupt the conversation to voice negative opinions and pronouncements. She is especially concerned about the rate of feline AIDS, a subject that she would bring up on more than one occasion, saying it was the number one killer of domestic cats.


### **Loss Definition**

In [27]:
base_loss = CachedMultipleNegativesRankingLoss(model, mini_batch_size=16)

# Could include a Matryoshka Approach Loss/ Adaptive Loss or a combo of them
# matryoshka_dims = [1024,768, 512, 256, 128, 64]
# loss =  MatryoshkaLoss(model=model,
#                          loss=base_loss,
#                          matryoshka_dims=matryoshka_dims)

In [28]:
base_loss

CachedMultipleNegativesRankingLoss(
  (model): SentenceTransformer(
    (0): Transformer({'max_seq_length': 512, 'do_lower_case': True, 'architecture': 'BertModel'})
    (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
    (2): Normalize()
  )
  (cross_entropy_loss): CrossEntropyLoss()
)

### **Setting up Weights and Biases for Logging**

In [29]:
import wandb

In [30]:
wandb.login()

  | |_| | '_ \/ _` / _` |  _/ -_)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mdannyai[0m ([33mdannyai-danny-the-analyst[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [31]:
import os
# set the wandb project where this run will be logged
os.environ["WANDB_PROJECT"]="embedding_fine_tuning_with_prompts_bge_large_en_v1.5"

# save your trained model checkpoint to wandb
# os.environ["WANDB_LOG_MODEL"]="true" # throws an error, must use 'checkpoint' or 'end'
os.environ["WANDB_LOG_MODEL"]="checkpoint"

# turn off watch to log faster
os.environ["WANDB_WATCH"]="false"

### **Specify Training Arguments**

[Training Arguments](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.TrainingArguments.save_total_limit)

In [32]:
if use_prompts:
  query_prompt = "query: "
  corpus_prompt = "document: "
  prompts = {
        "query": query_prompt,
        "answer": corpus_prompt,
  }

In [33]:
training_args = SentenceTransformerTrainingArguments(
    # num_train_epochs=1, # full training,
    prompts = prompts if use_prompts else None,
    max_steps=100, # reduced from 300 to 100
    per_device_train_batch_size=5, # reduced from 16 to 5
    per_device_eval_batch_size=5, # reduced from 16 to 5
    learning_rate=2e-5,
    warmup_ratio=0.1,
    fp16=False,  # Set to False if you get an error that your GPU can't run on FP16
    bf16=True,  # Set to True if you have a GPU that supports BF16
    batch_sampler=BatchSamplers.NO_DUPLICATES,
    # Some optional tracking and debugging parameters:
    eval_strategy="steps",
    eval_steps=100,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=3,
    logging_strategy="steps",
    logging_steps=100,
    # logging_first_step=True,
    load_best_model_at_end=True,
    seed = 30,
    output_dir = "outputs",
    run_name="embedding_fine_tuning_with_prompts_bge_large_en_v1.5",
    report_to=["wandb"] # reporting to Weights and biases project
)

In [34]:
print(training_args)

SentenceTransformerTrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
average_tokens_across_devices=False,
batch_eval_metrics=False,
batch_sampler=BatchSamplers.NO_DUPLICATES,
bf16=True,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=False,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=100,
eval_

### **Evaluator**

- [TripletEvaluator](https://www.sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)
- [SimilarityFunction](https://www.sbert.net/docs/package_reference/sparse_encoder/SparseEncoder.html#sentence_transformers.SimilarityFunction)
- [NanoBEIREvaluator](https://www.sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.NanoBEIREvaluator)
- [Example Code](https://github.com/UKPLab/sentence-transformers/blob/master/examples/sentence_transformer/training/prompts/training_nq_prompts.py)

In [35]:
from sentence_transformers.evaluation import SimilarityFunction
# Initialise the evaluator
 # dataset evaluator is from hugggingface
 # https://huggingface.co/collections/zeta-alpha-ai/nanobeir-66e1a0af21dfd93e620cd9f6
 # Will use only one dataset "QuoraRetrieval"
datasets  = ['QuoraRetrieval']
val_evaluator = NanoBEIREvaluator(
    dataset_names = datasets,
    query_prompts=query_prompt if use_prompts else None,
    corpus_prompts=corpus_prompt if use_prompts else None,
    batch_size=50,
    show_progress_bar=True
)

Loading NanoBEIR datasets:   0%|          | 0/1 [00:00<?, ?it/s]

README.md: 0.00B [00:00, ?B/s]

corpus/train-00000-of-00001.parquet:   0%|          | 0.00/229k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/5046 [00:00<?, ? examples/s]

queries/train-00000-of-00001.parquet:   0%|          | 0.00/3.92k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/50 [00:00<?, ? examples/s]

qrels/train-00000-of-00001.parquet:   0%|          | 0.00/2.27k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/70 [00:00<?, ? examples/s]



In [36]:
# @title **Create an evaluator & evaluate the base model**
val_evaluator(model)
# {'all-nli-val_cosine_accuracy': 0.9548906683921814}

Evaluating datasets:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Corpus Chunks:   0%|          | 0/1 [00:00<?, ?it/s][A

Batches:   0%|          | 0/101 [00:00<?, ?it/s]


Corpus Chunks: 100%|██████████| 1/1 [00:19<00:00, 19.98s/it]
Evaluating datasets: 100%|██████████| 1/1 [00:21<00:00, 21.01s/it]


{'NanoQuoraRetrieval_cosine_accuracy@1': 0.92,
 'NanoQuoraRetrieval_cosine_accuracy@3': 0.98,
 'NanoQuoraRetrieval_cosine_accuracy@5': 0.98,
 'NanoQuoraRetrieval_cosine_accuracy@10': 1.0,
 'NanoQuoraRetrieval_cosine_precision@1': 0.92,
 'NanoQuoraRetrieval_cosine_precision@3': 0.4133333333333333,
 'NanoQuoraRetrieval_cosine_precision@5': 0.25199999999999995,
 'NanoQuoraRetrieval_cosine_precision@10': 0.13999999999999999,
 'NanoQuoraRetrieval_cosine_recall@1': 0.8073333333333332,
 'NanoQuoraRetrieval_cosine_recall@3': 0.9520000000000001,
 'NanoQuoraRetrieval_cosine_recall@5': 0.9553333333333334,
 'NanoQuoraRetrieval_cosine_recall@10': 1.0,
 'NanoQuoraRetrieval_cosine_ndcg@10': 0.9582650610304665,
 'NanoQuoraRetrieval_cosine_mrr@10': 0.9520000000000001,
 'NanoQuoraRetrieval_cosine_map@100': 0.9383253968253967,
 'NanoBEIR_mean_cosine_accuracy@1': 0.92,
 'NanoBEIR_mean_cosine_accuracy@3': 0.98,
 'NanoBEIR_mean_cosine_accuracy@5': 0.98,
 'NanoBEIR_mean_cosine_accuracy@10': 1.0,
 'NanoBEIR_m

In [37]:
# @title **Create a trainer & train**

trainer = SentenceTransformerTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    evaluator=val_evaluator,
    loss=base_loss
)

Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

In [38]:
trainer.train()

Step,Training Loss,Validation Loss,Nanoquoraretrieval Cosine Accuracy@1,Nanoquoraretrieval Cosine Accuracy@3,Nanoquoraretrieval Cosine Accuracy@5,Nanoquoraretrieval Cosine Accuracy@10,Nanoquoraretrieval Cosine Precision@1,Nanoquoraretrieval Cosine Precision@3,Nanoquoraretrieval Cosine Precision@5,Nanoquoraretrieval Cosine Precision@10,Nanoquoraretrieval Cosine Recall@1,Nanoquoraretrieval Cosine Recall@3,Nanoquoraretrieval Cosine Recall@5,Nanoquoraretrieval Cosine Recall@10,Nanoquoraretrieval Cosine Ndcg@10,Nanoquoraretrieval Cosine Mrr@10,Nanoquoraretrieval Cosine Map@100,Nanobeir Mean Cosine Accuracy@1,Nanobeir Mean Cosine Accuracy@3,Nanobeir Mean Cosine Accuracy@5,Nanobeir Mean Cosine Accuracy@10,Nanobeir Mean Cosine Precision@1,Nanobeir Mean Cosine Precision@3,Nanobeir Mean Cosine Precision@5,Nanobeir Mean Cosine Precision@10,Nanobeir Mean Cosine Recall@1,Nanobeir Mean Cosine Recall@3,Nanobeir Mean Cosine Recall@5,Nanobeir Mean Cosine Recall@10,Nanobeir Mean Cosine Ndcg@10,Nanobeir Mean Cosine Mrr@10,Nanobeir Mean Cosine Map@100
100,0.0063,0.002936,0.88,0.96,0.98,1.0,0.88,0.4,0.26,0.136,0.767333,0.922,0.966,0.993333,0.931183,0.922889,0.905675,0.88,0.96,0.98,1.0,0.88,0.4,0.26,0.136,0.767333,0.922,0.966,0.993333,0.931183,0.922889,0.905675


Evaluating datasets:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Corpus Chunks:   0%|          | 0/1 [00:00<?, ?it/s][A

Batches:   0%|          | 0/101 [00:00<?, ?it/s]


Corpus Chunks: 100%|██████████| 1/1 [00:29<00:00, 29.81s/it]
Evaluating datasets: 100%|██████████| 1/1 [00:29<00:00, 29.90s/it]
[34m[1mwandb[0m: Adding directory to artifact (outputs/checkpoint-100)... Done. 82.3s


TrainOutput(global_step=100, training_loss=0.00627128541469574, metrics={'train_runtime': 1556.989, 'train_samples_per_second': 0.321, 'train_steps_per_second': 0.064, 'total_flos': 0.0, 'train_loss': 0.00627128541469574, 'epoch': 0.00779423226812159})

In [39]:
# @title **Evaluate the trained model on the test set**
test_evaluator = NanoBEIREvaluator(
    dataset_names = datasets,
    query_prompts=query_prompt if use_prompts else None,
    corpus_prompts=corpus_prompt if use_prompts else None,
    batch_size=50,
    show_progress_bar=True
)



In [40]:
test_evaluator(model)

Evaluating datasets:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Corpus Chunks:   0%|          | 0/1 [00:00<?, ?it/s][A

Batches:   0%|          | 0/101 [00:00<?, ?it/s]


Corpus Chunks: 100%|██████████| 1/1 [00:28<00:00, 28.89s/it]
Evaluating datasets: 100%|██████████| 1/1 [00:29<00:00, 29.57s/it]


{'NanoQuoraRetrieval_cosine_accuracy@1': 0.88,
 'NanoQuoraRetrieval_cosine_accuracy@3': 0.96,
 'NanoQuoraRetrieval_cosine_accuracy@5': 0.98,
 'NanoQuoraRetrieval_cosine_accuracy@10': 1.0,
 'NanoQuoraRetrieval_cosine_precision@1': 0.88,
 'NanoQuoraRetrieval_cosine_precision@3': 0.3999999999999999,
 'NanoQuoraRetrieval_cosine_precision@5': 0.25999999999999995,
 'NanoQuoraRetrieval_cosine_precision@10': 0.13599999999999998,
 'NanoQuoraRetrieval_cosine_recall@1': 0.7673333333333332,
 'NanoQuoraRetrieval_cosine_recall@3': 0.922,
 'NanoQuoraRetrieval_cosine_recall@5': 0.966,
 'NanoQuoraRetrieval_cosine_recall@10': 0.9933333333333334,
 'NanoQuoraRetrieval_cosine_ndcg@10': 0.9311833586321692,
 'NanoQuoraRetrieval_cosine_mrr@10': 0.9228888888888889,
 'NanoQuoraRetrieval_cosine_map@100': 0.9056754689754689,
 'NanoBEIR_mean_cosine_accuracy@1': 0.88,
 'NanoBEIR_mean_cosine_accuracy@3': 0.96,
 'NanoBEIR_mean_cosine_accuracy@5': 0.98,
 'NanoBEIR_mean_cosine_accuracy@10': 1.0,
 'NanoBEIR_mean_cosine_

### **Model Saving**

In [41]:
# @title **Save the trained model**
model.save_pretrained("embedding_fine_tuning_with_prompts_bge_large_en_v1.5")

In [42]:
# @title **Push it to the Hugging Face Hub**
# do not create repo on hugging face
model.push_to_hub("embedding_fine_tuning_with_prompts_bge_large_en_v1.5", exist_ok=True)

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...n3297zw/model.safetensors:   0%|          | 28.6kB / 1.34GB            

No files have been modified since last commit. Skipping to prevent empty commit.


'https://huggingface.co/DannyAI/embedding_fine_tuning_with_prompts_bge_large_en_v1.5/commit/062585873a97c8689f6fc1508446ebb04da294b3'