In [1]:
cd .. 

/home/admin-hieunn/important/VNUIS-Chatbot


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
import numpy as np
import json
import torch
from sentence_transformers.losses import MatryoshkaLoss, MultipleNegativesRankingLoss
import pandas as pd
from datasets import Dataset
from huggingface_hub import login
import os
from dotenv import load_dotenv
from datetime import datetime
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import (
    InformationRetrievalEvaluator,
    SequentialEvaluator,
)
from sentence_transformers.util import cos_sim
from datasets import load_dataset, concatenate_datasets
from sentence_transformers import SentenceTransformerTrainingArguments
from sentence_transformers.training_args import BatchSamplers
from sentence_transformers import SentenceTransformerTrainer
from dotenv import load_dotenv

load_dotenv()
corpus = pd.read_csv("./Data/finetune_embedding/corpus.csv")
queries = pd.read_csv("./Data/finetune_embedding/queries.csv")
relevant_docs_data = pd.read_csv("./Data/finetune_embedding/eval.csv")


corpus = Dataset.from_pandas(corpus)
queries = Dataset.from_pandas(queries)
relevant_docs_data = Dataset.from_pandas(relevant_docs_data)
# Convert the datasets to dictionaries
corpus = dict(
    zip(corpus["relevantdocs_id"], corpus["Relevant docs"])
)  # Our corpus (cid => document)
queries = dict(
    zip(queries["queries_id"], queries["Question"])
)  # Our queries (qid => question)
# Convert integer keys to string keys
queries = {str(key): value for key, value in queries.items()}
# Now your 'queries' dictionary will have string keys

relevant_docs = {}  # Query ID to relevant documents (qid => set([relevant_cids])
for qid, corpus_ids in zip(
    relevant_docs_data["queries_id"], relevant_docs_data["relevantdocs_id"]
):
    qid = str(qid)
    corpus_ids = str(corpus_ids)
    if qid not in relevant_docs:
        relevant_docs[qid] = set()
    relevant_docs[qid].add(corpus_ids)

model = SentenceTransformer(
    # model_name_or_path="BAAI/bge-m3",
    model_name_or_path="/home/admin-hieunn/important/VNUIS-Chatbot/Model/vnuis_embedding_bge_20241227",
    trust_remote_code=True,
)
matryoshka_dimensions = [768, 512, 256, 128, 64]  # Important: large to small
matryoshka_evaluators = []
# Iterate over the different dimensions
for dim in matryoshka_dimensions:
    ir_evaluator = InformationRetrievalEvaluator(
        queries=queries,
        corpus=corpus,
        relevant_docs=relevant_docs,
        name=f"dim_{dim}",
        truncate_dim=dim,  # Truncate the embeddings to a certain dimension
        score_functions={"cosine": cos_sim},
    )
    matryoshka_evaluators.append(ir_evaluator)

# Create a sequential evaluator
evaluator = SequentialEvaluator(matryoshka_evaluators)

# Evaluate the model and ensure it's all on the correct device
results = evaluator(model)

for k, v in results.items():
    print(k, v)

  from tqdm.autonotebook import tqdm, trange


dim_768_cosine_accuracy@1 0.6781059947871416
dim_768_cosine_accuracy@3 0.8783666377063423
dim_768_cosine_accuracy@5 0.9135534317984362
dim_768_cosine_accuracy@10 0.94874022589053
dim_768_cosine_precision@1 0.6781059947871416
dim_768_cosine_precision@3 0.29278887923544744
dim_768_cosine_precision@5 0.18271068635968718
dim_768_cosine_precision@10 0.09487402258905299
dim_768_cosine_recall@1 0.6781059947871416
dim_768_cosine_recall@3 0.8783666377063423
dim_768_cosine_recall@5 0.9135534317984362
dim_768_cosine_recall@10 0.94874022589053
dim_768_cosine_ndcg@10 0.8248530495073324
dim_768_cosine_mrr@10 0.7839509812033691
dim_768_cosine_map@100 0.7867942877699591
dim_512_cosine_accuracy@1 0.6772371850564727
dim_512_cosine_accuracy@3 0.8718505647263249
dim_512_cosine_accuracy@5 0.9122502172024327
dim_512_cosine_accuracy@10 0.9539530842745438
dim_512_cosine_precision@1 0.6772371850564727
dim_512_cosine_precision@3 0.290616854908775
dim_512_cosine_precision@5 0.18245004344048651
dim_512_cosine_pre

In [3]:
def prepare_training_dataset(queries, corpus, relevant_docs):
    anchors = []
    positives = []
    for query_id, docs in relevant_docs.items():
        for doc_id in docs:
            anchors.append(queries[query_id])
            positives.append(corpus[doc_id])
    df = {"anchor": anchors, "positive": positives}

    return Dataset.from_dict(df)


pairs = prepare_training_dataset(queries, corpus, relevant_docs)

matryoshka_dimensions = [768, 512, 256, 128, 64]  # Important: large to small
inner_train_loss = MultipleNegativesRankingLoss(model)
train_loss = MatryoshkaLoss(
    model, inner_train_loss, matryoshka_dims=matryoshka_dimensions
)

# Get today's date in the desired format (e.g., YYYYMMDD)
today_date = datetime.today().strftime("%Y%m%d")

# Define the model name with today's date
model_name = f"vnuis_embedding_bge_{today_date}"

args = SentenceTransformerTrainingArguments(
    output_dir=f"./Model/{model_name}",  # output directory and hugging face model ID
    num_train_epochs=30,  # number of epochs
    per_device_train_batch_size=4,  # train batch size
    gradient_accumulation_steps=128,  # for a global batch size of 512
    per_device_eval_batch_size=4,  # evaluation batch size
    gradient_checkpointing=True,
    warmup_ratio=0.1,  # warmup ratio
    learning_rate=2e-5,  # learning rate, 2e-5 is a good value
    lr_scheduler_type="cosine",  # use constant learning rate scheduler
    optim="adamw_torch_fused",  # use fused adamw optimizer
    # tf32=False,                                  # use tf32 precision
    bf16=True,  # use bf16 precision
    batch_sampler=BatchSamplers.NO_DUPLICATES,  # MultipleNegativesRankingLoss benefits from no duplicate samples in a batch
    eval_strategy="steps",  # evaluate after each epoch
    save_strategy="steps",  # save after each epoch
    logging_steps=10,  # log every 10 steps
    save_steps = 10,
    save_total_limit=2,  # save only the last 3 models
    load_best_model_at_end=True,  # load the best model when training ends
    metric_for_best_model="eval_dim_128_cosine_ndcg@10",  # Optimizing for the best ndcg@10 score for the 128 dimension
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,  # training arguments
    train_dataset=pairs,
    loss=train_loss,
    evaluator=evaluator,
)
# start training, the model will be automatically saved to the hub and the output directory
trainer.train()

# save the best model
trainer.save_model()


[2024-12-27 16:31:24,205] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/usr/bin/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
/usr/bin/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
  8%|▊         | 10/120 [05:42<1:01:10, 33.37s/it]

{'loss': 2.6823, 'grad_norm': 17.194053649902344, 'learning_rate': 1.6666666666666667e-05, 'epoch': 2.22}


  8%|▊         | 10/120 [06:24<1:01:10, 33.37s/it]

{'eval_dim_768_cosine_accuracy@1': 0.7072111207645526, 'eval_dim_768_cosine_accuracy@3': 0.8801042571676803, 'eval_dim_768_cosine_accuracy@5': 0.9139878366637706, 'eval_dim_768_cosine_accuracy@10': 0.946133796698523, 'eval_dim_768_cosine_precision@1': 0.7072111207645526, 'eval_dim_768_cosine_precision@3': 0.29336808572256007, 'eval_dim_768_cosine_precision@5': 0.1827975673327541, 'eval_dim_768_cosine_precision@10': 0.09461337966985228, 'eval_dim_768_cosine_recall@1': 0.7072111207645526, 'eval_dim_768_cosine_recall@3': 0.8801042571676803, 'eval_dim_768_cosine_recall@5': 0.9139878366637706, 'eval_dim_768_cosine_recall@10': 0.946133796698523, 'eval_dim_768_cosine_ndcg@10': 0.8356623746856879, 'eval_dim_768_cosine_mrr@10': 0.7993144332740328, 'eval_dim_768_cosine_map@100': 0.8013748706823961, 'eval_dim_512_cosine_accuracy@1': 0.6980886185925282, 'eval_dim_512_cosine_accuracy@3': 0.8748913987836664, 'eval_dim_512_cosine_accuracy@5': 0.9152910512597741, 'eval_dim_512_cosine_accuracy@10': 0.9

 17%|█▋        | 20/120 [12:11<58:11, 34.91s/it]  

{'loss': 1.2366, 'grad_norm': 15.337178230285645, 'learning_rate': 1.973044870579824e-05, 'epoch': 4.44}


 17%|█▋        | 20/120 [12:54<58:11, 34.91s/it]

{'eval_dim_768_cosine_accuracy@1': 0.6776715899218071, 'eval_dim_768_cosine_accuracy@3': 0.8801042571676803, 'eval_dim_768_cosine_accuracy@5': 0.9135534317984362, 'eval_dim_768_cosine_accuracy@10': 0.947871416159861, 'eval_dim_768_cosine_precision@1': 0.6776715899218071, 'eval_dim_768_cosine_precision@3': 0.29336808572256007, 'eval_dim_768_cosine_precision@5': 0.18271068635968718, 'eval_dim_768_cosine_precision@10': 0.09478714161598609, 'eval_dim_768_cosine_recall@1': 0.6776715899218071, 'eval_dim_768_cosine_recall@3': 0.8801042571676803, 'eval_dim_768_cosine_recall@5': 0.9135534317984362, 'eval_dim_768_cosine_recall@10': 0.947871416159861, 'eval_dim_768_cosine_ndcg@10': 0.824799747363737, 'eval_dim_768_cosine_mrr@10': 0.7840940589963166, 'eval_dim_768_cosine_map@100': 0.787013899142166, 'eval_dim_512_cosine_accuracy@1': 0.6776715899218071, 'eval_dim_512_cosine_accuracy@3': 0.8714161598609904, 'eval_dim_512_cosine_accuracy@5': 0.9122502172024327, 'eval_dim_512_cosine_accuracy@10': 0.95

 25%|██▌       | 30/120 [18:41<53:13, 35.48s/it]  

{'loss': 0.5207, 'grad_norm': 6.894130706787109, 'learning_rate': 1.866025403784439e-05, 'epoch': 6.67}


 25%|██▌       | 30/120 [19:23<53:13, 35.48s/it]

{'eval_dim_768_cosine_accuracy@1': 0.6733275412684622, 'eval_dim_768_cosine_accuracy@3': 0.8609904430929627, 'eval_dim_768_cosine_accuracy@5': 0.9083405734144222, 'eval_dim_768_cosine_accuracy@10': 0.9496090356211989, 'eval_dim_768_cosine_precision@1': 0.6733275412684622, 'eval_dim_768_cosine_precision@3': 0.28699681436432084, 'eval_dim_768_cosine_precision@5': 0.1816681146828844, 'eval_dim_768_cosine_precision@10': 0.09496090356211988, 'eval_dim_768_cosine_recall@1': 0.6733275412684622, 'eval_dim_768_cosine_recall@3': 0.8609904430929627, 'eval_dim_768_cosine_recall@5': 0.9083405734144222, 'eval_dim_768_cosine_recall@10': 0.9496090356211989, 'eval_dim_768_cosine_ndcg@10': 0.818589713052241, 'eval_dim_768_cosine_mrr@10': 0.7758020975549199, 'eval_dim_768_cosine_map@100': 0.779000657177445, 'eval_dim_512_cosine_accuracy@1': 0.6655082536924414, 'eval_dim_512_cosine_accuracy@3': 0.8605560382276282, 'eval_dim_512_cosine_accuracy@5': 0.9070373588184187, 'eval_dim_512_cosine_accuracy@10': 0.9

 33%|███▎      | 40/120 [25:02<44:50, 33.63s/it]  

{'loss': 0.2292, 'grad_norm': 3.1116786003112793, 'learning_rate': 1.686241637868734e-05, 'epoch': 8.89}


 33%|███▎      | 40/120 [25:44<44:50, 33.63s/it]

{'eval_dim_768_cosine_accuracy@1': 0.6542137271937446, 'eval_dim_768_cosine_accuracy@3': 0.8492615117289314, 'eval_dim_768_cosine_accuracy@5': 0.8979148566463945, 'eval_dim_768_cosine_accuracy@10': 0.9448305821025196, 'eval_dim_768_cosine_precision@1': 0.6542137271937446, 'eval_dim_768_cosine_precision@3': 0.2830871705763104, 'eval_dim_768_cosine_precision@5': 0.1795829713292789, 'eval_dim_768_cosine_precision@10': 0.09448305821025194, 'eval_dim_768_cosine_recall@1': 0.6542137271937446, 'eval_dim_768_cosine_recall@3': 0.8492615117289314, 'eval_dim_768_cosine_recall@5': 0.8979148566463945, 'eval_dim_768_cosine_recall@10': 0.9448305821025196, 'eval_dim_768_cosine_ndcg@10': 0.8044180506351685, 'eval_dim_768_cosine_mrr@10': 0.7588001461806834, 'eval_dim_768_cosine_map@100': 0.7626087324264056, 'eval_dim_512_cosine_accuracy@1': 0.6494352736750652, 'eval_dim_512_cosine_accuracy@3': 0.8423110338835795, 'eval_dim_512_cosine_accuracy@5': 0.8992180712423979, 'eval_dim_512_cosine_accuracy@10': 0.

 42%|████▏     | 50/120 [31:35<41:24, 35.49s/it]  

{'loss': 0.1523, 'grad_norm': 4.3719401359558105, 'learning_rate': 1.4487991802004625e-05, 'epoch': 11.11}


 42%|████▏     | 50/120 [32:17<41:24, 35.49s/it]

{'eval_dim_768_cosine_accuracy@1': 0.6476976542137272, 'eval_dim_768_cosine_accuracy@3': 0.8331885317115552, 'eval_dim_768_cosine_accuracy@5': 0.8879235447437012, 'eval_dim_768_cosine_accuracy@10': 0.9404865334491747, 'eval_dim_768_cosine_precision@1': 0.6476976542137272, 'eval_dim_768_cosine_precision@3': 0.27772951057051837, 'eval_dim_768_cosine_precision@5': 0.1775847089487402, 'eval_dim_768_cosine_precision@10': 0.09404865334491745, 'eval_dim_768_cosine_recall@1': 0.6476976542137272, 'eval_dim_768_cosine_recall@3': 0.8331885317115552, 'eval_dim_768_cosine_recall@5': 0.8879235447437012, 'eval_dim_768_cosine_recall@10': 0.9404865334491747, 'eval_dim_768_cosine_ndcg@10': 0.7972598799160241, 'eval_dim_768_cosine_mrr@10': 0.7509975797443208, 'eval_dim_768_cosine_map@100': 0.7550072330162996, 'eval_dim_512_cosine_accuracy@1': 0.6498696785403997, 'eval_dim_512_cosine_accuracy@3': 0.8305821025195482, 'eval_dim_512_cosine_accuracy@5': 0.8827106863596872, 'eval_dim_512_cosine_accuracy@10': 0

 50%|█████     | 60/120 [38:03<35:28, 35.48s/it]

{'loss': 0.1033, 'grad_norm': 3.720799684524536, 'learning_rate': 1.1736481776669307e-05, 'epoch': 13.33}


 50%|█████     | 60/120 [38:45<35:28, 35.48s/it]

{'eval_dim_768_cosine_accuracy@1': 0.6476976542137272, 'eval_dim_768_cosine_accuracy@3': 0.8305821025195482, 'eval_dim_768_cosine_accuracy@5': 0.8835794960903562, 'eval_dim_768_cosine_accuracy@10': 0.9396177237185056, 'eval_dim_768_cosine_precision@1': 0.6476976542137272, 'eval_dim_768_cosine_precision@3': 0.27686070083984937, 'eval_dim_768_cosine_precision@5': 0.1767158992180712, 'eval_dim_768_cosine_precision@10': 0.09396177237185055, 'eval_dim_768_cosine_recall@1': 0.6476976542137272, 'eval_dim_768_cosine_recall@3': 0.8305821025195482, 'eval_dim_768_cosine_recall@5': 0.8835794960903562, 'eval_dim_768_cosine_recall@10': 0.9396177237185056, 'eval_dim_768_cosine_ndcg@10': 0.7966450368104928, 'eval_dim_768_cosine_mrr@10': 0.7505449022933807, 'eval_dim_768_cosine_map@100': 0.7546402988933104, 'eval_dim_512_cosine_accuracy@1': 0.6490008688097306, 'eval_dim_512_cosine_accuracy@3': 0.8262380538662033, 'eval_dim_512_cosine_accuracy@5': 0.8801042571676803, 'eval_dim_512_cosine_accuracy@10': 0

 58%|█████▊    | 70/120 [44:30<29:17, 35.15s/it]

{'loss': 0.072, 'grad_norm': 3.760728597640991, 'learning_rate': 8.839070858747697e-06, 'epoch': 15.56}


 58%|█████▊    | 70/120 [45:12<29:17, 35.15s/it]

{'eval_dim_768_cosine_accuracy@1': 0.6429192006950478, 'eval_dim_768_cosine_accuracy@3': 0.8236316246741964, 'eval_dim_768_cosine_accuracy@5': 0.8827106863596872, 'eval_dim_768_cosine_accuracy@10': 0.9422241529105126, 'eval_dim_768_cosine_precision@1': 0.6429192006950478, 'eval_dim_768_cosine_precision@3': 0.2745438748913988, 'eval_dim_768_cosine_precision@5': 0.1765421372719374, 'eval_dim_768_cosine_precision@10': 0.09422241529105126, 'eval_dim_768_cosine_recall@1': 0.6429192006950478, 'eval_dim_768_cosine_recall@3': 0.8236316246741964, 'eval_dim_768_cosine_recall@5': 0.8827106863596872, 'eval_dim_768_cosine_recall@10': 0.9422241529105126, 'eval_dim_768_cosine_ndcg@10': 0.7940848894749082, 'eval_dim_768_cosine_mrr@10': 0.7465006274736936, 'eval_dim_768_cosine_map@100': 0.7504106850354332, 'eval_dim_512_cosine_accuracy@1': 0.6437880104257168, 'eval_dim_512_cosine_accuracy@3': 0.8275412684622068, 'eval_dim_512_cosine_accuracy@5': 0.8770634231103388, 'eval_dim_512_cosine_accuracy@10': 0.

 67%|██████▋   | 80/120 [50:54<22:52, 34.32s/it]

{'loss': 0.0587, 'grad_norm': 1.4197888374328613, 'learning_rate': 6.039202339608432e-06, 'epoch': 17.78}


 67%|██████▋   | 80/120 [51:36<22:52, 34.32s/it]

{'eval_dim_768_cosine_accuracy@1': 0.6485664639443962, 'eval_dim_768_cosine_accuracy@3': 0.8301476976542137, 'eval_dim_768_cosine_accuracy@5': 0.8861859252823632, 'eval_dim_768_cosine_accuracy@10': 0.9435273675065161, 'eval_dim_768_cosine_precision@1': 0.6485664639443962, 'eval_dim_768_cosine_precision@3': 0.2767158992180712, 'eval_dim_768_cosine_precision@5': 0.17723718505647262, 'eval_dim_768_cosine_precision@10': 0.09435273675065159, 'eval_dim_768_cosine_recall@1': 0.6485664639443962, 'eval_dim_768_cosine_recall@3': 0.8301476976542137, 'eval_dim_768_cosine_recall@5': 0.8861859252823632, 'eval_dim_768_cosine_recall@10': 0.9435273675065161, 'eval_dim_768_cosine_ndcg@10': 0.7985587885695002, 'eval_dim_768_cosine_mrr@10': 0.7518588046281349, 'eval_dim_768_cosine_map@100': 0.7557425865771096, 'eval_dim_512_cosine_accuracy@1': 0.6503040834057341, 'eval_dim_512_cosine_accuracy@3': 0.8297132927888793, 'eval_dim_512_cosine_accuracy@5': 0.8814074717636837, 'eval_dim_512_cosine_accuracy@10': 0

 75%|███████▌  | 90/120 [57:14<16:15, 32.51s/it]

{'loss': 0.0544, 'grad_norm': 2.2262110710144043, 'learning_rate': 3.5721239031346067e-06, 'epoch': 20.0}


 75%|███████▌  | 90/120 [57:56<16:15, 32.51s/it]

{'eval_dim_768_cosine_accuracy@1': 0.6424847958297133, 'eval_dim_768_cosine_accuracy@3': 0.8288444830582102, 'eval_dim_768_cosine_accuracy@5': 0.8840139009556907, 'eval_dim_768_cosine_accuracy@10': 0.9430929626411816, 'eval_dim_768_cosine_precision@1': 0.6424847958297133, 'eval_dim_768_cosine_precision@3': 0.27628149435273675, 'eval_dim_768_cosine_precision@5': 0.17680278019113815, 'eval_dim_768_cosine_precision@10': 0.09430929626411814, 'eval_dim_768_cosine_recall@1': 0.6424847958297133, 'eval_dim_768_cosine_recall@3': 0.8288444830582102, 'eval_dim_768_cosine_recall@5': 0.8840139009556907, 'eval_dim_768_cosine_recall@10': 0.9430929626411816, 'eval_dim_768_cosine_ndcg@10': 0.7954273121476143, 'eval_dim_768_cosine_mrr@10': 0.7478512473625409, 'eval_dim_768_cosine_map@100': 0.7517847335646215, 'eval_dim_512_cosine_accuracy@1': 0.6437880104257168, 'eval_dim_512_cosine_accuracy@3': 0.8297132927888793, 'eval_dim_512_cosine_accuracy@5': 0.8801042571676803, 'eval_dim_512_cosine_accuracy@10': 

 83%|████████▎ | 100/120 [1:03:43<11:16, 33.84s/it]

{'loss': 0.052, 'grad_norm': 1.119154691696167, 'learning_rate': 1.6451218858706374e-06, 'epoch': 22.22}


 83%|████████▎ | 100/120 [1:04:25<11:16, 33.84s/it]

{'eval_dim_768_cosine_accuracy@1': 0.6407471763683753, 'eval_dim_768_cosine_accuracy@3': 0.8258036490008688, 'eval_dim_768_cosine_accuracy@5': 0.8840139009556907, 'eval_dim_768_cosine_accuracy@10': 0.944396177237185, 'eval_dim_768_cosine_precision@1': 0.6407471763683753, 'eval_dim_768_cosine_precision@3': 0.27526788300028954, 'eval_dim_768_cosine_precision@5': 0.17680278019113813, 'eval_dim_768_cosine_precision@10': 0.09443961772371849, 'eval_dim_768_cosine_recall@1': 0.6407471763683753, 'eval_dim_768_cosine_recall@3': 0.8258036490008688, 'eval_dim_768_cosine_recall@5': 0.8840139009556907, 'eval_dim_768_cosine_recall@10': 0.944396177237185, 'eval_dim_768_cosine_ndcg@10': 0.7948630454288174, 'eval_dim_768_cosine_mrr@10': 0.746743170190172, 'eval_dim_768_cosine_map@100': 0.750561042319851, 'eval_dim_512_cosine_accuracy@1': 0.6424847958297133, 'eval_dim_512_cosine_accuracy@3': 0.8301476976542137, 'eval_dim_512_cosine_accuracy@5': 0.8788010425716768, 'eval_dim_512_cosine_accuracy@10': 0.94

 92%|█████████▏| 110/120 [1:10:12<05:48, 34.88s/it]

{'loss': 0.0507, 'grad_norm': 3.916719913482666, 'learning_rate': 4.2010487684511105e-07, 'epoch': 24.44}


 92%|█████████▏| 110/120 [1:10:54<05:48, 34.88s/it]

{'eval_dim_768_cosine_accuracy@1': 0.6424847958297133, 'eval_dim_768_cosine_accuracy@3': 0.8275412684622068, 'eval_dim_768_cosine_accuracy@5': 0.8840139009556907, 'eval_dim_768_cosine_accuracy@10': 0.9448305821025196, 'eval_dim_768_cosine_precision@1': 0.6424847958297133, 'eval_dim_768_cosine_precision@3': 0.2758470894874023, 'eval_dim_768_cosine_precision@5': 0.17680278019113815, 'eval_dim_768_cosine_precision@10': 0.09448305821025194, 'eval_dim_768_cosine_recall@1': 0.6424847958297133, 'eval_dim_768_cosine_recall@3': 0.8275412684622068, 'eval_dim_768_cosine_recall@5': 0.8840139009556907, 'eval_dim_768_cosine_recall@10': 0.9448305821025196, 'eval_dim_768_cosine_ndcg@10': 0.7958479336753231, 'eval_dim_768_cosine_mrr@10': 0.7479177871554059, 'eval_dim_768_cosine_map@100': 0.7516998684461761, 'eval_dim_512_cosine_accuracy@1': 0.6459600347523893, 'eval_dim_512_cosine_accuracy@3': 0.8305821025195482, 'eval_dim_512_cosine_accuracy@5': 0.8783666377063423, 'eval_dim_512_cosine_accuracy@10': 0

100%|██████████| 120/120 [1:16:41<00:00, 35.46s/it]

{'loss': 0.0463, 'grad_norm': 1.7233991622924805, 'learning_rate': 0.0, 'epoch': 26.67}


100%|██████████| 120/120 [1:17:23<00:00, 35.46s/it]

{'eval_dim_768_cosine_accuracy@1': 0.6416159860990444, 'eval_dim_768_cosine_accuracy@3': 0.8284100781928757, 'eval_dim_768_cosine_accuracy@5': 0.8844483058210252, 'eval_dim_768_cosine_accuracy@10': 0.944396177237185, 'eval_dim_768_cosine_precision@1': 0.6416159860990444, 'eval_dim_768_cosine_precision@3': 0.2761366927309586, 'eval_dim_768_cosine_precision@5': 0.17688966116420504, 'eval_dim_768_cosine_precision@10': 0.09443961772371849, 'eval_dim_768_cosine_recall@1': 0.6416159860990444, 'eval_dim_768_cosine_recall@3': 0.8284100781928757, 'eval_dim_768_cosine_recall@5': 0.8844483058210252, 'eval_dim_768_cosine_recall@10': 0.944396177237185, 'eval_dim_768_cosine_ndcg@10': 0.7955669732681404, 'eval_dim_768_cosine_mrr@10': 0.7476478355605195, 'eval_dim_768_cosine_map@100': 0.7514706688798185, 'eval_dim_512_cosine_accuracy@1': 0.6463944396177237, 'eval_dim_512_cosine_accuracy@3': 0.8301476976542137, 'eval_dim_512_cosine_accuracy@5': 0.8805386620330148, 'eval_dim_512_cosine_accuracy@10': 0.9

100%|██████████| 120/120 [1:17:29<00:00, 38.74s/it]


{'train_runtime': 4649.2264, 'train_samples_per_second': 14.854, 'train_steps_per_second': 0.026, 'train_loss': 0.43819571236769356, 'epoch': 26.67}


In [4]:

fine_tuned_model = SentenceTransformer(
    args.output_dir, device="cuda" if torch.cuda.is_available() else "cpu"
)
# Evaluate the model
results = evaluator(fine_tuned_model)

for k, v in results.items():
    print(k, v)


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.47 GiB. GPU 0 has a total capacity of 15.70 GiB of which 1.01 GiB is free. Including non-PyTorch memory, this process has 14.67 GiB memory in use. Of the allocated memory 12.12 GiB is allocated by PyTorch, and 2.39 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)