# Install & Load Packages

In [1]:
%pip install -U "sentence-transformers[train]" " transformers[torch]" accelerate datasets pandas matplotlib seaborn numpy

Collecting sentence-transformers[train]
  Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)
                                              0.0/227.1 kB ? eta -:--:--
     ------------------                     112.6/227.1 kB 3.3 MB/s eta 0:00:01
     --------------------------------       194.6/227.1 kB 2.0 MB/s eta 0:00:01
     -------------------------------------  225.3/227.1 kB 2.0 MB/s eta 0:00:01
     -------------------------------------- 227.1/227.1 kB 1.4 MB/s eta 0:00:00
Collecting accelerate
  Downloading accelerate-0.33.0-py3-none-any.whl (315 kB)
                                              0.0/315.1 kB ? eta -:--:--
     -------                                 61.4/315.1 kB 1.7 MB/s eta 0:00:01
     -------------                          112.6/315.1 kB 1.3 MB/s eta 0:00:01
     ----------------------------           235.5/315.1 kB 1.3 MB/s eta 0:00:01
     -------------------------------------  307.2/315.1 kB 1.3 MB/s eta 0:00:01
     ---------------------

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-intel 2.13.0 requires numpy<=1.24.3,>=1.22, but you have numpy 1.24.4 which is incompatible.
tensorflow-intel 2.13.0 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.12.2 which is incompatible.

[notice] A new release of pip is available: 23.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
# Import libraries
from datasets import load_dataset, Dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments
)
from sentence_transformers.losses import CoSENTLoss
from sentence_transformers.training_args import BatchSamplers
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator, SimilarityFunction
import pandas as pd
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


# Fine-tuning Model

In [4]:
# Define model
model = SentenceTransformer("firqaaa/indo-sentence-bert-base")

# Define loss function (CoSENTLoss | Cosine Sentence Loss -> Returning float similarity score)
loss = CoSENTLoss(model)

In [6]:
# Specify training args
args = SentenceTransformerTrainingArguments(
    output_dir="fine-tuned/sbert-fine-tuned-chatPMB",
    num_train_epochs=100,
    per_device_train_batch_size=24,
    per_device_eval_batch_size=24,
    learning_rate=2e-5,
    warmup_ratio=0.1,
    batch_sampler=BatchSamplers.NO_DUPLICATES,
    eval_strategy="steps",
    eval_steps=100,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=2,
    logging_steps=100,
)

In [11]:
# Creating eval dataset
train_patterns = pd.read_csv('data/preprocessed-data-v2.csv')['pattern']
test_patterns = pd.read_csv('data/dataset-question-v2.csv')['pattern']
# test_patterns = test_patterns.iloc[:-154]
print(train_patterns.shape, test_patterns.shape)

(558,) (558,)


In [12]:
embed_train = model.encode(train_patterns)
embed_test = model.encode(test_patterns)

In [13]:
scores = []
sentences_1 = []
sentences_2 = []

for i in range(len(test_patterns)):
    data = model.similarity(embed_train[int(i)], embed_test[int(i)])
    data = float(data[0][0])
    s1 = train_patterns[int(i)]
    s2 = test_patterns[int(i)]
    sentences_1.append(s1)
    sentences_2.append(s2)
    scores.append(data)

df = pd.DataFrame({
    "sentence_1": sentences_1,
    "sentence_2": sentences_2,
    "label": scores,
})
df.to_csv('data/fine-tuned-dataset.csv')
df.tail()

Unnamed: 0,sentence_1,sentence_2,label
553,program beasiswa sedia uin sunan gunung djati ...,Apakah program beasiswa disediakan oleh UIN Su...,0.90367
554,program beasiswa uin sunan gunung djati bandun...,Program beasiswa di UIN Sunan Gunung Djati Ban...,0.887859
555,beasiswa uin sunan gunung djati bandung ada,Apakah beasiswa di UIN Sunan Gunung Djati Band...,0.888745
556,program beasiswa uin sunan gunung djati bandun...,Program beasiswa di UIN Sunan Gunung Djati Ban...,0.888634
557,beasiswa uin sunan gunung djati bandung beri,Apakah beasiswa di UIN Sunan Gunung Djati Band...,0.892302


In [14]:
# Create evaluator & evaluate the base model
dev_evaluator = EmbeddingSimilarityEvaluator(
    sentences1=df["sentence_1"],
    sentences2=df["sentence_2"],
    scores=df["label"],
    main_similarity=SimilarityFunction.COSINE,
    show_progress_bar=True,
    precision="float32",
    name="train-evaluator",
)
dev_evaluator(model)

Batches:   0%|          | 0/35 [00:00<?, ?it/s]

Batches: 100%|██████████| 35/35 [00:20<00:00,  1.71it/s]
Batches: 100%|██████████| 35/35 [00:23<00:00,  1.49it/s]


{'train-evaluator_pearson_cosine': 0.9999999999984652,
 'train-evaluator_spearman_cosine': 0.9999993956485574,
 'train-evaluator_pearson_manhattan': 0.9754313537197102,
 'train-evaluator_spearman_manhattan': 0.9992904394150622,
 'train-evaluator_pearson_euclidean': 0.9749967817818792,
 'train-evaluator_spearman_euclidean': 0.9999992747782398,
 'train-evaluator_pearson_dot': 0.9999999999990659,
 'train-evaluator_spearman_dot': 0.9999993956487196,
 'train-evaluator_pearson_max': 0.9999999999990659,
 'train-evaluator_spearman_max': 0.9999993956487196}

In [16]:
df.shape

(558, 3)

In [17]:
training_data = Dataset.from_dict(df)
training_data

Dataset({
    features: ['sentence_1', 'sentence_2', 'label'],
    num_rows: 558
})

In [18]:
trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=training_data,
    # eval_dataset=eval_dataset,
    loss=loss,
    evaluator=dev_evaluator,
)
trainer.train()

  4%|▍         | 100/2400 [12:36<4:57:37,  7.76s/it]

{'loss': 4.3011, 'grad_norm': 21.986669540405273, 'learning_rate': 8.333333333333334e-06, 'epoch': 3.79}


Batches: 100%|██████████| 35/35 [00:22<00:00,  1.56it/s]
Batches: 100%|██████████| 35/35 [00:26<00:00,  1.34it/s]
  4%|▍         | 100/2400 [13:25<4:57:37,  7.76s/it]

{'eval_train-evaluator_pearson_cosine': 0.9516155808784074, 'eval_train-evaluator_spearman_cosine': 0.9523163665196182, 'eval_train-evaluator_pearson_manhattan': 0.9265545754418698, 'eval_train-evaluator_spearman_manhattan': 0.9522315139076578, 'eval_train-evaluator_pearson_euclidean': 0.926455767418, 'eval_train-evaluator_spearman_euclidean': 0.952316381316748, 'eval_train-evaluator_pearson_dot': 0.9516155833327419, 'eval_train-evaluator_spearman_dot': 0.9523164750861552, 'eval_train-evaluator_pearson_max': 0.9516155833327419, 'eval_train-evaluator_spearman_max': 0.9523164750861552, 'eval_runtime': 48.8363, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 3.79}


  8%|▊         | 200/2400 [25:30<5:10:27,  8.47s/it] 

{'loss': 4.312, 'grad_norm': 31.03908920288086, 'learning_rate': 1.6666666666666667e-05, 'epoch': 7.46}


Batches: 100%|██████████| 35/35 [00:22<00:00,  1.58it/s]
Batches: 100%|██████████| 35/35 [00:25<00:00,  1.38it/s]
  8%|▊         | 200/2400 [26:17<5:10:27,  8.47s/it]

{'eval_train-evaluator_pearson_cosine': 0.7800321819951106, 'eval_train-evaluator_spearman_cosine': 0.8460012971804518, 'eval_train-evaluator_pearson_manhattan': 0.8167849317040482, 'eval_train-evaluator_spearman_manhattan': 0.8468549179355855, 'eval_train-evaluator_pearson_euclidean': 0.8166058511600963, 'eval_train-evaluator_spearman_euclidean': 0.8460012533562367, 'eval_train-evaluator_pearson_dot': 0.7800321650303118, 'eval_train-evaluator_spearman_dot': 0.8460006703120116, 'eval_train-evaluator_pearson_max': 0.8167849317040482, 'eval_train-evaluator_spearman_max': 0.8468549179355855, 'eval_runtime': 47.572, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 7.46}


 12%|█▎        | 300/2400 [38:30<3:48:02,  6.52s/it] 

{'loss': 4.3889, 'grad_norm': 10.791862487792969, 'learning_rate': 1.9444444444444445e-05, 'epoch': 11.12}


Batches: 100%|██████████| 35/35 [00:21<00:00,  1.60it/s]
Batches: 100%|██████████| 35/35 [00:25<00:00,  1.39it/s]
 12%|█▎        | 300/2400 [39:17<3:48:02,  6.52s/it]

{'eval_train-evaluator_pearson_cosine': 0.866984259081386, 'eval_train-evaluator_spearman_cosine': 0.8895785711147086, 'eval_train-evaluator_pearson_manhattan': 0.866483514235168, 'eval_train-evaluator_spearman_manhattan': 0.8870074752593978, 'eval_train-evaluator_pearson_euclidean': 0.8690202087648823, 'eval_train-evaluator_spearman_euclidean': 0.8895786056490868, 'eval_train-evaluator_pearson_dot': 0.8669842685132595, 'eval_train-evaluator_spearman_dot': 0.8895787589325899, 'eval_train-evaluator_pearson_max': 0.8690202087648823, 'eval_train-evaluator_spearman_max': 0.8895787589325899, 'eval_runtime': 47.2421, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 11.12}


 17%|█▋        | 400/2400 [51:33<4:06:02,  7.38s/it] 

{'loss': 4.246, 'grad_norm': 20.749361038208008, 'learning_rate': 1.851851851851852e-05, 'epoch': 14.92}


Batches: 100%|██████████| 35/35 [00:21<00:00,  1.59it/s]
Batches: 100%|██████████| 35/35 [00:25<00:00,  1.38it/s]
 17%|█▋        | 400/2400 [52:20<4:06:02,  7.38s/it]

{'eval_train-evaluator_pearson_cosine': 0.916483865338636, 'eval_train-evaluator_spearman_cosine': 0.9537940547514959, 'eval_train-evaluator_pearson_manhattan': 0.9204664228094912, 'eval_train-evaluator_spearman_manhattan': 0.9551717378125972, 'eval_train-evaluator_pearson_euclidean': 0.9199856012495278, 'eval_train-evaluator_spearman_euclidean': 0.9537939543397168, 'eval_train-evaluator_pearson_dot': 0.9164838737475484, 'eval_train-evaluator_spearman_dot': 0.9537933422951685, 'eval_train-evaluator_pearson_max': 0.9204664228094912, 'eval_train-evaluator_spearman_max': 0.9551717378125972, 'eval_runtime': 47.39, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 14.92}


 21%|██        | 500/2400 [1:04:17<4:21:25,  8.26s/it]

{'loss': 3.9871, 'grad_norm': 25.44622802734375, 'learning_rate': 1.7592592592592595e-05, 'epoch': 18.58}


Batches: 100%|██████████| 35/35 [00:19<00:00,  1.77it/s]
Batches: 100%|██████████| 35/35 [00:24<00:00,  1.41it/s]
 21%|██        | 500/2400 [1:05:01<4:21:25,  8.26s/it]

{'eval_train-evaluator_pearson_cosine': 0.9016866025282567, 'eval_train-evaluator_spearman_cosine': 0.9330153430887441, 'eval_train-evaluator_pearson_manhattan': 0.8979047637875432, 'eval_train-evaluator_spearman_manhattan': 0.9330369592964841, 'eval_train-evaluator_pearson_euclidean': 0.8986264489069945, 'eval_train-evaluator_spearman_euclidean': 0.9330152924438065, 'eval_train-evaluator_pearson_dot': 0.9016865971626655, 'eval_train-evaluator_spearman_dot': 0.9330153271427737, 'eval_train-evaluator_pearson_max': 0.9016866025282567, 'eval_train-evaluator_spearman_max': 0.9330369592964841, 'eval_runtime': 44.7698, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 18.58}


 25%|██▌       | 600/2400 [1:16:38<3:59:43,  7.99s/it] 

{'loss': 3.8918, 'grad_norm': 16.91845703125, 'learning_rate': 1.6666666666666667e-05, 'epoch': 22.25}


Batches: 100%|██████████| 35/35 [00:19<00:00,  1.81it/s]
Batches: 100%|██████████| 35/35 [00:24<00:00,  1.42it/s]
 25%|██▌       | 600/2400 [1:17:22<3:59:43,  7.99s/it]

{'eval_train-evaluator_pearson_cosine': 0.9210258466378871, 'eval_train-evaluator_spearman_cosine': 0.9475686986339454, 'eval_train-evaluator_pearson_manhattan': 0.9100174113965731, 'eval_train-evaluator_spearman_manhattan': 0.9474965217839482, 'eval_train-evaluator_pearson_euclidean': 0.9116600598179189, 'eval_train-evaluator_spearman_euclidean': 0.9475686331865225, 'eval_train-evaluator_pearson_dot': 0.921025856675603, 'eval_train-evaluator_spearman_dot': 0.9475686403634463, 'eval_train-evaluator_pearson_max': 0.921025856675603, 'eval_train-evaluator_spearman_max': 0.9475686986339454, 'eval_runtime': 43.9968, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 22.25}


 29%|██▉       | 700/2400 [1:29:14<2:15:48,  4.79s/it] 

{'loss': 3.8895, 'grad_norm': 0.0, 'learning_rate': 1.5740740740740744e-05, 'epoch': 26.04}


Batches: 100%|██████████| 35/35 [00:19<00:00,  1.80it/s]
Batches: 100%|██████████| 35/35 [00:24<00:00,  1.44it/s]
 29%|██▉       | 700/2400 [1:29:58<2:15:48,  4.79s/it]

{'eval_train-evaluator_pearson_cosine': 0.9321178264169939, 'eval_train-evaluator_spearman_cosine': 0.9570015582602959, 'eval_train-evaluator_pearson_manhattan': 0.910473962797865, 'eval_train-evaluator_spearman_manhattan': 0.9564753759797757, 'eval_train-evaluator_pearson_euclidean': 0.9122300390763741, 'eval_train-evaluator_spearman_euclidean': 0.9570015762699372, 'eval_train-evaluator_pearson_dot': 0.9321178177439461, 'eval_train-evaluator_spearman_dot': 0.9570000121906285, 'eval_train-evaluator_pearson_max': 0.9321178264169939, 'eval_train-evaluator_spearman_max': 0.9570015762699372, 'eval_runtime': 43.7533, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 26.04}


 33%|███▎      | 800/2400 [1:41:50<3:29:29,  7.86s/it]

{'loss': 3.6486, 'grad_norm': 14.686385154724121, 'learning_rate': 1.4814814814814815e-05, 'epoch': 29.71}


Batches: 100%|██████████| 35/35 [00:19<00:00,  1.82it/s]
Batches: 100%|██████████| 35/35 [00:24<00:00,  1.43it/s]
 33%|███▎      | 800/2400 [1:42:34<3:29:29,  7.86s/it]

{'eval_train-evaluator_pearson_cosine': 0.9477071776533973, 'eval_train-evaluator_spearman_cosine': 0.9594512708453865, 'eval_train-evaluator_pearson_manhattan': 0.9203111732411582, 'eval_train-evaluator_spearman_manhattan': 0.9587315233315367, 'eval_train-evaluator_pearson_euclidean': 0.9207950830890862, 'eval_train-evaluator_spearman_euclidean': 0.9594512377113171, 'eval_train-evaluator_pearson_dot': 0.9477071746672909, 'eval_train-evaluator_spearman_dot': 0.9594511061121107, 'eval_train-evaluator_pearson_max': 0.9477071776533973, 'eval_train-evaluator_spearman_max': 0.9594512708453865, 'eval_runtime': 43.8084, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 29.71}


 38%|███▊      | 900/2400 [1:54:00<3:10:26,  7.62s/it]

{'loss': 3.3835, 'grad_norm': 18.78765296936035, 'learning_rate': 1.388888888888889e-05, 'epoch': 33.38}


Batches: 100%|██████████| 35/35 [00:19<00:00,  1.77it/s]
Batches: 100%|██████████| 35/35 [00:24<00:00,  1.43it/s]
 38%|███▊      | 900/2400 [1:54:45<3:10:26,  7.62s/it]

{'eval_train-evaluator_pearson_cosine': 0.9390452500691708, 'eval_train-evaluator_spearman_cosine': 0.9645363900724098, 'eval_train-evaluator_pearson_manhattan': 0.9120888635279306, 'eval_train-evaluator_spearman_manhattan': 0.9647881099245303, 'eval_train-evaluator_pearson_euclidean': 0.9125907948655124, 'eval_train-evaluator_spearman_euclidean': 0.9645364566917775, 'eval_train-evaluator_pearson_dot': 0.9390452482526817, 'eval_train-evaluator_spearman_dot': 0.9645354754029207, 'eval_train-evaluator_pearson_max': 0.9390452500691708, 'eval_train-evaluator_spearman_max': 0.9647881099245303, 'eval_runtime': 44.3665, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 33.38}


 42%|████▏     | 1000/2400 [2:05:57<1:43:37,  4.44s/it]

{'loss': 3.3708, 'grad_norm': 10.617086410522461, 'learning_rate': 1.2962962962962964e-05, 'epoch': 37.04}


Batches: 100%|██████████| 35/35 [00:19<00:00,  1.81it/s]
Batches: 100%|██████████| 35/35 [00:24<00:00,  1.42it/s]
 42%|████▏     | 1000/2400 [2:06:41<1:43:37,  4.44s/it]

{'eval_train-evaluator_pearson_cosine': 0.950905312025487, 'eval_train-evaluator_spearman_cosine': 0.9649218806524354, 'eval_train-evaluator_pearson_manhattan': 0.9346029184655313, 'eval_train-evaluator_spearman_manhattan': 0.9648603379680938, 'eval_train-evaluator_pearson_euclidean': 0.9337410882349435, 'eval_train-evaluator_spearman_euclidean': 0.9649219497211867, 'eval_train-evaluator_pearson_dot': 0.9509053061521452, 'eval_train-evaluator_spearman_dot': 0.9649211626990194, 'eval_train-evaluator_pearson_max': 0.950905312025487, 'eval_train-evaluator_spearman_max': 0.9649219497211867, 'eval_runtime': 44.1233, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 37.04}


 46%|████▌     | 1100/2400 [2:18:17<2:41:30,  7.45s/it]

{'loss': 3.3185, 'grad_norm': 13.443455696105957, 'learning_rate': 1.2037037037037039e-05, 'epoch': 40.83}


Batches: 100%|██████████| 35/35 [00:18<00:00,  1.85it/s]
Batches: 100%|██████████| 35/35 [00:24<00:00,  1.46it/s]
 46%|████▌     | 1100/2400 [2:19:00<2:41:30,  7.45s/it]

{'eval_train-evaluator_pearson_cosine': 0.9537389431318589, 'eval_train-evaluator_spearman_cosine': 0.9677923255949341, 'eval_train-evaluator_pearson_manhattan': 0.9153542443218923, 'eval_train-evaluator_spearman_manhattan': 0.9677684099828757, 'eval_train-evaluator_pearson_euclidean': 0.9156838549899057, 'eval_train-evaluator_spearman_euclidean': 0.9677924280858128, 'eval_train-evaluator_pearson_dot': 0.9537389427804409, 'eval_train-evaluator_spearman_dot': 0.9677915992340147, 'eval_train-evaluator_pearson_max': 0.9537389431318589, 'eval_train-evaluator_spearman_max': 0.9677924280858128, 'eval_runtime': 43.0158, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 40.83}


 50%|█████     | 1200/2400 [2:30:30<2:43:07,  8.16s/it]

{'loss': 3.1159, 'grad_norm': 22.2475643157959, 'learning_rate': 1.1111111111111113e-05, 'epoch': 44.5}


Batches: 100%|██████████| 35/35 [00:18<00:00,  1.85it/s]
Batches: 100%|██████████| 35/35 [00:23<00:00,  1.48it/s]
 50%|█████     | 1200/2400 [2:31:12<2:43:07,  8.16s/it]

{'eval_train-evaluator_pearson_cosine': 0.9493034325108537, 'eval_train-evaluator_spearman_cosine': 0.9728410955217511, 'eval_train-evaluator_pearson_manhattan': 0.9142410204050964, 'eval_train-evaluator_spearman_manhattan': 0.9725782702266825, 'eval_train-evaluator_pearson_euclidean': 0.9155843545065216, 'eval_train-evaluator_spearman_euclidean': 0.9728411459164745, 'eval_train-evaluator_pearson_dot': 0.949303432230875, 'eval_train-evaluator_spearman_dot': 0.9728404366393036, 'eval_train-evaluator_pearson_max': 0.9493034325108537, 'eval_train-evaluator_spearman_max': 0.9728411459164745, 'eval_runtime': 42.647, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 44.5}


 54%|█████▍    | 1300/2400 [2:42:16<1:59:37,  6.52s/it]

{'loss': 3.0161, 'grad_norm': 30.831308364868164, 'learning_rate': 1.0185185185185186e-05, 'epoch': 48.17}


Batches: 100%|██████████| 35/35 [00:19<00:00,  1.80it/s]
Batches: 100%|██████████| 35/35 [00:24<00:00,  1.44it/s]
 54%|█████▍    | 1300/2400 [2:43:00<1:59:37,  6.52s/it]

{'eval_train-evaluator_pearson_cosine': 0.94846153671805, 'eval_train-evaluator_spearman_cosine': 0.9715752714930435, 'eval_train-evaluator_pearson_manhattan': 0.9130949053933246, 'eval_train-evaluator_spearman_manhattan': 0.9716195978373134, 'eval_train-evaluator_pearson_euclidean': 0.9130631599516, 'eval_train-evaluator_spearman_euclidean': 0.9715752724746544, 'eval_train-evaluator_pearson_dot': 0.9484615194771584, 'eval_train-evaluator_spearman_dot': 0.9715751530767609, 'eval_train-evaluator_pearson_max': 0.94846153671805, 'eval_train-evaluator_spearman_max': 0.9716195978373134, 'eval_runtime': 43.8719, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 48.17}


 58%|█████▊    | 1400/2400 [2:54:49<2:02:46,  7.37s/it]

{'loss': 3.045, 'grad_norm': 25.3526554107666, 'learning_rate': 9.25925925925926e-06, 'epoch': 51.96}


Batches: 100%|██████████| 35/35 [00:19<00:00,  1.84it/s]
Batches: 100%|██████████| 35/35 [00:23<00:00,  1.47it/s]
 58%|█████▊    | 1400/2400 [2:55:32<2:02:46,  7.37s/it]

{'eval_train-evaluator_pearson_cosine': 0.9506788443355587, 'eval_train-evaluator_spearman_cosine': 0.9706983242834226, 'eval_train-evaluator_pearson_manhattan': 0.9135081655315156, 'eval_train-evaluator_spearman_manhattan': 0.970405005026128, 'eval_train-evaluator_pearson_euclidean': 0.9135336710028713, 'eval_train-evaluator_spearman_euclidean': 0.9706984436359012, 'eval_train-evaluator_pearson_dot': 0.9506788338664054, 'eval_train-evaluator_spearman_dot': 0.9706984603322562, 'eval_train-evaluator_pearson_max': 0.9506788443355587, 'eval_train-evaluator_spearman_max': 0.9706984603322562, 'eval_runtime': 42.8343, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 51.96}


 62%|██████▎   | 1500/2400 [3:07:05<2:00:34,  8.04s/it]

{'loss': 2.7254, 'grad_norm': 26.400279998779297, 'learning_rate': 8.333333333333334e-06, 'epoch': 55.62}


Batches: 100%|██████████| 35/35 [00:19<00:00,  1.80it/s]
Batches: 100%|██████████| 35/35 [00:24<00:00,  1.44it/s]
 62%|██████▎   | 1500/2400 [3:07:49<2:00:34,  8.04s/it]

{'eval_train-evaluator_pearson_cosine': 0.9474084259070313, 'eval_train-evaluator_spearman_cosine': 0.9707158509849902, 'eval_train-evaluator_pearson_manhattan': 0.9055862328092981, 'eval_train-evaluator_spearman_manhattan': 0.970604096730385, 'eval_train-evaluator_pearson_euclidean': 0.9062472920949667, 'eval_train-evaluator_spearman_euclidean': 0.9707157819162391, 'eval_train-evaluator_pearson_dot': 0.9474084323316491, 'eval_train-evaluator_spearman_dot': 0.9707153633836624, 'eval_train-evaluator_pearson_max': 0.9474084323316491, 'eval_train-evaluator_spearman_max': 0.9707158509849902, 'eval_runtime': 43.7917, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 55.62}


 67%|██████▋   | 1600/2400 [3:19:08<1:38:00,  7.35s/it]

{'loss': 2.6351, 'grad_norm': 17.015674591064453, 'learning_rate': 7.4074074074074075e-06, 'epoch': 59.29}


Batches: 100%|██████████| 35/35 [00:19<00:00,  1.81it/s]
Batches: 100%|██████████| 35/35 [00:24<00:00,  1.44it/s]
 67%|██████▋   | 1600/2400 [3:19:52<1:38:00,  7.35s/it]

{'eval_train-evaluator_pearson_cosine': 0.9443879326165362, 'eval_train-evaluator_spearman_cosine': 0.9724707666075179, 'eval_train-evaluator_pearson_manhattan': 0.9044068713597975, 'eval_train-evaluator_spearman_manhattan': 0.972476186603584, 'eval_train-evaluator_pearson_euclidean': 0.9048409267297501, 'eval_train-evaluator_spearman_euclidean': 0.9724707488649862, 'eval_train-evaluator_pearson_dot': 0.9443879327329033, 'eval_train-evaluator_spearman_dot': 0.9724696920849177, 'eval_train-evaluator_pearson_max': 0.9443879327329033, 'eval_train-evaluator_spearman_max': 0.972476186603584, 'eval_runtime': 43.5947, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 59.29}


 71%|███████   | 1700/2400 [3:31:29<46:54,  4.02s/it]  

{'loss': 2.5587, 'grad_norm': 0.0, 'learning_rate': 6.481481481481482e-06, 'epoch': 63.08}


Batches: 100%|██████████| 35/35 [00:19<00:00,  1.78it/s]
Batches: 100%|██████████| 35/35 [00:24<00:00,  1.43it/s]
 71%|███████   | 1700/2400 [3:32:13<46:54,  4.02s/it]

{'eval_train-evaluator_pearson_cosine': 0.9409639539068303, 'eval_train-evaluator_spearman_cosine': 0.9669618930062404, 'eval_train-evaluator_pearson_manhattan': 0.9009857189821365, 'eval_train-evaluator_spearman_manhattan': 0.9665038802397865, 'eval_train-evaluator_pearson_euclidean': 0.9009341335874386, 'eval_train-evaluator_spearman_euclidean': 0.9669617738473211, 'eval_train-evaluator_pearson_dot': 0.9409639354452991, 'eval_train-evaluator_spearman_dot': 0.9669613207542701, 'eval_train-evaluator_pearson_max': 0.9409639539068303, 'eval_train-evaluator_spearman_max': 0.9669618930062404, 'eval_runtime': 44.1436, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 63.08}


 75%|███████▌  | 1800/2400 [3:43:57<1:16:39,  7.67s/it]

{'loss': 2.586, 'grad_norm': 15.017556190490723, 'learning_rate': 5.555555555555557e-06, 'epoch': 66.75}


Batches: 100%|██████████| 35/35 [00:19<00:00,  1.79it/s]
Batches: 100%|██████████| 35/35 [00:24<00:00,  1.41it/s]
 75%|███████▌  | 1800/2400 [3:44:42<1:16:39,  7.67s/it]

{'eval_train-evaluator_pearson_cosine': 0.941246418820325, 'eval_train-evaluator_spearman_cosine': 0.9653379317470003, 'eval_train-evaluator_pearson_manhattan': 0.9018480328613957, 'eval_train-evaluator_spearman_manhattan': 0.9649316681203086, 'eval_train-evaluator_pearson_euclidean': 0.9018621535308303, 'eval_train-evaluator_spearman_euclidean': 0.9653379829500605, 'eval_train-evaluator_pearson_dot': 0.9412464209936319, 'eval_train-evaluator_spearman_dot': 0.9653375870915549, 'eval_train-evaluator_pearson_max': 0.9412464209936319, 'eval_train-evaluator_spearman_max': 0.9653379829500605, 'eval_runtime': 44.4009, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 66.75}


 79%|███████▉  | 1900/2400 [3:56:08<1:05:34,  7.87s/it]

{'loss': 2.4363, 'grad_norm': 22.36496925354004, 'learning_rate': 4.62962962962963e-06, 'epoch': 70.42}


Batches: 100%|██████████| 35/35 [00:19<00:00,  1.81it/s]
Batches: 100%|██████████| 35/35 [00:25<00:00,  1.39it/s]
 79%|███████▉  | 1900/2400 [3:56:52<1:05:34,  7.87s/it]

{'eval_train-evaluator_pearson_cosine': 0.9355518585575027, 'eval_train-evaluator_spearman_cosine': 0.9599554409867357, 'eval_train-evaluator_pearson_manhattan': 0.8979085896059616, 'eval_train-evaluator_spearman_manhattan': 0.9596416754382763, 'eval_train-evaluator_pearson_euclidean': 0.8976493223765803, 'eval_train-evaluator_spearman_euclidean': 0.9599555432069675, 'eval_train-evaluator_pearson_dot': 0.935551859430266, 'eval_train-evaluator_spearman_dot': 0.9599551854554551, 'eval_train-evaluator_pearson_max': 0.935551859430266, 'eval_train-evaluator_spearman_max': 0.9599555432069675, 'eval_runtime': 44.6525, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 70.42}


 83%|████████▎ | 2000/2400 [4:08:17<34:30,  5.18s/it]  

{'loss': 2.3777, 'grad_norm': 46.363739013671875, 'learning_rate': 3.7037037037037037e-06, 'epoch': 74.08}


Batches: 100%|██████████| 35/35 [00:19<00:00,  1.81it/s]
Batches: 100%|██████████| 35/35 [00:24<00:00,  1.43it/s]
 83%|████████▎ | 2000/2400 [4:09:01<34:30,  5.18s/it]

{'eval_train-evaluator_pearson_cosine': 0.9323771887552147, 'eval_train-evaluator_spearman_cosine': 0.9595761140156568, 'eval_train-evaluator_pearson_manhattan': 0.8924367802302463, 'eval_train-evaluator_spearman_manhattan': 0.9595833289087914, 'eval_train-evaluator_pearson_euclidean': 0.8923037125424118, 'eval_train-evaluator_spearman_euclidean': 0.9595761140156568, 'eval_train-evaluator_pearson_dot': 0.9323771870697887, 'eval_train-evaluator_spearman_dot': 0.9595754801989408, 'eval_train-evaluator_pearson_max': 0.9323771887552147, 'eval_train-evaluator_spearman_max': 0.9595833289087914, 'eval_runtime': 44.0094, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 74.08}


 88%|████████▊ | 2100/2400 [4:20:49<37:37,  7.53s/it]  

{'loss': 2.4611, 'grad_norm': 10.306602478027344, 'learning_rate': 2.7777777777777783e-06, 'epoch': 77.88}


Batches: 100%|██████████| 35/35 [00:19<00:00,  1.80it/s]
Batches: 100%|██████████| 35/35 [00:24<00:00,  1.44it/s]
 88%|████████▊ | 2100/2400 [4:21:33<37:37,  7.53s/it]

{'eval_train-evaluator_pearson_cosine': 0.9309936283401575, 'eval_train-evaluator_spearman_cosine': 0.955614795825658, 'eval_train-evaluator_pearson_manhattan': 0.8916055066911454, 'eval_train-evaluator_spearman_manhattan': 0.9550866074731895, 'eval_train-evaluator_pearson_euclidean': 0.8918430023156332, 'eval_train-evaluator_spearman_euclidean': 0.9556148453280314, 'eval_train-evaluator_pearson_dot': 0.9309936251304095, 'eval_train-evaluator_spearman_dot': 0.9556139209243997, 'eval_train-evaluator_pearson_max': 0.9309936283401575, 'eval_train-evaluator_spearman_max': 0.9556148453280314, 'eval_runtime': 43.8595, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 77.88}


 92%|█████████▏| 2200/2400 [4:32:49<27:02,  8.11s/it]  

{'loss': 2.3333, 'grad_norm': 22.306962966918945, 'learning_rate': 1.8518518518518519e-06, 'epoch': 81.54}


Batches: 100%|██████████| 35/35 [00:19<00:00,  1.76it/s]
Batches: 100%|██████████| 35/35 [00:24<00:00,  1.44it/s]
 92%|█████████▏| 2200/2400 [4:33:34<27:02,  8.11s/it]

{'eval_train-evaluator_pearson_cosine': 0.9280355744822444, 'eval_train-evaluator_spearman_cosine': 0.9536107824063469, 'eval_train-evaluator_pearson_manhattan': 0.8880807266970501, 'eval_train-evaluator_spearman_manhattan': 0.9530098167937165, 'eval_train-evaluator_pearson_euclidean': 0.8883786031523874, 'eval_train-evaluator_spearman_euclidean': 0.9536107824063469, 'eval_train-evaluator_pearson_dot': 0.9280355763285825, 'eval_train-evaluator_spearman_dot': 0.9536108839613462, 'eval_train-evaluator_pearson_max': 0.9280355763285825, 'eval_train-evaluator_spearman_max': 0.9536108839613462, 'eval_runtime': 44.1479, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 81.54}


 96%|█████████▌| 2300/2400 [4:44:56<11:48,  7.09s/it]  

{'loss': 2.2937, 'grad_norm': 18.65142059326172, 'learning_rate': 9.259259259259259e-07, 'epoch': 85.21}


Batches: 100%|██████████| 35/35 [00:19<00:00,  1.82it/s]
Batches: 100%|██████████| 35/35 [00:24<00:00,  1.44it/s]
 96%|█████████▌| 2300/2400 [4:45:40<11:48,  7.09s/it]

{'eval_train-evaluator_pearson_cosine': 0.9277420713264913, 'eval_train-evaluator_spearman_cosine': 0.9543842833911896, 'eval_train-evaluator_pearson_manhattan': 0.8883524251574323, 'eval_train-evaluator_spearman_manhattan': 0.9534302892870224, 'eval_train-evaluator_pearson_euclidean': 0.8886678630163947, 'eval_train-evaluator_spearman_euclidean': 0.9543817459021411, 'eval_train-evaluator_pearson_dot': 0.9277420647132231, 'eval_train-evaluator_spearman_dot': 0.9543837034570523, 'eval_train-evaluator_pearson_max': 0.9277420713264913, 'eval_train-evaluator_spearman_max': 0.9543842833911896, 'eval_runtime': 43.4804, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 85.21}


100%|██████████| 2400/2400 [4:57:23<00:00,  5.65s/it]

{'loss': 2.3574, 'grad_norm': 0.27893415093421936, 'learning_rate': 0.0, 'epoch': 89.0}


Batches: 100%|██████████| 35/35 [00:19<00:00,  1.83it/s]
Batches: 100%|██████████| 35/35 [00:24<00:00,  1.42it/s]
100%|██████████| 2400/2400 [4:58:07<00:00,  5.65s/it]

{'eval_train-evaluator_pearson_cosine': 0.9275069308083301, 'eval_train-evaluator_spearman_cosine': 0.9533061052805893, 'eval_train-evaluator_pearson_manhattan': 0.8881402151123426, 'eval_train-evaluator_spearman_manhattan': 0.952331818183709, 'eval_train-evaluator_pearson_euclidean': 0.8884187837558797, 'eval_train-evaluator_spearman_euclidean': 0.9533021486755879, 'eval_train-evaluator_pearson_dot': 0.9275069262105775, 'eval_train-evaluator_spearman_dot': 0.9533056672861877, 'eval_train-evaluator_pearson_max': 0.9275069308083301, 'eval_train-evaluator_spearman_max': 0.9533061052805893, 'eval_runtime': 43.844, 'eval_samples_per_second': 0.0, 'eval_steps_per_second': 0.0, 'epoch': 89.0}


100%|██████████| 2400/2400 [4:58:09<00:00,  7.45s/it]

{'train_runtime': 17889.8727, 'train_samples_per_second': 3.119, 'train_steps_per_second': 0.134, 'train_loss': 3.1949695523579917, 'epoch': 89.0}





TrainOutput(global_step=2400, training_loss=3.1949695523579917, metrics={'train_runtime': 17889.8727, 'train_samples_per_second': 3.119, 'train_steps_per_second': 0.134, 'total_flos': 0.0, 'train_loss': 3.1949695523579917, 'epoch': 89.0})

In [19]:
model.save_pretrained("fine-tuned/models/chatPMB-pretrained-2")

# Get Tags by index

In [21]:
labels = pd.read_csv('data/dataset-question-v2.csv')
labels = labels.iloc[:-12]
labels = labels['tag']
labels

0         gr_hi
1         gr_ha
2         gr_pa
3         gr_si
4         gr_so
         ...   
541    beasiswa
542    beasiswa
543    beasiswa
544    beasiswa
545    beasiswa
Name: tag, Length: 546, dtype: object

# Usage of Pretrained Model

In [37]:
model = SentenceTransformer("fine-tuned/models/chatPMB-pretrained-2")

In [None]:
sentence = model.encode("Berikan saya informasi seputar jalur masuk SNBT")
to_test = model.encode(df['sentence_2'])

In [None]:
result = np.array(model.similarity(sentence, to_test))
index = np.argmax(result)
confidence = np.max(result)
detected_label = labels[index]
index, detected_label, result, confidence

(78,
 'jm_s1',
 array([[ 0.13337867,  0.29719597,  0.2966455 ,  0.2880215 ,  0.23856631,
          0.16570197,  0.33405572,  0.30675748,  0.36961758,  0.3351106 ,
          0.3716225 ,  0.37327492,  0.33403504,  0.44880426,  0.3478146 ,
          0.16618124,  0.15524584,  0.2868624 ,  0.27764806,  0.33482254,
          0.3877466 ,  0.3975867 ,  0.2877016 ,  0.30817634,  0.3870708 ,
          0.2539267 ,  0.40455842,  0.32194304,  0.36637944,  0.35080016,
          0.28612888,  0.32021773,  0.25793436,  0.27093685,  0.3716225 ,
          0.3478146 ,  0.42094195,  0.2868624 ,  0.36961758,  0.27647585,
          0.30668586,  0.3273719 ,  0.34952635,  0.27093685,  0.37501466,
          0.35411066,  0.4358852 ,  0.30603254,  0.3152501 ,  0.39809006,
          0.3453277 ,  0.411498  ,  0.36857313,  0.26992297,  0.2980612 ,
          0.3504343 ,  0.3009507 ,  0.5973962 ,  0.5125232 ,  0.62857425,
          0.37957358,  0.5355717 ,  0.42671186,  0.3279971 ,  0.60490924,
          0.48414862,  