**Install Dependencies and Imports**

In [None]:
!pip install -U sentence-transformers
!pip install datasets

In [None]:
# Mount Google Drive inside the Colab runtime so files stored there are reachable from this notebook.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
%cd drive/MyDrive/sentence-transformer

In [None]:
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, losses, models, util
from sentence_transformers.evaluation import TripletEvaluator
from sentence_transformers.readers import InputExample
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import math
import pandas as pd
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator

In [None]:
# Directory handed to `model.fit` as its output path; the evaluation CSV is later read from its `eval/` subfolder.
model_save_path = "./fine-tune-sentence-transformers"

# Identifier passed to `load_dataset` for the training triplets.
dataset_name = "embedding-data/QQP_triplets"

# Identifier passed to `SentenceTransformer` for the pretrained starting checkpoint.
model_name = "sentence-transformers/all-MiniLM-L6-v2"

**Load Model and Dataset**

In [None]:
# Load the dataset identified by `dataset_name`.
dataset = load_dataset(dataset_name)
# View first 2 elements of dataset. Slice the rows first and select the 'set' column afterwards,
# so only two records are read instead of the entire column.
dataset['train'][:2]['set']

In [None]:
# Build one (query, positive, negative) InputExample per usable dataset record.
X_train_val = []
data = dataset['train']['set']
for record in data:
    query, positives, negatives = record['query'], record['pos'], record['neg']
    # A record is usable only with a non-empty query, at least one positive AND at least one
    # negative: both lists are indexed with [0] below, so an empty one would raise IndexError.
    if query and len(positives) >= 1 and len(negatives) >= 1:
        X_train_val.append(InputExample(texts=[query, positives[0], negatives[0]]))

In [None]:
# Hold out 20% of the triplets for validation and train on the remaining 80%.
# The seed is fixed so the split is the same on every run.
train_fraction = 0.8
split_seed = 33
X_train, X_val = train_test_split(
    X_train_val,
    train_size=train_fraction,
    random_state=split_seed,
)

In [None]:
# Training hyperparameters
epochs = 5
learning_rate = 2e-5
batch_size = 16        # batch size of the training DataLoader
test_batch_size = 8    # batch size handed to the validation evaluator

# Warm up over the first 10% of the training steps, estimated as
# (number of training examples / batch size) * epochs and rounded up.
total_train_steps = len(X_train) / batch_size * epochs
warmup_steps = math.ceil(total_train_steps * 0.1)

In [None]:
# Shuffled mini-batches of training triplets.
train_dataloader = DataLoader(X_train, batch_size=batch_size, shuffle=True)

# Evaluator built from the held-out triplets. Its `name` appears in the results CSV file name
# ('triplet_evaluation_sts-val_results.csv') that is read back after training.
val_evaluator = TripletEvaluator.from_input_examples(
    X_val,
    batch_size=test_batch_size,
    name='sts-val',
    show_progress_bar=True,
    write_csv=True,
)

**Fine-tuning Model**

In [None]:
# Instantiate the pretrained checkpoint named by `model_name`; this is the model that gets fine-tuned below.
model = SentenceTransformer(model_name)

In [None]:
# Triplet loss bound to `model`, left at the library's default settings (no other arguments are passed).
loss = losses.TripletLoss(model=model)

In [None]:
# Fine-tune `model` on the triplet batches, passing the validation evaluator and the
# hyperparameters defined above; `model_save_path` is handed over as the output location.
model.fit(
    train_objectives=[(train_dataloader, loss)],
    epochs=epochs,
    warmup_steps=warmup_steps,
    optimizer_params={'lr': learning_rate},
    evaluator=val_evaluator,
    output_path=model_save_path,
)

In [None]:
# Read the triplet evaluator's results CSV from the model output folder and keep only the
# epoch number plus the three accuracy columns.
results_csv_path = f'{model_save_path}/eval/triplet_evaluation_sts-val_results.csv'
metric_columns = ["epoch", "accuracy_cosinus", "accuracy_manhattan", "accuracy_euclidean"]
val_metrics_df = pd.read_csv(results_csv_path)[metric_columns]
val_metrics_df

**Evaluation**

In [None]:
# Load the fine-tuned model
# NOTE(review): './pic' is not the `model_save_path` that `model.fit` was given as its output path —
# confirm this directory exists and really holds the fine-tuned checkpoint.
model = SentenceTransformer.load('./pic')

In [None]:
# METHOD 1: Calculating the Accuracy
# Read the evaluation CSV into three parallel lists: column 0 is the reference question,
# columns 1 and 2 are two questions similar to it.
import csv

questions = []
similar_questions_1 = []
similar_questions_2 = []

# newline='' is what the csv module asks for so that quoted fields containing line breaks are
# parsed correctly; the encoding is explicit so the result does not depend on the platform default.
with open('questions_dataset_final.csv', 'r', newline='', encoding='utf-8') as file:
    reader = csv.reader(file)

    # Skip the header row; the default keeps an empty file from raising StopIteration.
    next(reader, None)
    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line; there is nothing to index there.
            continue
        questions.append(row[0])
        similar_questions_1.append(row[1])
        similar_questions_2.append(row[2])

In [None]:
# Accumulators filled by the evaluation loop below: the expected question for every similar
# question, and the question retrieved for it.
reference_sentences = []
predicted_sentences = []
# NOTE(review): this rebinds `model` to the pretrained `model_name` checkpoint, replacing the model
# loaded in the "Load the fine-tuned model" cell — confirm which model is meant to be evaluated.
model = SentenceTransformer(model_name)

# Embed every reference question once; get_match scores each query against these embeddings.
passage_embedding = model.encode(questions)

def get_match(query):
    """Return the entry of `questions` that scores highest against `query`.

    The query is encoded with the module-level `model` and compared, via dot-product
    score, with the pre-computed `passage_embedding` (one embedding per entry of
    `questions`).

    Args:
        query: The text to look up.

    Returns:
        The element of `questions` at the position of the highest score. When several
        entries share the highest score, the one that appears first in `questions` wins.
    """
    query_embedding = model.encode(query)
    # dot_score returns one row of scores per query; a single query was passed, so take row 0.
    sim_scores = util.dot_score(query_embedding, passage_embedding).numpy()[0]
    # argmax yields the first index of the maximum, so there is no need to sort every score.
    most_sim_pos = int(sim_scores.argmax())
    return questions[most_sim_pos]

# For every reference question, look up both of its similar questions and record the retrieved
# question next to the expected one (two entries per reference question).
for idx, first_variant in enumerate(similar_questions_1):
    expected = questions[idx]
    reference_sentences.extend([expected, expected])

    for variant in (first_variant, similar_questions_2[idx]):
        predicted_sentences.append(get_match(variant))

In [None]:
# Count the positions where the retrieved question is exactly the expected one.
num_correct = sum(
    1
    for predicted, reference in zip(predicted_sentences, reference_sentences)
    if predicted == reference
)

# Accuracy is the share of exact matches among all predictions.
accuracy = num_correct / len(predicted_sentences)

print("Accuracy:", accuracy)

In [None]:
# METHOD 2: STSb performance
# Load the validation split of the 'stsb' configuration of the 'glue' dataset.

sts = load_dataset('glue', 'stsb', split='validation')

In [None]:
# Rescale the STSb similarity labels from the 0-5 range to the 0-1 range by dividing each by 5.
sts = sts.map(lambda x: {'label': x['label'] / 5.0})


In [None]:
# Wrap every STSb sentence pair, together with its rescaled label, in an InputExample.
data = [
    InputExample(texts=[pair['sentence1'], pair['sentence2']], label=pair['label'])
    for pair in sts
]

In [None]:
# Build a similarity evaluator from the STSb sentence pairs and their rescaled labels.
evaluator = EmbeddingSimilarityEvaluator.from_input_examples(data, write_csv=True)
# Spearman's Rank Correlation of model on STSb dataset
# NOTE(review): the exact return value of the evaluator call depends on the installed
# sentence-transformers version — confirm it is the Spearman correlation.
evaluator(model)