In [1]:
import pandas as pd

# Load the tab-separated train and dev splits.
train_df = pd.read_csv('A3_task1_data_files/train.csv', sep='\t')

# Map the 'setence1' header onto 'sentence1' (presumably dev.csv ships with the
# misspelled header -- confirm against the file). The rename is chained onto the
# read instead of using inplace=True, so no frame is mutated after creation.
dev_df = pd.read_csv('A3_task1_data_files/dev.csv', sep='\t').rename(
    columns={'setence1': 'sentence1'}
)

# DataFrame.rename ignores a source column that is absent, so a header mismatch
# would otherwise only show up as a KeyError when the examples are built from
# these columns. Fail here with a message that names the split.
REQUIRED_COLUMNS = {'sentence1', 'sentence2', 'score'}
for split_name, frame in (('train', train_df), ('dev', dev_df)):
    missing_columns = REQUIRED_COLUMNS - set(frame.columns)
    if missing_columns:
        raise ValueError(
            f"{split_name} split is missing columns: {sorted(missing_columns)}"
        )

In [2]:
from torch.utils.data import DataLoader
from sentence_transformers import InputExample, datasets

BATCH_SIZE = 64


def build_input_examples(df, max_score=5.0):
    """Turn a sentence-pair DataFrame into a list of InputExample objects.

    Args:
        df: DataFrame with 'sentence1', 'sentence2' and 'score' columns.
        max_score: Divisor applied to each score to produce the label
            (default 5.0, the value used for both splits).

    Returns:
        One InputExample per row, in row order, whose texts are the two
        sentences passed through str() and whose label is
        float(score) / max_score.
    """
    # Iterate the three columns in lockstep instead of iterrows(), which
    # builds a full Series object for every row.
    return [
        InputExample(texts=[str(first), str(second)], label=float(score) / max_score)
        for first, second, score in zip(df['sentence1'], df['sentence2'], df['score'])
    ]


# Convert each split with the same helper so train and dev cannot drift apart.
train_examples = build_input_examples(train_df)
dev_examples = build_input_examples(dev_df)

# Wrap the example lists in DataLoaders; only the training loader is shuffled.
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=BATCH_SIZE)
dev_dataloader = DataLoader(dev_examples, shuffle=False, batch_size=BATCH_SIZE)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from sentence_transformers import SentenceTransformer, models, losses

# Start from the pre-trained "all-MiniLM-L6-v2" checkpoint.
model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

# Training objective: a CosineSimilarityLoss bound to the model above.
train_loss = losses.CosineSimilarityLoss(model)

In [4]:
from sentence_transformers import evaluation

# Build the dev-set evaluator that is handed to fit() below; it is registered
# under the name 'dev'.
evaluator = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(
    dev_examples,
    name='dev',
)

# Fine-tune on the single (dataloader, loss) objective for 10 epochs with
# 100 warmup steps, writing the result to the 'fine-tuned-model' directory.
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    epochs=10,
    warmup_steps=100,
    evaluator=evaluator,
    output_path='fine-tuned-model',
)

Iteration: 100%|██████████| 90/90 [00:05<00:00, 15.87it/s]
Iteration: 100%|██████████| 90/90 [00:05<00:00, 17.82it/s]
Iteration: 100%|██████████| 90/90 [00:05<00:00, 17.69it/s]
Iteration: 100%|██████████| 90/90 [00:05<00:00, 17.77it/s]
Iteration: 100%|██████████| 90/90 [00:05<00:00, 17.75it/s]
Iteration: 100%|██████████| 90/90 [00:05<00:00, 17.66it/s]
Iteration: 100%|██████████| 90/90 [00:04<00:00, 18.01it/s]
Iteration: 100%|██████████| 90/90 [00:05<00:00, 17.54it/s]
Iteration: 100%|██████████| 90/90 [00:05<00:00, 17.47it/s]
Iteration: 100%|██████████| 90/90 [00:05<00:00, 17.42it/s]
Epoch: 100%|██████████| 10/10 [01:01<00:00,  6.11s/it]
