# Install Necessary Packages

In [2]:
!pip install transformers[torch]



In [3]:
!pip install -U sentence-transformers



In [4]:
!pip install datasets



In [5]:
from sentence_transformers import SentenceTransformer
import torch


# Load Dataset

In [18]:

from datasets import load_dataset

# Number of training triplets to keep. A small subset keeps the fine-tuning
# run short; raise it to train on more data.
TRAIN_SUBSET_SIZE = 5000

# Load the "triplet" configuration of the all-nli dataset; it exposes the
# "train", "dev" and "test" splits used below.
dataset = load_dataset("sentence-transformers/all-nli", "triplet")

# Keep the first TRAIN_SUBSET_SIZE rows of the training split (clamped to the
# split length so a larger setting cannot index past the end).
train_dataset = dataset["train"].select(
    range(min(TRAIN_SUBSET_SIZE, len(dataset["train"])))
)
eval_dataset = dataset["dev"]
test_dataset = dataset["test"]


In [19]:
# Display the training subset's summary (feature names and row count).
train_dataset

Dataset({
    features: ['anchor', 'positive', 'negative'],
    num_rows: 5000
})

In [20]:
# Preview the training subset as a pandas DataFrame. Column meanings:
#   anchor   - the original sentence or query.
#   positive - a correct or relevant response to the anchor.
#   negative - an incorrect or irrelevant response to the anchor.
train_dataset.to_pandas()

Unnamed: 0,anchor,positive,negative
0,A person on a horse jumps over a broken down a...,"A person is outdoors, on a horse.","A person is at a diner, ordering an omelette."
1,Children smiling and waving at camera,There are children present,The kids are frowning
2,A boy is jumping on skateboard in the middle o...,The boy does a skateboarding trick.,The boy skates down the sidewalk.
3,Two blond women are hugging one another.,There are women showing affection.,The women are sleeping.
4,"A few people in a restaurant setting, one of t...",The diners are at a restaurant.,The people are sitting at desks in school.
...,...,...,...
4995,The people are outside.,People on ATVs and dirt bikes are traveling al...,A woman in a pink shirt is handing a bag to th...
4996,The people are outside.,People on ATVs and dirt bikes are traveling al...,A small group of adult males enjoy a conversat...
4997,The people are outside.,People on ATVs and dirt bikes are traveling al...,Two guys and one girl are sitting at a table i...
4998,The people are outside.,People on ATVs and dirt bikes are traveling al...,People sitting on black chairs on a bus.


# Load Model

In [None]:

# Load the pretrained sentence-embedding model that will be fine-tuned.
# NOTE(review): this cell has no execution count in the saved notebook, yet the
# loss, trainer and evaluator cells below all depend on `model` - make sure it
# is run (Restart & Run All) before continuing.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Setting up Training Arguments

In [21]:
from sentence_transformers import SentenceTransformerTrainer, SentenceTransformerTrainingArguments
from sentence_transformers.losses import MultipleNegativesRankingLoss
from sentence_transformers.training_args import BatchSamplers



In [22]:
# Define the loss function used for fine-tuning; it wraps the model being trained.
loss = MultipleNegativesRankingLoss(model)


In [23]:
# Training configuration for the fine-tuning run.
args = SentenceTransformerTrainingArguments(
    # NOTE(review): relative path - presumably expects Google Drive to be
    # mounted under the working directory; confirm before running.
    output_dir="drive/MyDrive/models/all-MiniLM-finetuned",
    num_train_epochs=3,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    learning_rate=2e-5,
    warmup_ratio=0.05,
    # Mixed-precision training needs a CUDA device; fall back to full
    # precision instead of failing when no GPU is available.
    fp16=torch.cuda.is_available(),
    batch_sampler=BatchSamplers.NO_DUPLICATES,
    eval_strategy="steps",
    # The recorded run is only 471 optimizer steps long (5000 samples, batch
    # size 32, 3 epochs), so the previous value of 500 never triggered an
    # evaluation and the validation-loss column stayed empty.
    eval_steps=100,
    save_strategy="epoch",
    save_total_limit=2,
    logging_steps=50,
    run_name="all-MiniLM-finetuned",
)


# Train

In [24]:
# Wire the model, its loss, the training configuration and the train/dev
# splits into a single trainer object.
trainer = SentenceTransformerTrainer(
    model=model,
    loss=loss,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)


In [25]:
# Run fine-tuning; the returned TrainOutput (global step, training loss and
# runtime metrics) is displayed as the cell output.
trainer.train()

Step,Training Loss,Validation Loss


TrainOutput(global_step=471, training_loss=0.2777217553173154, metrics={'train_runtime': 119.4627, 'train_samples_per_second': 125.562, 'train_steps_per_second': 3.943, 'total_flos': 0.0, 'train_loss': 0.2777217553173154, 'epoch': 3.0})

# Test: Is the Model Any Good?

In [26]:
from sentence_transformers.evaluation import TripletEvaluator

# Pull the three text columns out of the held-out test split.
test_anchors = test_dataset["anchor"]
test_positives = test_dataset["positive"]
test_negatives = test_dataset["negative"]

# The evaluator name becomes the prefix of the reported metric keys.
evaluator_name = "all-nli-test"
test_evaluator = TripletEvaluator(
    anchors=test_anchors,
    positives=test_positives,
    negatives=test_negatives,
    name=evaluator_name,
)

# Score the fine-tuned model; the resulting metrics dict is the cell output.
test_evaluator(model)


{'all-nli-test_cosine_accuracy': 0.8762294054031372}