In [None]:
# Shell out to the nvidia-smi CLI and show its output in the cell
# (presumably a quick check that a GPU is attached to this runtime).
!nvidia-smi

In [None]:
!pip install pandas setfit datasets torch ekphrasis scikit-learn

In [3]:
import torch
import random
import numpy as np
from sentence_transformers.losses import CosineSimilarityLoss
from setfit import SetFitModel
from setfit import SetFitTrainer
from sklearn.metrics import classification_report
from datasets import Dataset
import pandas as pd

In [4]:
# Single seed shared by every random number generator used in this notebook.
seed = 42

# Seed, in order: torch (CPU), Python's `random`, NumPy's legacy global RNG,
# and torch's CUDA generators on all devices.
for _apply_seed in (
    torch.manual_seed,
    random.seed,
    np.random.seed,
    torch.cuda.manual_seed_all,
):
    _apply_seed(seed)
del _apply_seed  # do not leave the loop variable behind in the notebook namespace

In [5]:

# Read the three CSV files (train, test, and the full test set) into DataFrames,
# in that order.
train_df, test_df, full_test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        './train_set_hand.csv',
        './test_set_hand.csv',
        './test_set.csv',
    )
)

# Wrap the train/test frames as `datasets.Dataset` objects for the trainer;
# full_test_df intentionally stays a DataFrame and is used directly later on.
train_set, test_set = map(Dataset.from_pandas, (train_df, test_df))

In [None]:
# Load the pretrained sentence-transformers checkpoint as a SetFit model.
model = SetFitModel.from_pretrained(
    "sentence-transformers/all-mpnet-base-v2"
)

# Build the trainer around that model: cosine-similarity loss, one epoch,
# 20 iterations, training on train_set and evaluating on test_set.
trainer = SetFitTrainer(
    model=model,
    loss_class=CosineSimilarityLoss,
    num_epochs=1,
    num_iterations=20,
    train_dataset=train_set,
    eval_dataset=test_set,
)

In [None]:
# Run training with the trainer configured above (expected to be the slow cell
# of the notebook; a GPU is presumably intended, given the nvidia-smi check).
trainer.train()

In [None]:
# Ask the trainer for its evaluation metrics (presumably computed against the
# eval_dataset, i.e. test_set, that was passed at construction) and keep them.
metrics = trainer.evaluate()
# Bare last expression so the notebook displays the metrics as the cell output.
metrics

In [None]:
# Predict a label for every text in test_set with the trained model.
y_pred = trainer.model.predict(test_set['text'])

# Build the classification report (4 decimal places) against the reference
# labels, then print it.
hand_report = classification_report(
    y_true=test_set['label'],
    y_pred=y_pred,
    digits=4,
)
print(hand_report)

In [None]:
# Score the trained model on the full test set (loaded from ./test_set.csv).
# predict() is given a plain list of strings instead of the pandas Series:
# its documented input is a string or list of strings, and integer indexing on
# a Series is label-based, so a Series is only safe while its index is the
# default 0..n-1 range. The list form works regardless of the index.
y_pred = trainer.model.predict(full_test_df['text'].tolist())

# Classification report (4 decimal places) against the reference labels.
print(classification_report(full_test_df['label'], y_pred, digits=4))