<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/SETFIT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Reference repository for SetFit: https://github.com/Connatix/huggingface-setfit

In [None]:
!pip install git+https://github.com/huggingface/setfit.git -q

In [None]:
!pip install colab-env --quiet

In [3]:
import os

# colab_env is imported for its side effects and has to run before the token
# lookup below. NOTE(review): presumably it mounts Google Drive and loads saved
# variables into the process environment — confirm against the colab-env docs.
import colab_env
from huggingface_hub import login

# Hugging Face write token, taken from the environment instead of being
# hard-coded in the notebook. os.getenv yields None when the variable is unset.
access_token_write = os.getenv("HUGGINGFACE_ACCESS_TOKEN_WRITE")

# Authenticate against the Hub and also hand the token to the git credential helper.
login(token=access_token_write, add_to_git_credential=True)

Mounted at /content/gdrive
Token is valid (permission: write).
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
!pip install IPython

In [None]:
from datasets import load_dataset
from sentence_transformers.losses import CosineSimilarityLoss

# SetFitTrainer is deprecated in SetFit 1.x; it stays imported only so that any
# other cell still relying on the name keeps working.
from setfit import SetFitModel, SetFitTrainer, Trainer, TrainingArguments, sample_dataset


# Hub repository the fine-tuned model is pushed to and then reloaded from.
HUB_MODEL_ID = "frankmorales2020/my-awesome-setfit-model"

# Load a dataset from the Hugging Face Hub
dataset = load_dataset("sst2")

# Simulate the few-shot regime by sampling 8 examples per class
train_dataset = sample_dataset(dataset["train"], label_column="label", num_samples=8)
eval_dataset = dataset["validation"]

# Load a SetFit model from Hub
model = SetFitModel.from_pretrained("sentence-transformers/paraphrase-mpnet-base-v2")

# Training hyperparameters. SetFit 1.x collects these in TrainingArguments
# instead of passing them as keyword arguments to the trainer.
training_args = TrainingArguments(
    batch_size=16,
    num_iterations=20,  # The number of text pairs to generate for contrastive learning
    num_epochs=1,  # The number of epochs to use for contrastive learning
    loss=CosineSimilarityLoss,
)

# Create trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    metric="accuracy",
    column_mapping={"sentence": "text", "label": "label"},  # Map dataset columns to text/label expected by trainer
)

# Train and evaluate
trainer.train()
metrics = trainer.evaluate()
# Show the evaluation result; it was previously computed but never displayed.
print(metrics)

# Push model to the Hub
trainer.push_to_hub(HUB_MODEL_ID)

# Download from Hub and run inference
model = SetFitModel.from_pretrained(HUB_MODEL_ID)

In [11]:
# Classify two example sentences with the model reloaded from the Hub
preds = model(
    [
        "i loved the spiderman movie!",
        "pineapple on pizza is the worst 🤮",
    ]
)

In [12]:
# Display the predictions computed in the previous cell.
preds

tensor([1, 0])

In [13]:
# Classify two positively worded sentences and show the result
preds = model(
    [
        "i loved the spiderman movie!",
        "good weather",
    ]
)

preds

tensor([1, 1])

In [14]:
# Classify two negatively worded sentences and show the result
preds = model(
    [
        "i hate the spiderman movie!",
        "bad weather",
    ]
)

preds

tensor([0, 0])

In [15]:
!pip show setfit

Name: setfit
Version: 1.1.0.dev0
Summary: Efficient few-shot learning with Sentence Transformers
Home-page: https://github.com/huggingface/setfit
Author: 
Author-email: 
License: Apache 2.0
Location: /usr/local/lib/python3.10/dist-packages
Requires: datasets, evaluate, huggingface-hub, packaging, scikit-learn, sentence-transformers
Required-by: 
