**How can we share pretrained models?**

In [10]:
# Authenticate with the Hugging Face Hub so the push_to_hub API can upload from this notebook.
# notebook_login() renders a login widget in the cell output.

from huggingface_hub import notebook_login

notebook_login()

# Terminal equivalent (run in a shell, not in Python):
# huggingface-cli login

# Keras/TensorFlow alternative, kept for reference only (nothing below uses it):
# from transformers import PushToHubCallback

# callback = PushToHubCallback(
#     "bert-finetuned-mrpc", save_strategy="epoch", tokenizer=tokenizer
# )

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [1]:
from datasets import load_dataset, load_metric

# Download the CoLA configuration of the GLUE benchmark; the result is a
# DatasetDict with train / validation / test splits.
raw_datasets = load_dataset(path="glue", name="cola")

# Leave the DatasetDict as the last expression so the notebook displays its summary.
raw_datasets

DatasetDict({
    train: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 8551
    })
    validation: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 1043
    })
    test: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 1063
    })
})

In [2]:
from transformers import AutoTokenizer

# Name of the pretrained checkpoint; the same variable is reused later when loading the model.
model_checkpoint = "bert-base-cased"
# Load the tokenizer that belongs to the checkpoint named above.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [3]:
def preprocess_function(examples):
    """Tokenize a batch of examples.

    Reads the "sentence" column of `examples` and runs it through the
    module-level `tokenizer` with truncation enabled. No padding is
    requested at this stage.
    """
    sentences = examples["sentence"]
    return tokenizer(sentences, truncation=True)


# batched=True hands the function whole batches of rows rather than one row at a time.
tokenized_datasets = raw_datasets.map(function=preprocess_function, batched=True)

# Load the pretrained encoder together with a sequence-classification head.
from transformers import AutoModelForSequenceClassification

# Read the class names from the dataset's label feature instead of relying on
# the library's implicit two-label default, so the head size always matches
# the data being fine-tuned on.
label_names = raw_datasets["train"].features["label"].names
id2label = dict(enumerate(label_names))
label2id = {name: idx for idx, name in id2label.items()}

# Storing the label maps in the model config means the saved (and pushed)
# model reports readable class names rather than generic LABEL_0 / LABEL_1.
# The classification head itself is newly initialised and still has to be
# trained before its predictions are meaningful.
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=len(label_names),
    id2label=id2label,
    label2id=label2id,
)

Map:   0%|          | 0/8551 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Map:   0%|          | 0/1063 [00:00<?, ? examples/s]

Downloading model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
import numpy as np
from datasets import load_metric
from transformers import TrainingArguments

# GLUE/CoLA metric object used for scoring during evaluation.
# NOTE(review): `load_metric` is deprecated in recent `datasets` releases in
# favour of the separate `evaluate` package — confirm against the installed version.
metric = load_metric("glue", "cola")


def compute_metrics(eval_pred):
    """Score one evaluation pass for the Trainer.

    `eval_pred` is unpacked into (model outputs, reference labels). The
    predicted class for each example is the arg-max over the last axis of
    the model outputs; the result of `metric.compute` is returned as-is.
    """
    logits, references = eval_pred
    predicted_ids = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predicted_ids, references=references)

# Training configuration, grouped by concern.
args = TrainingArguments(
    # Local directory for checkpoints and logs.
    output_dir="bert-fine-tuned-cola",
    # Optimisation hyper-parameters.
    num_train_epochs=3,
    learning_rate=2e-5,
    weight_decay=0.01,
    # Evaluate and checkpoint once per epoch.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    # Upload to the Hub under this repository name.
    push_to_hub=True,
    hub_model_id="bert-playground",
)

In [11]:
# Wire the model, training arguments, tokenized splits, metric function and
# tokenizer into a Trainer, then run fine-tuning.
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=args,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    # Train on the "train" split; score on "validation" at each evaluation.
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
)

# Last expression of the cell, so the returned TrainOutput is displayed.
trainer.train()

Cloning https://huggingface.co/antoineross/bert-playground into local empty directory.


  0%|          | 0/3207 [00:00<?, ?it/s]

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'loss': 0.5452, 'learning_rate': 1.688182101652635e-05, 'epoch': 0.47}
{'loss': 0.4401, 'learning_rate': 1.3763642033052697e-05, 'epoch': 0.94}


  0%|          | 0/131 [00:00<?, ?it/s]

{'eval_loss': 0.4154660701751709, 'eval_matthews_correlation': 0.5719840023272299, 'eval_runtime': 4.1978, 'eval_samples_per_second': 248.464, 'eval_steps_per_second': 31.207, 'epoch': 1.0}
{'loss': 0.3202, 'learning_rate': 1.0645463049579046e-05, 'epoch': 1.4}
{'loss': 0.3121, 'learning_rate': 7.527284066105395e-06, 'epoch': 1.87}


  0%|          | 0/131 [00:00<?, ?it/s]

{'eval_loss': 0.6456720232963562, 'eval_matthews_correlation': 0.6039307167689609, 'eval_runtime': 3.6375, 'eval_samples_per_second': 286.735, 'eval_steps_per_second': 36.014, 'epoch': 2.0}
{'loss': 0.2052, 'learning_rate': 4.409105082631744e-06, 'epoch': 2.34}
{'loss': 0.1764, 'learning_rate': 1.2909260991580918e-06, 'epoch': 2.81}


  0%|          | 0/131 [00:00<?, ?it/s]

{'eval_loss': 0.8178486824035645, 'eval_matthews_correlation': 0.606823117358914, 'eval_runtime': 3.7945, 'eval_samples_per_second': 274.871, 'eval_steps_per_second': 34.524, 'epoch': 3.0}
{'train_runtime': 457.5304, 'train_samples_per_second': 56.068, 'train_steps_per_second': 7.009, 'train_loss': 0.3251647187944584, 'epoch': 3.0}


TrainOutput(global_step=3207, training_loss=0.3251647187944584, metrics={'train_runtime': 457.5304, 'train_samples_per_second': 56.068, 'train_steps_per_second': 7.009, 'train_loss': 0.3251647187944584, 'epoch': 3.0})

In [12]:
# Final upload of the trained model to the Hub; the string is the commit message.
trainer.push_to_hub(commit_message="End of training")

Several commits (2) will be pushed upstream.
The progress bars may be unreliable.


Upload file pytorch_model.bin:   0%|          | 1.00/413M [00:00<?, ?B/s]

To https://huggingface.co/antoineross/bert-playground
   a92f012..15954cd  main -> main

To https://huggingface.co/antoineross/bert-playground
   15954cd..c222882  main -> main



'https://huggingface.co/antoineross/bert-playground/commit/15954cd102e5d987682421ac165cb43cc15ceafd'