# Sharing pretrained models (PyTorch)

## Using the push_to_hub API

You will need to setup git, adapt your email and name in the following cell.

In [None]:
!git config --global user.email "you@example.com"
!git config --global user.name "Your Name"

In [4]:
!git config --global --list

credential.helper=store


You will also need to be logged in to the Hugging Face Hub. Execute the following and enter your credentials.

In [2]:
from huggingface_hub import notebook_login

notebook_login()

Login successful
Your token has been saved to /root/.huggingface/token


In [7]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    "bert-finetuned-mrpc", save_strategy="epoch", push_to_hub=True
)

In [8]:
from transformers import AutoModelForMaskedLM, AutoTokenizer

checkpoint = "camembert-base"

model = AutoModelForMaskedLM.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

Downloading:   0%|          | 0.00/508 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/445M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/811k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.40M [00:00<?, ?B/s]

In [9]:
model.push_to_hub("dummy-model")

CommitInfo(commit_url='https://huggingface.co/pallavi176/dummy-model/commit/550273f6c3d26dfd628002e81cc223acf23a6e19', commit_message='Upload CamembertForMaskedLM', commit_description='', oid='550273f6c3d26dfd628002e81cc223acf23a6e19', pr_url=None, pr_revision=None, pr_num=None)

In [10]:
tokenizer.push_to_hub("dummy-model")

CommitInfo(commit_url='https://huggingface.co/pallavi176/dummy-model/commit/dba68740034aea1ef44ba739f6a7c7b3a64e4bc3', commit_message='Upload tokenizer', commit_description='', oid='dba68740034aea1ef44ba739f6a7c7b3a64e4bc3', pr_url=None, pr_revision=None, pr_num=None)

In [12]:
#tokenizer.push_to_hub("dummy-model", organization="huggingface")

In [13]:
#tokenizer.push_to_hub("dummy-model", organization="huggingface", use_auth_token="<TOKEN>")

## Using the huggingface_hub Python library

In [15]:
! pip install huggingface_hub

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from huggingface_hub import (
    # User management
    login,
    logout,
    whoami,

    # Repository creation and management
    create_repo,
    delete_repo,
    update_repo_visibility,

    # And some methods to retrieve/change information about the content
    list_models,
    list_datasets,
    list_metrics,
    list_repo_files,
    upload_file,
    delete_file,
)

In [17]:
import huggingface_hub

In [18]:
from huggingface_hub import create_repo

create_repo("dummy-model1")

'https://huggingface.co/pallavi176/dummy-model1'

In [None]:
from huggingface_hub import create_repo

#create_repo("dummy-model", organization="huggingface")

## Using the web interface

## Uploading the model files

### The upload_file approach

In [None]:
from huggingface_hub import upload_file

upload_file(
    "<path_to_file>/config.json",
    path_in_repo="config.json",
    repo_id="<namespace>/dummy-model2",
)

### The Repository class

In [None]:
from huggingface_hub import Repository

repo = Repository("<path_to_dummy_folder>", clone_from="<namespace>/dummy-model2")

In [None]:
repo.git_pull()
repo.git_add()
repo.git_commit()
repo.git_push()
repo.git_tag()

In [None]:
repo.git_pull()

In [None]:
model.save_pretrained("<path_to_dummy_folder>")
tokenizer.save_pretrained("<path_to_dummy_folder>")

In [None]:
repo.git_add()
repo.git_commit("Add model and tokenizer files")
repo.git_push()

### The git-based approach
- git lfs install
- git clone https://huggingface.co/<namespace>/<your-model-id>
- git clone https://huggingface.co/pallavi176/dummy-model2
- cd dummy && ls

In [None]:
from transformers import AutoModelForMaskedLM, AutoTokenizer

checkpoint = "camembert-base"

model = AutoModelForMaskedLM.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Do whatever with the model, train it, fine-tune it...

model.save_pretrained("<path_to_dummy_folder>")
tokenizer.save_pretrained("<path_to_dummy_folder>")

- git add .
- git status
- git lfs status
- git commit -m "First model version"
- git push

## Managing a repo on the Model Hub

#### Upload files using huggingface_hub library

In [1]:
!pip install huggingface_hub

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting huggingface_hub
  Downloading huggingface_hub-0.10.1-py3-none-any.whl (163 kB)
[K     |████████████████████████████████| 163 kB 7.4 MB/s 
Installing collected packages: huggingface-hub
Successfully installed huggingface-hub-0.10.1


In [15]:
from huggingface_hub import notebook_login
# Login with huggingface write access token
notebook_login()

Login successful
Your token has been saved to /root/.huggingface/token


In [4]:
from huggingface_hub import upload_file

In [5]:
#upload_file("Current loaction of the file", 'path of the file in repo', 'id of the repo we are pushing')
#upload_file("path_to_file", 'path_in__file in _repo', '<namespace>/<repo_id>')

In [6]:
with open("/tmp/README.md", "w+") as f:
  f.write("# My dummy model")

In [9]:
upload_file(path_or_fileobj="/tmp/README.md", path_in_repo="README.md", repo_id="pallavi176/dummy_model3")

'https://huggingface.co/pallavi176/dummy_model3/blob/main/README.md'

In [17]:
from huggingface_hub import delete_file
delete_file(path_in_repo="README.md", repo_id="pallavi176/dummy-model2")

CommitInfo(commit_url='https://huggingface.co/pallavi176/dummy-model2/commit/a31472d3fe372a3fa7286a930c163f409e788dfe', commit_message='Delete README.md with huggingface_hub', commit_description='', oid='a31472d3fe372a3fa7286a930c163f409e788dfe', pr_url=None, pr_revision=None, pr_num=None)

#### Uploading using repository utility

In [18]:
from huggingface_hub import Repository
repo = Repository("local-folder", clone_from="pallavi176/dummy_model3")

Cloning https://huggingface.co/pallavi176/dummy_model3 into local empty directory.


In [None]:
from transformer import AutoModelForSequenceClassification, AutoTokenizer
model = AutoModelForSequenceClassification.from_pretrained("/tmp/cool-model")
tokenizer = AutoTokenizer.from_pretrained("/tmp/cool-model")
repo.git_pull()

In [None]:
model.save_pretrained(repo.local_dir)
tokenizer.save_pretrained(repo.local_dir)

In [None]:
repo.git_add()
repo.git_commit("Added model and tokenizer")
repo.git_push()
repo.git_tag()

https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/videos/training_loop.ipynb
- Try to train this model and uploaded its model & tokenizers

## The Push to Hub API (PyTorch)

In [19]:
! pip install datasets transformers[sentencepiece]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [20]:
from huggingface_hub import notebook_login
# Login with huggingface write access token
notebook_login()

Login successful
Your token has been saved to /root/.huggingface/token


In [21]:
from datasets import load_dataset, load_metric

raw_datasets = load_dataset("glue", "cola")

Downloading builder script:   0%|          | 0.00/28.8k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/27.8k [00:00<?, ?B/s]

Downloading and preparing dataset glue/cola to /root/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad...


Downloading data:   0%|          | 0.00/377k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8551 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1043 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1063 [00:00<?, ? examples/s]

Dataset glue downloaded and prepared to /root/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [22]:
raw_datasets

DatasetDict({
    train: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 8551
    })
    validation: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 1043
    })
    test: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 1063
    })
})

In [23]:
from transformers import AutoTokenizer

model_checkpoint = "bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/436k [00:00<?, ?B/s]

In [24]:
tokenizer

PreTrainedTokenizerFast(name_or_path='bert-base-cased', vocab_size=28996, model_max_len=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'})

In [25]:
def preprocess_function(examples):
    return tokenizer(examples["sentence"], truncation=True)

tokenized_datasets = raw_datasets.map(preprocess_function, batched=True)

  0%|          | 0/9 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

In [26]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['sentence', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 8551
    })
    validation: Dataset({
        features: ['sentence', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1043
    })
    test: Dataset({
        features: ['sentence', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1063
    })
})

In [27]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint)

Downloading:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

In [28]:
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [29]:
import numpy as np
from datasets import load_metric

metric = load_metric("glue", "cola")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

  after removing the cwd from sys.path.


Downloading builder script:   0%|          | 0.00/1.84k [00:00<?, ?B/s]

In [30]:
from transformers import TrainingArguments

args = TrainingArguments(
    "bert-fine-tuned-cola",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    push_to_hub=True, # Push to hub
)

In [31]:
from transformers import Trainer

trainer = Trainer(
    model,
    args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)
trainer.train()

Cloning https://huggingface.co/pallavi176/bert-fine-tuned-cola into local empty directory.
The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: sentence, idx. If sentence, idx are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 8551
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 3207
  Number of trainable parameters = 108311810
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,Matthews Correlation
1,0.4785,0.526487,0.499573
2,0.3162,0.662559,0.570118
3,0.1779,0.81358,0.577859


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: sentence, idx. If sentence, idx are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1043
  Batch size = 8
Saving model checkpoint to bert-fine-tuned-cola/checkpoint-1069
Configuration saved in bert-fine-tuned-cola/checkpoint-1069/config.json
Model weights saved in bert-fine-tuned-cola/checkpoint-1069/pytorch_model.bin
tokenizer config file saved in bert-fine-tuned-cola/checkpoint-1069/tokenizer_config.json
Special tokens file saved in bert-fine-tuned-cola/checkpoint-1069/special_tokens_map.json
tokenizer config file saved in bert-fine-tuned-cola/tokenizer_config.json
Special tokens file saved in bert-fine-tuned-cola/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassificatio

TrainOutput(global_step=3207, training_loss=0.33514476736869847, metrics={'train_runtime': 331.1323, 'train_samples_per_second': 77.471, 'train_steps_per_second': 9.685, 'total_flos': 244397281547520.0, 'train_loss': 0.33514476736869847, 'epoch': 3.0})

In [32]:
trainer.push_to_hub("End of training")

Saving model checkpoint to bert-fine-tuned-cola
Configuration saved in bert-fine-tuned-cola/config.json
Model weights saved in bert-fine-tuned-cola/pytorch_model.bin
tokenizer config file saved in bert-fine-tuned-cola/tokenizer_config.json
Special tokens file saved in bert-fine-tuned-cola/special_tokens_map.json
Several commits (2) will be pushed upstream.
The progress bars may be unreliable.


Upload file pytorch_model.bin:   0%|          | 3.34k/413M [00:00<?, ?B/s]

Upload file runs/Nov05_11-31-58_a8ae45916e94/events.out.tfevents.1667648026.a8ae45916e94.77.0:  57%|#####7    …

remote: Scanning LFS files for validity, may be slow...        
remote: LFS file scan complete.        
To https://huggingface.co/pallavi176/bert-fine-tuned-cola
   5fb3800..731fa41  main -> main

remote: LFS file scan complete.        
To https://huggingface.co/pallavi176/bert-fine-tuned-cola
   5fb3800..731fa41  main -> main

To https://huggingface.co/pallavi176/bert-fine-tuned-cola
   731fa41..658031c  main -> main

   731fa41..658031c  main -> main



'https://huggingface.co/pallavi176/bert-fine-tuned-cola/commit/731fa41efcdade01be5b678664c4ecb27d1bcf90'

### Pusing components individually

In [33]:
repo_name = "bert-fine-tuned-cola"

model.push_to_hub(repo_name)
tokenizer.push_to_hub(repo_name)

Configuration saved in bert-fine-tuned-cola/config.json
Model weights saved in bert-fine-tuned-cola/pytorch_model.bin
Uploading the following files to pallavi176/bert-fine-tuned-cola: pytorch_model.bin,config.json
tokenizer config file saved in bert-fine-tuned-cola/tokenizer_config.json
Special tokens file saved in bert-fine-tuned-cola/special_tokens_map.json
Uploading the following files to pallavi176/bert-fine-tuned-cola: vocab.txt,tokenizer_config.json,tokenizer.json,special_tokens_map.json


CommitInfo(commit_url='https://huggingface.co/pallavi176/bert-fine-tuned-cola/commit/57f9501464ccbb199a6e1d766df86ba8ade301a8', commit_message='Upload tokenizer', commit_description='', oid='57f9501464ccbb199a6e1d766df86ba8ade301a8', pr_url=None, pr_revision=None, pr_num=None)

### To fix the existing labels on the hub:

In [34]:
label_names = raw_datasets["train"].features["label"].names
label_names

['unacceptable', 'acceptable']

In [35]:
model.config.id2label = {str(i): lbl for i, lbl in enumerate(label_names)}
model.config.label2id = {lbl: str(i) for i, lbl in enumerate(label_names)}

In [36]:
repo_name = "bert-fine-tuned-cola"
model.config.push_to_hub(repo_name)

Configuration saved in bert-fine-tuned-cola/config.json
Uploading the following files to pallavi176/bert-fine-tuned-cola: config.json


CommitInfo(commit_url='https://huggingface.co/pallavi176/bert-fine-tuned-cola/commit/027b7edd64c2435c25559fffa2ecb2c81c31327b', commit_message='Upload config', commit_description='', oid='027b7edd64c2435c25559fffa2ecb2c81c31327b', pr_url=None, pr_revision=None, pr_num=None)

### Use uploaded model

In [37]:
from transformers import pipeline

In [38]:
classifier = pipeline("text-classification", model="pallavi176/bert-fine-tuned-cola")

Downloading:   0%|          | 0.00/861 [00:00<?, ?B/s]

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--pallavi176--bert-fine-tuned-cola/snapshots/027b7edd64c2435c25559fffa2ecb2c81c31327b/config.json
Model config BertConfig {
  "_name_or_path": "pallavi176/bert-fine-tuned-cola",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "unacceptable",
    "1": "acceptable"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "acceptable": "1",
    "unacceptable": "0"
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_ve

Downloading:   0%|          | 0.00/433M [00:00<?, ?B/s]

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--pallavi176--bert-fine-tuned-cola/snapshots/027b7edd64c2435c25559fffa2ecb2c81c31327b/pytorch_model.bin
All model checkpoint weights were used when initializing BertForSequenceClassification.

All the weights of BertForSequenceClassification were initialized from the model checkpoint at pallavi176/bert-fine-tuned-cola.
If your task is similar to the task the model of the checkpoint was trained on, you can already use BertForSequenceClassification for predictions without further training.


Downloading:   0%|          | 0.00/347 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/669k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/125 [00:00<?, ?B/s]

loading file vocab.txt from cache at /root/.cache/huggingface/hub/models--pallavi176--bert-fine-tuned-cola/snapshots/027b7edd64c2435c25559fffa2ecb2c81c31327b/vocab.txt
loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--pallavi176--bert-fine-tuned-cola/snapshots/027b7edd64c2435c25559fffa2ecb2c81c31327b/tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--pallavi176--bert-fine-tuned-cola/snapshots/027b7edd64c2435c25559fffa2ecb2c81c31327b/special_tokens_map.json
loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--pallavi176--bert-fine-tuned-cola/snapshots/027b7edd64c2435c25559fffa2ecb2c81c31327b/tokenizer_config.json


In [39]:
classifier("This is incorrect sentence.")

[{'label': 'acceptable', 'score': 0.8679891228675842}]