Fine tunning of Vit model based on the tutorial : https://huggingface.co/blog/fine-tune-vit

Pytorch Lightning and Weights and biases documentation.


In [1]:
#Install libraries quietly
!pip install -qqq datasets transformers pytorch-lightning wandb

[K     |████████████████████████████████| 441 kB 5.2 MB/s 
[K     |████████████████████████████████| 5.5 MB 53.1 MB/s 
[K     |████████████████████████████████| 796 kB 60.9 MB/s 
[K     |████████████████████████████████| 1.9 MB 63.0 MB/s 
[K     |████████████████████████████████| 163 kB 63.3 MB/s 
[K     |████████████████████████████████| 95 kB 5.1 MB/s 
[K     |████████████████████████████████| 212 kB 62.0 MB/s 
[K     |████████████████████████████████| 115 kB 72.4 MB/s 
[K     |████████████████████████████████| 127 kB 66.5 MB/s 
[K     |████████████████████████████████| 7.6 MB 50.7 MB/s 
[K     |████████████████████████████████| 136 kB 68.0 MB/s 
[K     |████████████████████████████████| 529 kB 66.3 MB/s 
[K     |████████████████████████████████| 87 kB 7.1 MB/s 
[K     |████████████████████████████████| 182 kB 88.4 MB/s 
[K     |████████████████████████████████| 166 kB 48.1 MB/s 
[K     |████████████████████████████████| 63 kB 1.4 MB/s 
[K     |██████████████████████

In [2]:
import torchmetrics
import pytorch_lightning as pl

from torch.utils.data import DataLoader

from transformers import ViTFeatureExtractor
from transformers import ViTForImageClassification

In [3]:
# Data Stuff
import torch
from datasets import load_dataset

batch_size= 8
model_name_or_path = 'google/vit-base-patch16-224-in21k'
feature_extractor = ViTFeatureExtractor.from_pretrained(model_name_or_path)

def transform(example_batch):
    # Take a list of PIL images and turn them to pixel values
    inputs = feature_extractor([x for x in example_batch['image']], return_tensors='pt')

    # Don't forget to include the labels!
    inputs['labels'] = example_batch['labels']
    return inputs

def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['labels'] for x in batch])
    }

# Load dataset
ds = load_dataset('beans')
prepared_ds = ds.with_transform(transform)

labels_names = ds['train'].features['labels'].names

# create dataloaders
train_dataloader = DataLoader(prepared_ds["train"],
                      batch_size=batch_size,
                      collate_fn=collate_fn)

val_dataloader = DataLoader(prepared_ds["validation"],
                      batch_size=batch_size,
                      collate_fn=collate_fn)

# test_dataloader = DataLoader(prepared_ds["test"],
#                       batch_size=batch_size,
#                       collate_fn=collate_fn)


Downloading:   0%|          | 0.00/160 [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/3.61k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/2.24k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/4.74k [00:00<?, ?B/s]

Downloading and preparing dataset beans/default to /root/.cache/huggingface/datasets/beans/default/0.0.0/90c755fb6db1c0ccdad02e897a37969dbf070bed3755d4391e269ff70642d791...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/144M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/18.5M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/17.7M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/1034 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/133 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/128 [00:00<?, ? examples/s]

Dataset beans downloaded and prepared to /root/.cache/huggingface/datasets/beans/default/0.0.0/90c755fb6db1c0ccdad02e897a37969dbf070bed3755d4391e269ff70642d791. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
# Model
from torch.optim import AdamW

class Vit(pl.LightningModule):
  """
  https://huggingface.co/docs/transformers/model_doc/vit#transformers.ViTForImageClassification
  """

  def __init__(self, num_labels=3, lr = 2e-4):
      super().__init__()

      self.model_name_or_path = 'google/vit-base-patch16-224-in21k'
      self.model = ViTForImageClassification.from_pretrained(self.model_name_or_path,
                                                            num_labels = num_labels)

      # log hyperparameters
      # https://www.youtube.com/watch?v=hUXQm46TAKc&list=PLD80i8An1OEGajeVo15ohAQYF1Ttle0lk&index=4
      self.save_hyperparameters()

      # Accuracy
      self.train_acc = torchmetrics.Accuracy()
      self.val_acc = torchmetrics.Accuracy()
      # self.test_acc = torchmetrics.Accuracy()

  def forward(self, pixel_values, labels):
      outs = self.model(pixel_values = pixel_values, labels=labels)

      loss = outs.loss
      logits = outs.logits

      return loss, logits

  def training_step(self, batch, batch_idx):
      # https://huggingface.co/docs/transformers/v4.24.0/en/model_doc/vit#transformers.ViTForImageClassification.forward
      # loss (torch.FloatTensor of shape (1,), optional, returned when labels is provided)

      loss, logits = self(pixel_values = batch["pixel_values"],
                          labels=batch["labels"])

      self.train_acc(logits, batch["labels"])

      self.log("train/loss", loss, on_step=True, on_epoch=True, prog_bar=True)
      # self.log("train/acc", self.train_acc, on_step=True, on_epoch=True)
      self.log("train/acc", self.train_acc, on_epoch=True, prog_bar=True)
      outputs = {"loss": loss}
      return outputs

  def validation_step(self, batch, batch_idx):

      loss, logits = self(pixel_values = batch["pixel_values"],
                    labels=batch["labels"])

      self.val_acc(logits, batch["labels"])

      self.log("validation/loss", loss, prog_bar=True, sync_dist=True)
      self.log("validation/acc", self.val_acc, on_step=False, on_epoch=True, prog_bar=True)

      outputs = {"loss": loss}
      return outputs

  def configure_optimizers(self):
    # self.hparams comes from self.save_hyperparameters()
    return AdamW(self.parameters(), lr=self.hparams["lr"])

model = Vit(num_labels = len(labels_names))


Downloading:   0%|          | 0.00/502 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# Weights and Biases
# https://github.com/full-stack-deep-learning/fsdl-text-recognizer-2022-labs/blob/main/lab04/training/run_experiment.py

import wandb
from pathlib import Path
from pytorch_lightning.loggers import WandbLogger

wandb.login()

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 

··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [6]:
# logger
log_dir = Path("training") / "logs"
log_every_n_steps = 50
print(log_dir)

logger = pl.loggers.WandbLogger(project = "pytorch-lightning", log_model="all", save_dir=str(log_dir), job_type="train")
logger.watch(model, log_freq=max(100, log_every_n_steps))

experiment_dir = logger.experiment.dir

training/logs


Hint: Upgrade with `pip install --upgrade wandb`.
  f"Providing log_model={log_model} requires wandb version >= 0.10.22"
[34m[1mwandb[0m: Currently logged in as: [33mcoffeedrunk[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


In [7]:
# Trainer
# https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html#pytorch_lightning.trainer.trainer.Trainer

# Notes:
# gpus=int(torch.cuda.is_available()) will be removed on pytorch lightning v2.0
# The recommendation is to use devices and accelerator instead.

trainer = pl.Trainer(accelerator = 'gpu',
                     devices = int(torch.cuda.is_available()),
                     max_epochs = 5,
                     precision = 32,
                     log_every_n_steps = log_every_n_steps,
                     logger = logger)


trainer.fit(model = model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type                      | Params
--------------------------------------------------------
0 | model     | ViTForImageClassification | 85.8 M
1 | train_acc | Accuracy                  | 0     
2 | val_acc   | Accuracy                  | 0     
--------------------------------------------------------
85.8 M    Trainable params
0         Non-trainable params
85.8 M    Total params
343.204   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]



Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


In [8]:
# To change the job status to finished on weights and biases
wandb.finish()

0,1
epoch,▁▁▁▁▃▃▃▃▃▅▅▅▅▆▆▆▆▆█████
train/acc_epoch,█▇▇▇▁
train/acc_step,██████▁██████
train/loss_epoch,▁▂▂▃█
train/loss_step,▅▁▁▁▁▁█▂▁▁▄▃▁
trainer/global_step,▁▂▂▂▂▃▃▃▃▄▅▅▅▅▆▆▆▆▇▇███
validation/acc,▁▁▁▁▁
validation/loss,█▆▄▁▁

0,1
epoch,4.0
train/acc_epoch,0.65184
train/acc_step,1.0
train/loss_epoch,0.94839
train/loss_step,0.03518
trainer/global_step,649.0
validation/acc,0.33083
validation/loss,2.75582



# Notes: pytorch lightning vs hugging face interface and hyperparameters

* precision used on hugging face is fp16. The closest thing available in
pytorch lightning is bf16, which is different from fp16 (learn more about it in
the reddit post : https://www.reddit.com/r/MachineLearning/comments/vndtn8/d_mixed_precision_training_difference_between/).
It will be kept 32, because we can afford it.
      Similar to (hugging face): fp16=True,

* log_every_n_steps: similar options available in hugging face are
      save_steps=100,
      eval_steps=100,
      logging_steps=10,

* max_epochs
        Similar to (hugging face): num_train_epochs=4,

* learning rate is set on the model, not on the trainer on pytorch lightning
        Similar to (hugging face): learning_rate=2e-4,

* options that do not have a correspondent between hugging face and lightning
or are not needed

        remove_unused_columns=False,
        push_to_hub=False,
        evaluation_strategy="steps",
        per_device_train_batch_size=16,
        save_total_limit=2,

* Logger
        Similar to (hugging face): report_to='tensorboard',

* Natively pytorch lightning already does the hugging face functionality
  load_best_model_at_end=True,

* Directory: default_root_dir (Optional[str]) – Default path for logs and weights
             when no logger/ckpt_callback passed. Default: os.getcwd().
             Can be remote file paths such as s3://mybucket/path or ‘hdfs://path/’.
             Can also be set in the logger settings.

        Similar to (hugging face): output_dir="./vit-base-beans",
