In [None]:
#Install libraries quietly
pip install -qqq datasets transformers pytorch-lightning wandb

In [2]:
import torchmetrics
import pytorch_lightning as pl

from torch.utils.data import DataLoader

from transformers import ViTFeatureExtractor
from transformers import ViTForImageClassification

In [None]:
# Data Stuff
import torch
from datasets import load_dataset

# Mini-batch size used by the dataloaders created further down in this cell
batch_size = 8

# Hub id of the pretrained ViT checkpoint whose preprocessing config is loaded
model_name_or_path = 'google/vit-base-patch16-224-in21k'

# Image preprocessor for that checkpoint; `transform` calls it on every batch
feature_extractor = ViTFeatureExtractor.from_pretrained(model_name_or_path)

def transform(example_batch):
    """Convert a batch of dataset examples into model-ready inputs.

    Args:
        example_batch: Mapping with an 'image' entry (a sequence of images;
            PIL images according to the original author's note) and a
            'labels' entry.

    Returns:
        The object returned by ``feature_extractor`` (called with
        ``return_tensors='pt'``), with the batch's labels stored under
        'labels'.
    """
    # Materialise the images as a plain list before handing them over
    images = list(example_batch['image'])
    encoded = feature_extractor(images, return_tensors='pt')

    # Carry the labels along so the model can compute a loss
    encoded['labels'] = example_batch['labels']
    return encoded

def collate_fn(batch):
    """Merge a list of per-example dicts into a single batch dict.

    Args:
        batch: Sequence of dicts, each holding a 'pixel_values' tensor and a
            'labels' value.

    Returns:
        Dict with 'pixel_values' (the per-example tensors stacked along a new
        leading dimension) and 'labels' (a tensor built from the per-example
        labels).
    """
    pixel_values = []
    labels = []
    for example in batch:
        pixel_values.append(example['pixel_values'])
        labels.append(example['labels'])

    collated = {}
    collated['pixel_values'] = torch.stack(pixel_values)
    collated['labels'] = torch.tensor(labels)
    return collated

# Load dataset
ds = load_dataset('beans')
# Attach `transform` so examples are converted to pixel values on access
prepared_ds = ds.with_transform(transform)

# Class names and class count, read from the training split's label feature
labels_names = ds['train'].features['labels'].names
n_labels = len(labels_names)

# create dataloaders
# The training loader shuffles so that each epoch sees the examples in a
# different order; the validation loader keeps the dataset order.
train_dataloader = DataLoader(prepared_ds["train"],
                      batch_size=batch_size,
                      shuffle=True,
                      collate_fn=collate_fn)

val_dataloader = DataLoader(prepared_ds["validation"],
                      batch_size=batch_size,
                      collate_fn=collate_fn)

# test_dataloader = DataLoader(prepared_ds["test"],
#                       batch_size=batch_size,
#                       collate_fn=collate_fn)


In [4]:
# Model
from torch.optim import AdamW

class Vit(pl.LightningModule):
    """LightningModule that fine-tunes a pretrained ViT image classifier.

    Wraps ``transformers.ViTForImageClassification``:
    https://huggingface.co/docs/transformers/model_doc/vit#transformers.ViTForImageClassification

    Args:
        num_labels: Number of target classes. Passed to the pretrained model
            and to the accuracy metrics.
        lr: Learning rate handed to AdamW in ``configure_optimizers``.
    """

    def __init__(self, num_labels=3, lr=2e-4):
        super().__init__()

        self.model_name_or_path = 'google/vit-base-patch16-224-in21k'
        self.model = ViTForImageClassification.from_pretrained(
            self.model_name_or_path,
            num_labels=num_labels,
        )

        # log hyperparameters (makes `num_labels` and `lr` available in self.hparams)
        # https://www.youtube.com/watch?v=hUXQm46TAKc&list=PLD80i8An1OEGajeVo15ohAQYF1Ttle0lk&index=4
        self.save_hyperparameters()

        # Accuracy
        # Recent torchmetrics releases require the task to be stated explicitly;
        # a bare `Accuracy()` raises there.
        self.train_acc = torchmetrics.Accuracy(task="multiclass", num_classes=num_labels)
        self.val_acc = torchmetrics.Accuracy(task="multiclass", num_classes=num_labels)
        # self.test_acc = torchmetrics.Accuracy(task="multiclass", num_classes=num_labels)

    def forward(self, pixel_values, labels=None):
        """Run the wrapped model.

        Args:
            pixel_values: Image tensor forwarded to the model as ``pixel_values``.
            labels: Optional targets forwarded to the model as ``labels``.
                May be omitted for inference.

        Returns:
            Tuple ``(loss, logits)`` taken from the model output. Per the
            Hugging Face docs the loss is only returned when labels are
            provided.
        """
        outs = self.model(pixel_values=pixel_values, labels=labels)
        return outs.loss, outs.logits

    def training_step(self, batch, batch_idx):
        """Compute the training loss for one batch and log loss and accuracy.

        Returns:
            Dict with the loss under the key "loss".
        """
        # https://huggingface.co/docs/transformers/v4.24.0/en/model_doc/vit#transformers.ViTForImageClassification.forward
        # loss (torch.FloatTensor of shape (1,), optional, returned when labels is provided)
        loss, logits = self(pixel_values=batch["pixel_values"],
                            labels=batch["labels"])

        # Update the running training accuracy with this batch
        self.train_acc(logits, batch["labels"])

        self.log("train/loss", loss, on_step=True, on_epoch=True, prog_bar=True)
        # self.log("train/acc", self.train_acc, on_step=True, on_epoch=True)
        self.log("train/acc", self.train_acc, on_epoch=True, prog_bar=True)

        return {"loss": loss}

    def validation_step(self, batch, batch_idx):
        """Compute the validation loss for one batch and log loss and accuracy.

        Returns:
            Dict with the loss under the key "loss".
        """
        loss, logits = self(pixel_values=batch["pixel_values"],
                            labels=batch["labels"])

        # Update the running validation accuracy with this batch
        self.val_acc(logits, batch["labels"])

        self.log("validation/loss", loss, prog_bar=True, sync_dist=True)
        self.log("validation/acc", self.val_acc, on_step=False, on_epoch=True, prog_bar=True)

        return {"loss": loss}

    def configure_optimizers(self):
        """Return an AdamW optimizer over all parameters using the stored learning rate."""
        # self.hparams comes from self.save_hyperparameters()
        return AdamW(self.parameters(), lr=self.hparams["lr"])

# The constructor's keyword is `num_labels`; `n_labels` is the class count
# computed from the dataset in the data cell.
model = Vit(num_labels=n_labels)


In [None]:
# Weights and Biases (logger)
# https://github.com/full-stack-deep-learning/fsdl-text-recognizer-2022-labs/blob/main/lab04/training/run_experiment.py

import wandb
from pathlib import Path

# Login to use the wandb site
wandb.login()

# logger
log_dir = Path("training") / "logs"
# Create the directory up front so the logger is handed a save_dir that exists
log_dir.mkdir(parents=True, exist_ok=True)
log_every_n_steps = 50
print(log_dir)

logger = pl.loggers.WandbLogger(project = "pytorch-lightning", log_model="all", save_dir=str(log_dir), job_type="train")
# Watch the model, logging at most once every 100 steps
logger.watch(model, log_freq=max(100, log_every_n_steps))

# Directory of the underlying wandb run
experiment_dir = logger.experiment.dir

In [None]:
# Trainer
# https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html#pytorch_lightning.trainer.trainer.Trainer

# Notes:
# gpus=int(torch.cuda.is_available()) will be removed in PyTorch Lightning v2.0.
# The recommendation is to use devices and accelerator instead.

# Pick the accelerator from what is actually available. Requesting
# accelerator='gpu' with devices=0 (the result on a CPU-only machine) is not a
# valid Trainer configuration, so fall back to a single CPU device instead.
trainer = pl.Trainer(accelerator = 'gpu' if torch.cuda.is_available() else 'cpu',
                     devices = 1,
                     max_epochs = 5,
                     precision = 32,
                     log_every_n_steps = log_every_n_steps,
                     logger = logger)


trainer.fit(model = model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)



# Notes: pytorch lightning vs hugging face interface and hyperparameters

* precision: the Hugging Face example trains with fp16. The closest thing
available in PyTorch Lightning is bf16, which is different from fp16 (learn more
in this Reddit post: https://www.reddit.com/r/MachineLearning/comments/vndtn8/d_mixed_precision_training_difference_between/).
Precision is kept at 32 here because we can afford it.
      Similar to (hugging face): fp16=True,

* log_every_n_steps: similar options available in hugging face are
      save_steps=100,
      eval_steps=100,
      logging_steps=10,

* max_epochs
        Similar to (hugging face): num_train_epochs=4,

* learning rate: in PyTorch Lightning it is set on the model, not on the trainer
        Similar to (hugging face): learning_rate=2e-4,

* options that have no counterpart in PyTorch Lightning,
or that are not needed

        remove_unused_columns=False,
        push_to_hub=False,
        evaluation_strategy="steps",
        per_device_train_batch_size=16,
        save_total_limit=2,

* Logger
        Similar to (hugging face): report_to='tensorboard',

* PyTorch Lightning natively provides the functionality of the Hugging Face option
  load_best_model_at_end=True,

* Directory: default_root_dir (Optional[str]) – Default path for logs and weights
             when no logger/ckpt_callback passed. Default: os.getcwd().
             Can be remote file paths such as s3://mybucket/path or ‘hdfs://path/’.
             Can also be set in the logger settings.

        Similar to (hugging face): output_dir="./vit-base-beans",


In [None]:
# Finish the Weights & Biases run so that its job status changes to "finished"
wandb.finish()