In [1]:
#Install libraries quietly
!pip install --quiet datasets transformers pytorch-lightning wandb

You should consider upgrading via the '/usr/bin/python3.9 -m pip install --upgrade pip' command.[0m


In [2]:
import torchmetrics
import pytorch_lightning as pl

from torch.utils.data import DataLoader

from transformers import ViTFeatureExtractor
from transformers import ViTForImageClassification

# Data Stuff
import torch
from datasets import load_dataset

# Model
from torch.optim import AdamW

# Weights and Biases
# https://github.com/full-stack-deep-learning/fsdl-text-recognizer-2022-labs/blob/main/lab04/training/run_experiment.py

import wandb
from pathlib import Path
from pytorch_lightning.loggers import WandbLogger

wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mcoffeedrunk[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
# Data Stuff


batch_size= 8
model_name_or_path = 'google/vit-base-patch16-224-in21k'
feature_extractor = ViTFeatureExtractor.from_pretrained(model_name_or_path)

def transform(example_batch):
    # Take a list of PIL images and turn them to pixel values
    inputs = feature_extractor([x for x in example_batch['image']], return_tensors='pt')

    # Don't forget to include the labels!
    inputs['labels'] = example_batch['labels']
    inputs['image'] = example_batch['image']
    return inputs

def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['labels'] for x in batch]),
        'image': [x['image'] for x in batch]
    }

# Load dataset
ds = load_dataset('beans')
prepared_ds = ds.with_transform(transform)

labels_names = ds['train'].features['labels'].names

# create dataloaders
train_dataloader = DataLoader(prepared_ds["train"],
                      batch_size=batch_size,
                      collate_fn=collate_fn)

val_dataloader = DataLoader(prepared_ds["validation"],
                      batch_size=batch_size,
                      collate_fn=collate_fn)

test_dataloader = DataLoader(prepared_ds["test"],
                      batch_size=batch_size,
                      collate_fn=collate_fn)

Found cached dataset beans (/root/.cache/huggingface/datasets/beans/default/0.0.0/90c755fb6db1c0ccdad02e897a37969dbf070bed3755d4391e269ff70642d791)


  0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
val_dataloader.dataset

Dataset({
    features: ['image_file_path', 'image', 'labels'],
    num_rows: 133
})

In [5]:
# Model
from torch.optim import AdamW

class Vit(pl.LightningModule):
    """
    https://huggingface.co/docs/transformers/model_doc/vit#transformers.ViTForImageClassification
    """

    def __init__(self, num_labels=3, lr = 2e-4):

        super().__init__()

        self.model_name_or_path = 'google/vit-base-patch16-224-in21k'
        self.model = ViTForImageClassification.from_pretrained(self.model_name_or_path,
                                                                num_labels = num_labels)

        # log hyperparameters
        # https://www.youtube.com/watch?v=hUXQm46TAKc&list=PLD80i8An1OEGajeVo15ohAQYF1Ttle0lk&index=4
        self.save_hyperparameters()

        # Accuracy
        self.train_acc = torchmetrics.Accuracy()
        self.val_acc = torchmetrics.Accuracy()
        self.test_acc = torchmetrics.Accuracy()

    def forward(self, x ):
        pixel_values = x["pixel_values"]
        labels = x["labels"]
        
        outs = self.model(pixel_values = pixel_values, labels=labels)

        loss = outs.loss
        logits = outs.logits

        return loss, logits

    def configure_optimizers(self):
        # self.hparams comes from self.save_hyperparameters()
        return AdamW(self.parameters(), lr=self.hparams["lr"])

    def training_step(self, batch, batch_idx):
        # https://huggingface.co/docs/transformers/v4.24.0/en/model_doc/vit#transformers.ViTForImageClassification.forward
        # loss (torch.FloatTensor of shape (1,), optional, returned when labels is provided)

        loss, logits = self(batch)

        self.train_acc(logits, batch["labels"])

        self.log("train/loss", loss,  on_step=True, on_epoch=True, prog_bar=True)
        self.log("train/acc", self.train_acc,  on_epoch=True, prog_bar=True)
        outputs = {"loss": loss}
        return outputs

    def validation_step(self, batch, batch_idx):

        loss, logits = self(batch)

        self.val_acc(logits, batch["labels"])

        self.log("validation/loss", loss, prog_bar=True, sync_dist=True)
        self.log("validation/acc", self.val_acc, on_step=False, on_epoch=True, prog_bar=True)

        outputs = {"loss": loss,
                   "images": batch["image"],
                   "pred": torch.argmax(logits, 1),
                   "label":batch["labels"]}
        return outputs

    # TODO: arrumar esse pedaço aqui e colocar o out do validation_step como os preds ou logits
#     def validation_epoch_end(self, validation_step_outputs):
        
#         # Change the hardcoded size
#         print("##epoch end", validation_step_outputs)
        
#         # dummy_input = torch.zeros([3,224,224], device=self.device)
#         # model_filename = f"model_{str(self.global_step).zfill(5)}.onnx"
#         # torch.onnx.export(self, dummy_input, model_filename, opset_version=11)
#         # artifact = wandb.Artifact(name="model.ckpt", type="model")
#         # artifact.add_file(model_filename)
#         # self.logger.experiment.log_artifact(artifact)

#         flattened_logits = torch.flatten(torch.cat(validation_step_outputs))
#         self.logger.experiment.log(
#             {"valid/logits": wandb.Histogram(flattened_logits.to("cpu")),
#             "global_step": self.global_step})


#     def test_step(self, batch, batch_idx):
#         loss, logits = self(pixel_values = batch["pixel_values"],
#                 labels=batch["labels"])

#         self.test_acc(logits, batch["labels"])

#         self.log("test/loss_epoch", loss, on_step=False, on_epoch=True)
#         self.log("test/acc_epoch", self.test_acc, on_step=False, on_epoch=True)

    # def test_epoch_end(self, test_step_outputs):  # args are defined as part of pl API
    #     # Change the hardcoded size
    #     dummy_input = torch.zeros([3,224,224], device=self.device)
    #     model_filename = "model_final.onnx"
    #     self.to_onnx(model_filename, dummy_input, export_params=True)
    #     artifact = wandb.Artifact(name="model.ckpt", type="model")
    #     artifact.add_file(model_filename)
    #     wandb.log_artifact(artifact)


model = Vit(num_labels = len(labels_names))


Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
class ImagePredictionLogger(pl.Callback):
    def __init__(self, num_samples=1):
        super().__init__()
        self.num_samples = num_samples

    def on_validation_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx):
#         val_images = batch["image"][:self.num_samples]
#         val_labels = batch["labels"][:self.num_samples]
#         val_tensors = batch["pixel_values"][:self.num_samples]
        
#         # print(val_tensors[:2])
#         inputs = {"pixel_values":val_tensors, "labels":val_labels}
#         _, logits = pl_module(inputs)
#         preds = torch.argmax(logits, 1)

        val_images = outputs["images"][:1]
        val_labels = outputs["label"][:1]
        val_preds = outputs["pred"][:1]
        
        
        # print(outputs)
        trainer.logger.experiment.log({
            "examples": [wandb.Image(x, caption=f"Pred:{pred}, Label:{y}")
                            for x, pred, y in zip(val_images, val_preds, val_labels)],
            "global_step": trainer.global_step})


In [10]:
# logger
log_dir = Path("training") / "logs"
# log_every_n_steps = 50
# print(log_dir)

logger = pl.loggers.WandbLogger(project = "pytorch-lightning", log_model="all", job_type="train")

# logger = pl.loggers.WandbLogger(name = "test-callback", project = "pytorch-lightning", log_model="all", save_dir=str(log_dir), job_type="train")

# logger.watch(model)
# logger.watch(model, log_freq=max(100, log_every_n_steps))

# experiment_dir = logger.experiment.dir

In [12]:
# Trainer
# https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html#pytorch_lightning.trainer.trainer.Trainer

# Notes:
# gpus=int(torch.cuda.is_available()) will be removed on pytorch lightning v2.0
# The recommendation is to use devices and accelerator instead.

# trainer = pl.Trainer(accelerator = 'gpu',
#                      devices = int(torch.cuda.is_available()),
#                      max_epochs = 5,
#                      precision = 32,
#                      log_every_n_steps = log_every_n_steps,
#                      logger = logger)


# grab samples to log predictions on
# samples = next(iter(val_dataloader))

# devices = int(torch.cuda.is_available())

# trainer = pl.Trainer(accelerator = ('gpu' if devices!=0 else "cpu"),
#                      max_epochs = 1,
#                      precision = 16,
#                      log_every_n_steps = 50,
#                      logger = logger,
#                      deterministic=True,     # keep it deterministic
#                      callbacks=[ImagePredictionLogger(samples)])

devices = int(torch.cuda.is_available())

trainer = pl.Trainer(accelerator = ('gpu' if devices!=0 else "cpu"),
                     devices = devices,
                     max_epochs = 1,
                     precision = 16,
                     log_every_n_steps = 50,
                     logger = logger,
                     deterministic=True,    # keep it deterministic
                     callbacks=[ImagePredictionLogger()])

# # test without logger
# trainer = pl.Trainer(accelerator = 'gpu',
#                      devices = int(torch.cuda.is_available()),
#                      max_epochs = 1,
#                      precision = 32)


trainer.fit(model = model,
            train_dataloaders=train_dataloader,
            val_dataloaders=val_dataloader)

# To change the job status to finished on weights and biases
wandb.finish()


Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                      | Params
--------------------------------------------------------
0 | model     | ViTForImageClassification | 85.8 M
1 | train_acc | Accuracy                  | 0     
2 | val_acc   | Accuracy                  | 0     
3 | test_acc  | Accuracy                  | 0     
--------------------------------------------------------
85.8 M    Trainable params
0         Non-trainable params
85.8 M    Total params
171.602   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.


0,1
epoch,▁▁▁▁
global_step,▁▁█████████████████
train/acc_epoch,▁
train/acc_step,▁▁
train/loss_epoch,▁
train/loss_step,█▁
trainer/global_step,▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅██
validation/acc,▁
validation/loss,▁

0,1
epoch,0.0
global_step,130.0
train/acc_epoch,0.90522
train/acc_step,1.0
train/loss_epoch,0.35759
train/loss_step,0.04142
trainer/global_step,129.0
validation/acc,0.33083
validation/loss,3.08626


In [None]:
# To change the job status to finished on weights and biases
wandb.finish()

In [None]:
torch.cuda.empty_cache() 

In [None]:
with torch.no_grad():
    torch.cuda.empty_cache()

In [None]:
devices = int(torch.cuda.is_available())
devices

In [None]:
! python -m torch.utils.collect_env

In [None]:
!nvidia-smi