In [1]:

from transformers import ViTForImageClassification
ViTForImageClassification.from_pretrained("google/vit-base-patch16-224", 
                                          num_labels=10, 
                                          ignore_mismatched_sizes=True
                                          )

  from .autonotebook import tqdm as notebook_tqdm
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([10]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([10, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTSdpaAttention(
            (attention): ViTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_fe

In [4]:
#!/usr/bin/python

import torch
from torch.nn import functional as F
from torch import optim

from transformers import ViTForImageClassification
import torchmetrics

import lightning as L

class VisionTransformerPretrained(L.LightningModule):
    '''
    Wrapper for the pretrained Vision Transformers
    '''

    def __init__(self, model="google/vit-base-patch16-224", num_classes=1000):

        super().__init__()
        backbone = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224", 
                                                             num_labels=10, ignore_mismatched_sizes=True)
        self.backbone = backbone

        # metrics
        self.acc = torchmetrics.Accuracy('multiclass', num_classes=num_classes)

    def forward(self, x):
        return self.backbone(x)

    def step(self, batch):
       '''
       Any step processes batch to return loss and predictions
       '''
       x, y = batch
       prediction = self.backbone(x)
       y_hat = torch.argmax(prediction.logits, dim=-1)

       loss = F.cross_entropy(prediction.logits, y)
       acc = self.acc(y_hat, y)
       
       return loss, acc, y_hat, y

    def training_step(self, batch, batch_idx):
        loss, acc, y_hat, y = self.step(batch)

        self.log('train_loss', loss)

        return loss

    def validation_step(self, batch, batch_idx):
        loss, acc, y_hat, y = self.step(batch)

        self.log('valid_acc', acc, on_epoch=True, on_step=False)

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr=1e-4)
        return optimizer
    


In [46]:
import torch
import numpy as np

from sklearn.model_selection import train_test_split

from torchvision.datasets import ImageFolder
from torchvision.transforms import v2

import lightning as L
from torch.utils.data import DataLoader, Subset

class cub(L.LightningDataModule):
    '''
    Lightning datamodule for the EuroSAT dataset
    '''

    def __init__(self, data_root, batch_size):
        super().__init__()
        self.data_root = data_root
        self.batch_size = batch_size

    def setup(self, stage=None):
        '''
        Setup the dataset - here, train / valid / test all at once
        '''

        # define the transforms
        # - resize to (224, 224) as expected for ViT
        # - scale to [0,1] and transform to float32
        # - normalize with ViT mean/std

        transforms = v2.Compose([v2.ToImage(),
                                 v2.Resize(size=(224,224), interpolation=2),
                                 v2.ToDtype(torch.float32, scale=True),
                                 v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                                ])
        
        data = ImageFolder(self.data_root, transform=transforms)
        targets = np.asarray(data.targets)
        print(targets)
        self.num_classes = len(data.classes)
        print(self.num_classes)

        self.num_workers = 7
        train_ix, test_ix = train_test_split(np.arange(len(data.targets)), test_size=round(len(data)*0.2), stratify=targets)
        train_ix, valid_ix = train_test_split(train_ix, test_size=round(len(data)*0.2), stratify=targets[train_ix])
                                
        self.train_data = Subset(data, train_ix)
        self.valid_data = Subset(data, valid_ix)
        self.test_data = Subset(data, test_ix)

    def train_dataloader(self):
        return DataLoader(dataset=self.train_data, batch_size=self.batch_size, shuffle=True)

    def valid_dataloader(self):
        return DataLoader(dataset=self.valid_data, batch_size=self.batch_size, shuffle=False)

    def test_dataloader(self):
        return DataLoader(dataset=self.test_data, batch_size=self.batch_size, shuffle=False)
    

In [42]:

import lightning as L

from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.loggers import TensorBoardLogger

# from eurosat_module import EuroSAT_RGB_DataModule
# from vision_transformer import VisionTransformerPretrained

def main(arg):
    L.seed_everything(1312)

    # setup data
    datamodule = cub('../datasets/cub/', batch_size=32)
    datamodule.prepare_data()
    datamodule.setup()

    train_dataloader = datamodule.train_dataloader()
    valid_dataloader = datamodule.valid_dataloader()
    test_dataloader = datamodule.test_dataloader()

    # setup model
    model = VisionTransformerPretrained('google/vit-base-patch16-224', datamodule.num_classes, learning_rate=1e-4)

    # setup callbacks
    early_stopping = EarlyStopping(monitor='valid_acc', patience=6, mode='max')

    # logger
    logger = TensorBoardLogger("tensorboard_logs", name='eurosat_vit')

    # train
    trainer = L.Trainer(devices=1, callbacks=[early_stopping], logger=logger)
    trainer.fit(model=model, train_dataloaders=train_dataloader, val_dataloaders=valid_dataloader)

    # test
    trainer.test(model=model, dataloaders=test_dataloader, verbose=True)


In [None]:
trainer.fit(model=model, train_dataloaders=train_dataloader, val_dataloaders=valid_dataloader)

In [None]:
trainer.test(model=model, dataloaders=test_dataloader, verbose=True)

In [43]:
L.seed_everything(1312)

Seed set to 1312


1312

In [47]:
datamodule = cub('../datasets/images/', batch_size=32)
datamodule.prepare_data()
datamodule.setup()

[  0   0   0 ... 199 199 199]
200


In [48]:
train_dataloader = datamodule.train_dataloader()
valid_dataloader = datamodule.valid_dataloader()
test_dataloader = datamodule.test_dataloader()

In [49]:
model = VisionTransformerPretrained('google/vit-base-patch16-224', datamodule.num_classes)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([10]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([10, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [50]:
early_stopping = EarlyStopping(monitor='valid_acc', patience=6, mode='max')

In [51]:
logger = TensorBoardLogger("tensorboard_logs", name='eurosat_vit')

In [52]:
trainer = L.Trainer(devices=1, callbacks=[early_stopping], logger=logger)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [53]:
trainer.fit(model=model,  datamodule=datamodule)

/home/disi/ml/.venv/lib/python3.11/site-packages/lightning/pytorch/loops/utilities.py:73: `max_epochs` was not set. Setting it to 1000 epochs. To train without an epoch limit, set `max_epochs=-1`.
/home/disi/ml/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name     | Type                      | Params
-------------------------------------------------------
0 | backbone | ViTForImageClassification | 85.8 M
1 | acc      | MulticlassAccuracy        | 0     
-------------------------------------------------------
85.8 M    Trainable params
0         Non-trainable params
85.8 M    Total params
343.225   Total estimated model params size (MB)
/home/disi/ml/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


[  0   0   0 ... 199 199 199]
200
Epoch 0:   0%|          | 0/222 [00:00<?, ?it/s] 

../aten/src/ATen/native/cuda/Loss.cu:250: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [0,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:250: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [1,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:250: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [2,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:250: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [3,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:250: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [4,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:250: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [5,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/Loss.cu:250: nll_loss_f

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [58]:
from torchvision import transforms

# normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],

# std=[0.229, 0.224, 0.225])

# transformation_list = [transforms.Resize((128,128)),

# transforms.ToTensor(),

# normalize]

# transformations = transforms.Compose(transformation_list)

# train_loader = torch.utils.data.DataLoader(

# train_dataset,

# batch_size=BATCH_SIZE, shuffle=True,

# num_workers=WORKERS, pin_memory=True)

# test_loader = torch.utils.data.DataLoader(

# test_dataset,

# batch_size=BATCH_SIZE*2, shuffle=False,

# num_workers=WORKERS, pin_memory=True)

NameError: name 'train_dataset' is not defined

In [61]:
import megaboost as mg

config = {'mode': 'fintune-basic',
    'epochs': 5000, # 300000
    'lr': 0.01,
    'model': 'efficientnet-b1',
    'classes': 10,
    'eval_step': 100,
    'print_freq': 10
    }

megaboost = mg.MegaBoost(config=config)

Downloading: "https://download.pytorch.org/models/efficientnet_b1-c27df63c.pth" to /home/disi/.cache/torch/hub/checkpoints/efficientnet_b1-c27df63c.pth
100%|██████████| 30.1M/30.1M [00:00<00:00, 163MB/s]


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
