In [None]:
!nvidia-smi

Sat Feb 19 14:45:05 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   48C    P0    29W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
!pip install wandb
!pip install -q --upgrade timm
!pip install -q --upgrade pytorch-lightning
!pip install --upgrade albumentations
!pip install --upgrade opencv-python
!pip install python-box

# BASE_PATH = '/content/drive/MyDrive/ml competitions/zindi/sign language/'
# !cp -r "{BASE_PATH}" ./data/
# !unzip -q "data/Images.zip" -d ./data

In [None]:
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mdevnikhilmishra[0m (use `wandb login --relogin` to force relogin)


True

In [None]:
import numpy as np
import pandas as pd
import random
import os
from matplotlib import pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import log_loss
from timm import create_model
import albumentations as A
from albumentations.pytorch import ToTensorV2
from box import Box
import pytorch_lightning as pl
import timm
from PIL import Image
from scipy.special import softmax
import gc
from pathlib import Path
from timm.optim import MADGRAD

In [None]:
class Config:
    """Notebook-wide constants: experiment identity, data layout and training budget."""

    # --- experiment identity / debugging -------------------------------------
    NAME = "swin_10"             # used for the output folders and the wandb run name
    DEBUG = False                # when True the training frame is subsampled to 10%
    RANDOM_STATE = 1             # NOTE(review): not referenced in the visible cells

    # --- data layout -----------------------------------------------------------
    DATA_DIR = "./data/"         # folder holding Train.csv / Test.csv / SampleSubmission.csv
    OUTPUT_PATH = "./outputs"    # root folder for the per-fold prediction CSVs
    ID_COL = "img_IDS"           # image identifier column
    LABEL_COL = "Label"          # target column

    # --- training budget -------------------------------------------------------
    N_SPLITS = 10                # number of stratified CV folds
    N_EPOCHS = 8                 # forwarded to the trainer as max_epochs
    BATCH_SIZE = 4               # training batch size
    IMAGE_SIZE = 384             # square side length images are resized to

In [None]:
train = pd.read_csv(os.path.join(Config.DATA_DIR, 'Train.csv'))
test = pd.read_csv(os.path.join(Config.DATA_DIR, 'Test.csv'))
ss = pd.read_csv(os.path.join(Config.DATA_DIR, 'SampleSubmission.csv'))

In [None]:
def uniform_sampler(df, random_state=None):
    """Down-sample every class to the size of the rarest class and shuffle the result.

    Args:
        df: DataFrame containing the ``Config.LABEL_COL`` column.
        random_state: Optional seed for the per-class draw. ``None`` (the default)
            keeps the draw unseeded, as before.

    Returns:
        A new DataFrame with the same number of rows for every label, shuffled with
        a fixed seed and re-indexed from 0. An empty input yields an empty frame
        with the same columns.
    """
    if df.empty:
        # Nothing to balance; avoids calling pd.concat on an empty list below.
        return df.reset_index(drop=True)

    label_col = Config.LABEL_COL
    min_val = df[label_col].value_counts().min()

    # Collect one sample per class and concatenate once: DataFrame.append was removed
    # in pandas 2.0 and re-allocated the whole frame on every iteration.
    per_class = [
        df[df[label_col] == label].sample(n=min_val, random_state=random_state)
        for label in df[label_col].unique()
    ]
    balanced = pd.concat(per_class)

    return balanced.sample(frac=1, random_state=2).reset_index(drop=True)

In [None]:
# Run configuration consumed by the data module, the LightningModule and the Trainer.
# It is wrapped in a `Box` at the end so nested keys can be read as attributes
# (e.g. `config.trainer`, `config.model.name`).
config = {'seed': 2021,  # used as random_state of the StratifiedKFold split
          'root': os.path.join(Config.DATA_DIR, 'Images'),  # folder SignDataset reads the .jpg files from
          # NOTE(review): 'n_splits' is not read in the visible cells; the CV loop uses Config.N_SPLITS.
          'n_splits': Config.N_SPLITS,
          'epoch': Config.N_EPOCHS,  # passed to the Trainer as max_epochs
          # Unpacked verbatim into `pl.Trainer(**config.trainer)`.
          'trainer': {
              'gpus': [0],
              'accumulate_grad_batches': 1,
              # The captured training log reports this argument as deprecated in v1.5.
              'progress_bar_refresh_rate': 1,
              'fast_dev_run': False,
              'num_sanity_val_steps': 0,
              'resume_from_checkpoint': None,
          },
          # Unpacked verbatim into the training DataLoader.
          'train_loader':{
              'batch_size': Config.BATCH_SIZE,
              'shuffle': True,
              'num_workers': 4,
              'pin_memory': True,
              'drop_last': True,
          },
          # Unpacked verbatim into the validation AND test DataLoaders.
          'val_loader': {
              'batch_size': 16,
              'shuffle': False,
              'num_workers': 4,
              'pin_memory': True,
              'drop_last': False
         },
          'model':{
              'name': 'swin_large_patch4_window12_384_in22k',  # timm architecture name
              # NOTE(review): 'output_dim' is not read by the Model class in this notebook;
              # the classifier head is sized from Config.N_LABELS instead.
              'output_dim': 9
          },
          'optimizer':{
              # The name is resolved with eval() in Model.configure_optimizers, so it must
              # be an expression valid in the notebook namespace.
              'name': 'MADGRAD',
              'params':{
                  'lr': 5e-6
              },
          },
          'scheduler':{
              # Also resolved with eval(); `optim` is the `torch.optim` alias imported above.
              'name': 'optim.lr_scheduler.CosineAnnealingWarmRestarts',
              'params':{
                  # NOTE(review): if the scheduler is stepped once per epoch (presumably the
                  # Lightning default — confirm), T_0=40 exceeds the 8-epoch budget, so no
                  # warm restart would ever occur.
                  'T_0': 40,
                  'eta_min': 1e-6,
              }
          },
         }

config = Box(config)

In [None]:
[c for c in timm.list_models() if 'swin' in c]

['swin_base_patch4_window7_224',
 'swin_base_patch4_window7_224_in22k',
 'swin_base_patch4_window12_384',
 'swin_base_patch4_window12_384_in22k',
 'swin_large_patch4_window7_224',
 'swin_large_patch4_window7_224_in22k',
 'swin_large_patch4_window12_384',
 'swin_large_patch4_window12_384_in22k',
 'swin_small_patch4_window7_224',
 'swin_tiny_patch4_window7_224']

In [None]:
# ImageNet per-channel normalisation statistics.
# NOTE(review): these constants are not referenced anywhere else in the visible notebook;
# the augmentation pipelines call `A.Normalize()` with its defaults (presumably the same
# ImageNet values — confirm against the albumentations docs).
IMAGENET_MEAN = [0.485, 0.456, 0.406]  # RGB
IMAGENET_STD = [0.229, 0.224, 0.225]  # RGB

In [None]:
# Square target size: used by the A.Resize steps and to size the CutoutV2 holes.
HEIGHT, WIDTH = Config.IMAGE_SIZE, Config.IMAGE_SIZE

In [None]:
class CutoutV2(A.DualTransform):
    """Cutout variant that blanks out fixed-size rectangles at random positions.

    Each hole's top-left corner is clipped into the image first and the far corner is
    then derived from it, so a hole keeps its full ``max_h_size`` x ``max_w_size``
    extent unless it reaches the bottom/right image border.

    Args:
        num_holes: Number of rectangles drawn per image.
        max_h_size: Height of each rectangle in pixels.
        max_w_size: Width of each rectangle in pixels.
        fill_value: Value written into the dropped pixels.
        always_apply: Forwarded to the albumentations base transform.
        p: Probability of applying the transform.
    """

    def __init__(
        self,
        num_holes=8,
        max_h_size=8,
        max_w_size=8,
        fill_value=0,
        always_apply=False,
        p=0.5,
    ):
        super(CutoutV2, self).__init__(always_apply, p)
        self.num_holes = num_holes
        self.max_h_size = max_h_size
        self.max_w_size = max_w_size
        self.fill_value = fill_value

    def apply(self, image, fill_value=0, holes=(), **params):
        """Fill every ``(x1, y1, x2, y2)`` rectangle in ``holes`` with ``fill_value``."""
        return A.functional.cutout(image, holes, fill_value)

    def get_params_dependent_on_targets(self, params):
        """Sample ``num_holes`` rectangles sized for the incoming image.

        Returns:
            ``{"holes": [(x1, y1, x2, y2), ...]}`` in pixel coordinates.
        """
        img = params["image"]
        height, width = img.shape[:2]

        holes = []
        for _n in range(self.num_holes):
            # Hole centre; random.randint is inclusive on both ends.
            y = random.randint(0, height)
            x = random.randint(0, width)

            # Clip the near corner first, then grow the hole from it so its size is
            # only reduced when it would cross the far image border.
            y1 = np.clip(y - self.max_h_size // 2, 0, height)
            y2 = np.clip(y1 + self.max_h_size, 0, height)
            x1 = np.clip(x - self.max_w_size // 2, 0, width)
            x2 = np.clip(x1 + self.max_w_size, 0, width)
            holes.append((x1, y1, x2, y2))

        return {"holes": holes}

    @property
    def targets_as_params(self):
        """The image is needed to know the bounds the holes must stay within."""
        return ["image"]

    def get_transform_init_args_names(self):
        """Constructor arguments recorded when the transform is serialised.

        ``fill_value`` is included so a saved/reloaded pipeline does not silently
        fall back to the default fill.
        """
        return ("num_holes", "max_h_size", "max_w_size", "fill_value")

In [None]:
# Training-time augmentation: geometric + colour jitter, small cutout holes, then
# resize / normalise / convert to a CHW tensor.
# The pipeline previously listed `A.HorizontalFlip(p=0.5)` twice; two independent
# p=0.5 flips still yield a net flip probability of 0.5, so the duplicate only cost time.
# NOTE(review): confirm that mirroring does not change the meaning of any sign class.
# NOTE(review): the CutoutV2 hole size is derived from the *target* size but is applied
# before A.Resize, i.e. on the original image resolution.
albu_train_transforms = A.Compose([
    A.Perspective(scale=(0.05, 0.12), p=0.5),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightness(limit=0.2, p=0.75),
    A.RandomContrast(limit=0.2, p=0.75),
    A.HueSaturationValue(hue_shift_limit=40, sat_shift_limit=40, val_shift_limit=0, p=0.75),
    A.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.3, rotate_limit=5, border_mode=0, p=0.75),
    CutoutV2(max_h_size=int(HEIGHT * 0.03), max_w_size=int(WIDTH * 0.03), num_holes=2, p=0.75),
    A.Resize(height=HEIGHT, width=WIDTH, always_apply=True),
    A.Normalize(always_apply=True),
    ToTensorV2(always_apply=True),
])

# Deterministic pipeline used for validation and test images.
albu_val_transforms = A.Compose([
    A.Resize(height=HEIGHT, width=WIDTH, always_apply=True),
    A.Normalize(always_apply=True),
    ToTensorV2(),
])



In [None]:
class SignDataset(torch.utils.data.Dataset):
    """Image dataset backed by a DataFrame of image ids (and optionally labels).

    Args:
        df: DataFrame with an ``img_IDS`` column and, optionally, a ``Label`` column.
        augmentations: Callable invoked as ``augmentations(image=ndarray)`` that
            returns a dict holding the transformed image under ``'image'``.

    Items are ``(image, label)`` when the frame has a ``Label`` column, otherwise
    just ``image``.
    """

    def __init__(self, df, augmentations=None):
        self._X = df["img_IDS"].values
        self._y = df["Label"].values if "Label" in df.keys() else None
        if augmentations is None:
            # Previously an opaque print('why'); without a pipeline the raw HWC uint8
            # array is returned un-resized and un-normalised, which is almost never
            # what the model expects.
            import warnings

            warnings.warn(
                "SignDataset created without augmentations: raw images will be "
                "returned without resizing, normalisation or tensor conversion.",
                stacklevel=2,
            )
        self.augmentations = augmentations

    def __len__(self):
        return len(self._X)

    def __getitem__(self, idx):
        image_path = self._X[idx]
        # Close the file handle deterministically and force three channels so that
        # grayscale / RGBA / CMYK files do not break the 3-channel normalisation.
        with Image.open(f'{config.root}/{image_path}.jpg') as img:
            image = np.array(img.convert('RGB'))

        if self.augmentations is not None:
            augmented_image = self.augmentations(image=image)
            image = augmented_image['image']

        if self._y is not None:
            label = self._y[idx]
            return image, label
        return image

class SignDataModule(pl.LightningDataModule):
    """Lightning data module serving the train / validation / test frames of one fold.

    Args:
        train_df: Training rows of the fold.
        val_df: Validation rows of the fold.
        test_df: Test rows (scored with the validation transforms).
        cfg: Config object exposing ``train_loader`` and ``val_loader`` keyword dicts.
    """

    def __init__(self, train_df, val_df, test_df, cfg):
        super().__init__()
        self._cfg = cfg
        self._train_df = train_df
        self._val_df = val_df
        self.test_df = test_df

    def __create_dataset(self, train=True):
        """Return the augmented training dataset, or the validation dataset otherwise."""
        if train:
            return SignDataset(self._train_df, albu_train_transforms)
        return SignDataset(self._val_df, albu_val_transforms)

    def train_dataloader(self):
        """Loader over the training frame with the training augmentations."""
        loader_kwargs = self._cfg.train_loader
        return torch.utils.data.DataLoader(self.__create_dataset(True), **loader_kwargs)

    def val_dataloader(self):
        """Loader over the validation frame with the deterministic transforms."""
        loader_kwargs = self._cfg.val_loader
        return torch.utils.data.DataLoader(self.__create_dataset(False), **loader_kwargs)

    def test_dataloader(self):
        """Loader over the test frame; reuses the validation transforms and loader settings."""
        test_set = SignDataset(self.test_df, albu_val_transforms)
        return torch.utils.data.DataLoader(test_set, **self._cfg.val_loader)

In [None]:
# Encode the class names as integer ids, numbered in order of first appearance in Train.csv.
# NOTE: this cell rewrites `train[Config.LABEL_COL]` in place. Re-running it rebuilds
# `mapper` from the already-encoded column, so `inv_mapper` would no longer hold the
# original class names — reload `train` before executing it a second time.
mapper = pd.Series(index=train[Config.LABEL_COL].unique(), data=np.arange(train[Config.LABEL_COL].nunique())).to_dict()
# Reverse lookup (id -> class name), used by save_preds to name the probability columns.
inv_mapper = {val: key for key, val in mapper.items()}
# Stored as float64 here; Model casts the labels to torch.long before computing the loss.
train[Config.LABEL_COL] = train[Config.LABEL_COL].map(mapper).astype(np.float64)
Config.N_LABELS = len(mapper)
print(f'Total Labels in train data are: {Config.N_LABELS}')
# Placeholder label so SignDataset also yields (image, label) pairs for the test frame.
test[Config.LABEL_COL] = 0

Total Labels in train data are: 9


In [None]:
# Debug mode: keep a fixed (seeded) 10% sample of the training rows for quick smoke runs.
# The index is reset so the frame keeps a contiguous 0..n-1 index afterwards.
if Config.DEBUG:
    train = train.sample(frac=0.1, random_state=1).reset_index(drop=True)

In [None]:
def mixup(x: torch.Tensor, y: torch.Tensor, alpha: float = 1.0):
    """Blend every sample of a batch with a randomly chosen partner from the same batch.

    A single coefficient ``lam`` is drawn from ``Beta(alpha, alpha)`` for the whole batch.

    Args:
        x: Batch of inputs; the first dimension is the batch dimension.
        y: Targets aligned with ``x``.
        alpha: Beta distribution parameter, must be positive.

    Returns:
        ``(mixed_x, target_a, target_b, lam)`` where ``target_a`` is ``y`` itself and
        ``target_b`` holds the partners' targets.
    """
    assert alpha > 0, "alpha should be larger than 0"
    batch_size = x.size(0)
    assert batch_size > 1, "Mixup cannot be applied to a single instance."

    lam = np.random.beta(alpha, alpha)
    partner = torch.randperm(batch_size)

    partner_x = x[partner, :]
    partner_y = y[partner]
    blended = lam * x + (1 - lam) * partner_x
    return blended, y, partner_y, lam

class Model(pl.LightningModule):
    """timm backbone followed by a dropout + linear classification head.

    Training uses cross-entropy; roughly 30% of the training steps apply mixup.
    Step outputs carry the raw logits (under the ``'pred'`` key) so that the
    epoch-level cross-entropy is computed on logits, which is what
    ``CrossEntropyLoss`` expects.

    Args:
        cfg: Config object exposing ``model.name``, ``optimizer.{name, params}`` and
            ``scheduler.{name, params}``.
    """

    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.__build_model()
        self.save_hyperparameters(cfg)
        self.criteria = torch.nn.CrossEntropyLoss(reduction='mean')

    def __build_model(self):
        """Create the headless backbone and a head sized from ``Config.N_LABELS``."""
        # num_classes=0 asks timm for a model without its own classifier.
        self.backbone = create_model(
            self.cfg.model.name, pretrained=True, num_classes=0, in_chans=3
        )
        num_features = self.backbone.num_features
        self.fc = nn.Sequential(
            nn.Dropout(0.5), nn.Linear(num_features, Config.N_LABELS)
        )

    def forward(self, x):
        """Return class logits for a batch of images."""
        f = self.backbone(x)
        out = self.fc(f)
        return out

    def training_step(self, batch, batch_idx):
        loss, pred, labels = self.__share_step(batch, 'train')
        # The loss is logged/returned on its own device: the previous `.cpu()` calls
        # forced a host synchronisation on every step.
        self.log(
            name="train_loss",
            value=loss,
            prog_bar=True,
            on_step=True,
            sync_dist=True,
            rank_zero_only=True,
        )

        self.log(name='train_metric',
                 value=loss,
                 prog_bar=True,
                 on_step=True,
                 on_epoch=True,
                 rank_zero_only=True,
                 sync_dist=True
                 )

        return {'loss': loss, 'pred': pred, 'labels': labels}

    def validation_step(self, batch, batch_idx):
        loss, pred, labels = self.__share_step(batch, 'val')
        self.log(
            name="val_metric",
            value=loss,
            prog_bar=True,
            on_step=False,
            on_epoch=True,
            sync_dist=True,
            rank_zero_only=True,
        )

        self.log(
            name="val_loss",
            value=loss,
            prog_bar=True,
            on_step=False,
            on_epoch=True,
            sync_dist=True,
            rank_zero_only=True,
        )
        return {'loss': loss, 'pred': pred, 'labels': labels}

    def __share_step(self, batch, mode):
        """Run one forward pass and compute the loss.

        Returns:
            ``(loss, logits, labels)`` where ``logits`` and ``labels`` are detached
            CPU tensors kept for the epoch-end metric.
        """
        images, labels = batch
        labels = labels.to(torch.long)

        # Mixup on ~30% of the training steps. The random draw stays first in the
        # condition so the RNG stream is consumed exactly as before in every mode.
        if torch.rand(1)[0] < 0.3 and mode == 'train':
            mix_images, target_a, target_b, lam = mixup(images, labels, alpha=0.5)
            logits = self.forward(mix_images)
            loss = self.criteria(logits, target_a) * lam + \
                (1 - lam) * self.criteria(logits, target_b)
        else:
            logits = self.forward(images)
            loss = self.criteria(logits, labels)

        # Keep the RAW logits: the epoch-end hook feeds them to CrossEntropyLoss, which
        # applies log-softmax itself. Storing sigmoid outputs here made the epoch-level
        # `val_loss` meaningless (~1.64 in the saved checkpoint names vs. a true ~0.18).
        pred = logits.detach().cpu()
        labels = labels.detach().cpu()
        return loss, pred, labels

    def training_epoch_end(self, outputs):
        self.__share_epoch_end(outputs, 'train')

    def validation_epoch_end(self, outputs):
        self.__share_epoch_end(outputs, 'val')

    def __share_epoch_end(self, outputs, mode):
        """Log the cross-entropy over all logits collected during the epoch.

        For ``mode == 'train'`` the logits of mixup steps are scored against the
        un-mixed labels, so that value is only indicative.
        """
        logits = torch.cat([out['pred'] for out in outputs])
        labels = torch.cat([out['labels'] for out in outputs])
        metrics = self.criteria(logits, labels.to(torch.long))
        self.log(f'{mode}_loss', metrics)

    def test_step(self, batch, batch_idx):
        # The datasets in this notebook yield (image, label) tuples or bare image
        # tensors; dict batches with an 'image' key are still accepted. The previous
        # code called the non-existent `self.model`.
        if isinstance(batch, dict):
            images = batch['image']
        elif isinstance(batch, (list, tuple)):
            images = batch[0]
        else:
            images = batch
        preds = self(images.to(torch.float))

        return {'preds': preds}

    def predict_step(self, batch, batch_idx):
        """Return raw logits for the images in ``batch[0]``."""
        preds = self(batch[0].to(torch.float))

        return preds

    def test_epoch_end(self, outputs):
        preds = torch.cat([x['preds'] for x in outputs])
        return preds

    def configure_optimizers(self):
        # SECURITY NOTE: the optimizer/scheduler names are resolved with eval(), so they
        # must only ever come from the trusted in-notebook config, never from external input.
        optimizer = eval(self.cfg.optimizer.name)(
            self.parameters(), **self.cfg.optimizer.params
        )
        scheduler = eval(self.cfg.scheduler.name)(
            optimizer,
            **self.cfg.scheduler.params
        )
        return [optimizer], [scheduler]

In [None]:
def get_preds(dl):
    """Predict class probabilities for every sample served by ``dl``.

    Relies on the notebook-level ``trainer``. No model is passed to
    ``trainer.predict``; in the captured logs Lightning then restores the weights
    from a saved checkpoint before predicting.

    Returns:
        NumPy array of softmax probabilities (softmax over the last axis).
    """
    batch_logits = trainer.predict(dataloaders=dl)
    logits = torch.cat(batch_logits).squeeze(-1).cpu().detach().numpy()

    # Drop the per-batch tensors before the softmax allocates its result.
    del batch_logits
    gc.collect()

    return softmax(logits, axis=-1)

def save_preds(preds, path, ids=None, target=None):
    """Write class probabilities to ``path`` as CSV, laid out like the sample submission.

    Args:
        preds: 2-D array of probabilities; column ``i`` belongs to class id ``i``.
        path: Destination CSV file.
        ids: Optional Series of image ids, written as the first column.
        target: Optional Series of encoded labels, decoded back to class names and
            written as the last column.
    """
    frame = pd.DataFrame(preds)
    # Class id -> class name, then reorder to the submission's column order.
    frame.columns = [inv_mapper[class_id] for class_id in frame.columns]
    submission_classes = ss.columns[1:]
    frame = frame[submission_classes]

    if ids is not None:
        frame.insert(0, Config.ID_COL, ids.values)

    if target is not None:
        encoded = pd.Series(target.values, index=frame.index)
        frame[Config.LABEL_COL] = encoded.map(inv_mapper)

    frame.to_csv(path, index=False)

In [None]:
# Folder that receives this experiment's per-fold prediction CSVs; created if missing.
VERSION_OUTPUT_PATH = os.sep.join([Config.OUTPUT_PATH, Config.NAME])
Path(VERSION_OUTPUT_PATH).mkdir(exist_ok=True, parents=True)

In [None]:
# Root folder for this experiment's model checkpoints (one sub-folder per fold); created if missing.
MODEL_SAVE_PATH = os.sep.join(['model_checkpoints', Config.NAME])
Path(MODEL_SAVE_PATH).mkdir(exist_ok=True, parents=True)

In [None]:
# Stratified K-fold cross-validation: train one model per fold, collect out-of-fold
# validation probabilities and average the test probabilities across folds.
skf = StratifiedKFold(
    n_splits=Config.N_SPLITS, shuffle=True, random_state=config.seed
)

all_val_preds = np.zeros((train.shape[0], train[Config.LABEL_COL].nunique()))
all_test_preds = np.zeros((test.shape[0], train[Config.LABEL_COL].nunique()))

for fold, (train_idx, val_idx) in enumerate(skf.split(train[Config.ID_COL], train[Config.LABEL_COL])):
    # Seed python / numpy / torch per fold so augmentation, mixup and weight
    # initialisation are reproducible across runs.
    pl.seed_everything(config.seed + fold)

    # skf.split yields POSITIONAL indices, so select with iloc; `.loc` only worked
    # while the frame happened to carry a default 0..n-1 index.
    train_df = train.iloc[train_idx].reset_index(drop=True)
    val_df = train.iloc[val_idx].reset_index(drop=True)
    datamodule = SignDataModule(train_df, val_df, test, config)

    model = Model(config)
    # NOTE: an EarlyStopping(monitor="val_loss") callback used to be instantiated here
    # but was never handed to the Trainer; the unused object has been dropped, so
    # every fold still trains for the full `config.epoch` epochs.
    lr_monitor = pl.callbacks.LearningRateMonitor()

    loss_checkpoint = pl.callbacks.ModelCheckpoint(
        dirpath=MODEL_SAVE_PATH+os.sep+str(fold),
        monitor="val_metric",
        save_top_k=1,
        mode="min",
        filename='model-{epoch}-{val_metric:.4f}-{val_loss:.4f}'
    )
    # One wandb run per fold: previously every fold reused the first run (see the
    # "There is a wandb run already in progress" warning in the captured log).
    logger = pl.loggers.WandbLogger(
        project='zindi_sign_language', name=f'{Config.NAME}_fold_{fold}'
    )

    # NOTE(review): get_preds() predicts from the restored best checkpoint (see the
    # captured log), which is not necessarily the SWA-averaged model — confirm that
    # this is the intended behaviour.
    swa_callback = pl.callbacks.StochasticWeightAveraging(swa_epoch_start=5)

    trainer = pl.Trainer(
        logger=logger,
        max_epochs=config.epoch,
        callbacks=[lr_monitor, loss_checkpoint, swa_callback],
        **config.trainer,
    )
    trainer.fit(model, datamodule=datamodule)
    gc.collect()

    # Out-of-fold validation predictions and their log-loss.
    val_preds = get_preds(datamodule.val_dataloader())
    metric = log_loss(val_df['Label'], val_preds, labels=np.arange(Config.N_LABELS))
    print(f'Fold {fold} logloss: {metric}')

    all_val_preds[val_idx] = val_preds

    # Each fold contributes 1/N_SPLITS of the final test prediction.
    test_preds = get_preds(datamodule.test_dataloader())
    all_test_preds += test_preds/Config.N_SPLITS

    datamodule.teardown()

    path = VERSION_OUTPUT_PATH + os.sep + f'val_preds_fold_{fold}.csv'
    save_preds(val_preds, path, ids=val_df[Config.ID_COL], target=val_df[Config.LABEL_COL])

    path = VERSION_OUTPUT_PATH + os.sep + f'test_preds_fold_{fold}.csv'
    save_preds(test_preds, path, ids=test[Config.ID_COL])

    # Close this fold's wandb run so the next WandbLogger starts a fresh one.
    wandb.finish()

    # Release the ~195M-parameter model before the next fold is built.
    del model, trainer, logger, test_preds, val_preds, datamodule, train_df, val_df
    _ = gc.collect()
    _ = torch.cuda.empty_cache()
    _ = gc.collect()

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
  f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



  | Name     | Type             | Params
----------------------------------------------
0 | backbone | SwinTransformer  | 195 M 
1 | fc       | Sequential       | 13.8 K
2 | criteria | CrossEntropyLoss | 0     
----------------------------------------------
195 M     Trainable params
0         Non-trainable params
195 M     Total params
780.849   Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Swapping scheduler `CosineAnnealingWarmRestarts` for `SWALR`


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

  f"`.{fn}(ckpt_path=None)` was called without a model."
Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/0/model-epoch=7-val_metric=0.1832-val_loss=1.6423.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/0/model-epoch=7-val_metric=0.1832-val_loss=1.6423.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"`.{fn}(ckpt_path=None)` was called without a model."
  f"DataModule.{name} has already been called, so it will not be called again. "
Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/0/model-epoch=7-val_metric=0.1832-val_loss=1.6423.ckpt


Fold 0 logloss: 0.18315718561890534


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/0/model-epoch=7-val_metric=0.1832-val_loss=1.6423.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"DataModule.{name} has already been called, so it will not be called again. "
  f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  "There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse"

  | Name     | Type             | Params
----------------------------------------------
0 | backbone | SwinTransformer  | 195 M 
1 | fc       | Sequential       | 13.8 K
2 | criteria | CrossEntropyLoss | 0     
----------------------------------------------
195 M     Trainable params
0         Non-trainable params
195 M     Total params
780.849   Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Swapping scheduler `CosineAnnealingWarmRestarts` for `SWALR`


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/1/model-epoch=1-val_metric=0.1578-val_loss=1.6488.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/1/model-epoch=1-val_metric=0.1578-val_loss=1.6488.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"`.{fn}(ckpt_path=None)` was called without a model."
  f"DataModule.{name} has already been called, so it will not be called again. "
Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/1/model-epoch=1-val_metric=0.1578-val_loss=1.6488.ckpt


Fold 1 logloss: 0.15778902796995825


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/1/model-epoch=1-val_metric=0.1578-val_loss=1.6488.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"DataModule.{name} has already been called, so it will not be called again. "
  f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  "There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse"

  | Name     | Type             | Params
----------------------------------------------
0 | backbone | SwinTransformer  | 195 M 
1 | fc       | Sequential       | 13.8 K
2 | criteria | CrossEntropyLoss | 0     
----------------------------------------------
195 M     Trainable params
0         Non-trainable params
195 M     Total params
780.849   Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Swapping scheduler `CosineAnnealingWarmRestarts` for `SWALR`


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/2/model-epoch=4-val_metric=0.1496-val_loss=1.6444.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/2/model-epoch=4-val_metric=0.1496-val_loss=1.6444.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"`.{fn}(ckpt_path=None)` was called without a model."
  f"DataModule.{name} has already been called, so it will not be called again. "
Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/2/model-epoch=4-val_metric=0.1496-val_loss=1.6444.ckpt


Fold 2 logloss: 0.149605100852903


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/2/model-epoch=4-val_metric=0.1496-val_loss=1.6444.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"DataModule.{name} has already been called, so it will not be called again. "
  f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  "There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse"

  | Name     | Type             | Params
----------------------------------------------
0 | backbone | SwinTransformer  | 195 M 
1 | fc       | Sequential       | 13.8 K
2 | criteria | CrossEntropyLoss | 0     
----------------------------------------------
195 M     Trainable params
0         Non-trainable params
195 M     Total params
780.849   Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Swapping scheduler `CosineAnnealingWarmRestarts` for `SWALR`


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/3/model-epoch=4-val_metric=0.1497-val_loss=1.6326.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/3/model-epoch=4-val_metric=0.1497-val_loss=1.6326.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"`.{fn}(ckpt_path=None)` was called without a model."
  f"DataModule.{name} has already been called, so it will not be called again. "
Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/3/model-epoch=4-val_metric=0.1497-val_loss=1.6326.ckpt


Fold 3 logloss: 0.1496847791051958


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/3/model-epoch=4-val_metric=0.1497-val_loss=1.6326.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"DataModule.{name} has already been called, so it will not be called again. "
  f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  "There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse"

  | Name     | Type             | Params
----------------------------------------------
0 | backbone | SwinTransformer  | 195 M 
1 | fc       | Sequential       | 13.8 K
2 | criteria | CrossEntropyLoss | 0     
----------------------------------------------
195 M     Trainable params
0         Non-trainable params
195 M     Total params
780.849   Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Swapping scheduler `CosineAnnealingWarmRestarts` for `SWALR`


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/4/model-epoch=1-val_metric=0.1827-val_loss=1.6510.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/4/model-epoch=1-val_metric=0.1827-val_loss=1.6510.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"`.{fn}(ckpt_path=None)` was called without a model."
  f"DataModule.{name} has already been called, so it will not be called again. "
Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/4/model-epoch=1-val_metric=0.1827-val_loss=1.6510.ckpt


Fold 4 logloss: 0.1826855544009828


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/4/model-epoch=1-val_metric=0.1827-val_loss=1.6510.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"DataModule.{name} has already been called, so it will not be called again. "
  f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  "There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse"

  | Name     | Type             | Params
----------------------------------------------
0 | backbone | SwinTransformer  | 195 M 
1 | fc       | Sequential       | 13.8 K
2 | criteria | CrossEntropyLoss | 0     
----------------------------------------------
195 M     Trainable params
0         Non-trainable params
195 M     Total params
780.849   Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Swapping scheduler `CosineAnnealingWarmRestarts` for `SWALR`


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/5/model-epoch=3-val_metric=0.1808-val_loss=1.6540.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/5/model-epoch=3-val_metric=0.1808-val_loss=1.6540.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"`.{fn}(ckpt_path=None)` was called without a model."
  f"DataModule.{name} has already been called, so it will not be called again. "
Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/5/model-epoch=3-val_metric=0.1808-val_loss=1.6540.ckpt


Fold 5 logloss: 0.18080501237603602


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/5/model-epoch=3-val_metric=0.1808-val_loss=1.6540.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"DataModule.{name} has already been called, so it will not be called again. "
  f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  "There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse"

  | Name     | Type             | Params
----------------------------------------------
0 | backbone | SwinTransformer  | 195 M 
1 | fc       | Sequential       | 13.8 K
2 | criteria | CrossEntropyLoss | 0     
----------------------------------------------
195 M     Trainable params
0         Non-trainable params
195 M     Total params
780.849   Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Swapping scheduler `CosineAnnealingWarmRestarts` for `SWALR`


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/6/model-epoch=7-val_metric=0.1721-val_loss=1.6287.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/6/model-epoch=7-val_metric=0.1721-val_loss=1.6287.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"`.{fn}(ckpt_path=None)` was called without a model."
  f"DataModule.{name} has already been called, so it will not be called again. "
Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/6/model-epoch=7-val_metric=0.1721-val_loss=1.6287.ckpt


Fold 6 logloss: 0.1720876488987822


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/6/model-epoch=7-val_metric=0.1721-val_loss=1.6287.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"DataModule.{name} has already been called, so it will not be called again. "
  f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  "There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse"

  | Name     | Type             | Params
----------------------------------------------
0 | backbone | SwinTransformer  | 195 M 
1 | fc       | Sequential       | 13.8 K
2 | criteria | CrossEntropyLoss | 0     
----------------------------------------------
195 M     Trainable params
0         Non-trainable params
195 M     Total params
780.849   Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Swapping scheduler `CosineAnnealingWarmRestarts` for `SWALR`


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/7/model-epoch=6-val_metric=0.1514-val_loss=1.6540.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/7/model-epoch=6-val_metric=0.1514-val_loss=1.6540.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"`.{fn}(ckpt_path=None)` was called without a model."
  f"DataModule.{name} has already been called, so it will not be called again. "
Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/7/model-epoch=6-val_metric=0.1514-val_loss=1.6540.ckpt


Fold 7 logloss: 0.1514077344596386


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/7/model-epoch=6-val_metric=0.1514-val_loss=1.6540.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"DataModule.{name} has already been called, so it will not be called again. "
  f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  "There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse"

  | Name     | Type             | Params
----------------------------------------------
0 | backbone | SwinTransformer  | 195 M 
1 | fc       | Sequential       | 13.8 K
2 | criteria | CrossEntropyLoss | 0     
----------------------------------------------
195 M     Trainable params
0         Non-trainable params
195 M     Total params
780.849   Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Swapping scheduler `CosineAnnealingWarmRestarts` for `SWALR`


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/8/model-epoch=3-val_metric=0.1399-val_loss=1.6421.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/8/model-epoch=3-val_metric=0.1399-val_loss=1.6421.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"`.{fn}(ckpt_path=None)` was called without a model."
  f"DataModule.{name} has already been called, so it will not be called again. "
Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/8/model-epoch=3-val_metric=0.1399-val_loss=1.6421.ckpt


Fold 8 logloss: 0.13994755386689212


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/8/model-epoch=3-val_metric=0.1399-val_loss=1.6421.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"DataModule.{name} has already been called, so it will not be called again. "
  f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  "There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse"

  | Name     | Type             | Params
----------------------------------------------
0 | backbone | SwinTransformer  | 195 M 
1 | fc       | Sequential       | 13.8 K
2 | criteria | CrossEntropyLoss | 0     
----------------------------------------------
195 M     Trainable params
0         Non-trainable params
195 M     Total params
780.849   Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Swapping scheduler `CosineAnnealingWarmRestarts` for `SWALR`


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/9/model-epoch=3-val_metric=0.2308-val_loss=1.6529.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/9/model-epoch=3-val_metric=0.2308-val_loss=1.6529.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"`.{fn}(ckpt_path=None)` was called without a model."
  f"DataModule.{name} has already been called, so it will not be called again. "
Restoring states from the checkpoint path at /content/model_checkpoints/swin_10/9/model-epoch=3-val_metric=0.2308-val_loss=1.6529.ckpt


Fold 9 logloss: 0.2307526549600425


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from checkpoint at /content/model_checkpoints/swin_10/9/model-epoch=3-val_metric=0.2308-val_loss=1.6529.ckpt


Predicting: 1406it [00:00, ?it/s]

  f"DataModule.{name} has already been called, so it will not be called again. "


In [None]:
# Out-of-fold score: multiclass log loss of `all_val_preds` (presumably the
# validation predictions gathered across the folds above -- confirm against
# the training loop) measured against the training labels. The class ids are
# pinned to 0..8 so the columns of the predictions line up with the labels.
oof_metric = log_loss(
    y_true=train['Label'],
    y_pred=all_val_preds,
    labels=np.arange(9),
)
print(f'OOF logloss: {oof_metric}')

OOF logloss: 0.16978247088110038


In [None]:
# Hand `all_test_preds` to save_preds together with the destination file and
# the test-set id column (going by the file name these are presumably the
# fold-averaged predictions -- confirm against the training loop).
# os.path.join is used instead of manual `+ os.sep +` concatenation so that a
# trailing separator on VERSION_OUTPUT_PATH does not yield a doubled separator
# and an empty directory string does not resolve to the filesystem root.
path = os.path.join(VERSION_OUTPUT_PATH, 'test_preds_all_folds_mean.csv')
save_preds(all_test_preds, path, ids=test[Config.ID_COL])